can_implement_as_sibling_call_p REG_PARM_STACK_SPACE check
[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
132 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers[6] =
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
323 CX_REG, DX_REG, R8_REG, R9_REG
326 static int const x86_64_int_return_registers[4] =
328 AX_REG, DX_REG, DI_REG, SI_REG
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary;
355 /* Alignment for incoming stack boundary in bits specified at
356 command line. */
357 unsigned int ix86_user_incoming_stack_boundary;
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary;
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary;
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
373 /* Fence to use after loop using movnt. */
374 tree x86_mfence;
 376 /* Register class used for passing a given 64-bit part of the argument.
 377    These represent classes as documented by the psABI, with the exception
 378    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
 379    just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
 381    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 382    whenever possible (the upper half does contain padding). */
383 enum x86_64_reg_class
385 X86_64_NO_CLASS,
386 X86_64_INTEGER_CLASS,
387 X86_64_INTEGERSI_CLASS,
388 X86_64_SSE_CLASS,
389 X86_64_SSESF_CLASS,
390 X86_64_SSEDF_CLASS,
391 X86_64_SSEUP_CLASS,
392 X86_64_X87_CLASS,
393 X86_64_X87UP_CLASS,
394 X86_64_COMPLEX_X87_CLASS,
395 X86_64_MEMORY_CLASS
398 #define MAX_CLASSES 8
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
 424 /* Return true if the red zone is in use.  We can't use the red zone when
 425    there are local indirect jumps, like "indirect_jump" or "tablejump",
 426    which jump to another place in the function, since "call" in the
 427    indirect thunk pushes the return address onto the stack, destroying
 428    the red zone.
 430    TODO: If we can reserve the first 2 WORDs in the red zone, one for
 431    PUSH and another for CALL, we can allow local indirect jumps with
 432    an indirect thunk. */
434 bool
435 ix86_using_red_zone (void)
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun->machine->has_local_indirect_jump
440 || cfun->machine->indirect_branch_type == indirect_branch_keep));
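/* Illustrative example (hypothetical user code, not part of i386.c): an
   indirect jump of the kind the check above is about.  With
   -mindirect-branch=thunk the "goto *" below is routed through a thunk
   whose CALL pushes a return address, which would clobber the 128-byte
   red zone, so ix86_using_red_zone returns false for such a function.  */

int
dispatch_example (int i)
{
  static void *tab[] = { &&case0, &&case1 };
  goto *tab[i & 1];		/* local indirect jump (tablejump-like) */
 case0:
  return 0;
 case1:
  return 1;
}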
 443 /* Return true if profiling code should be emitted before the
 444    prologue, and false otherwise.
 445    Note: for x86 with "hotfix" this is already taken care of. */
446 static bool
447 ix86_profile_before_prologue (void)
449 return flag_fentry != 0;
452 /* Update register usage after having seen the compiler flags. */
454 static void
455 ix86_conditional_register_usage (void)
457 int i, c_mask;
 459   /* If there are no caller-saved registers, preserve all registers
 460      except fixed_regs and the registers used for the function return
 461      value, since aggregate_value_p checks call_used_regs[regno] on the
 462      return value. */
463 if (cfun && cfun->machine->no_caller_saved_registers)
464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 call_used_regs[i] = 0;
468 /* For 32-bit targets, disable the REX registers. */
469 if (! TARGET_64BIT)
471 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
482 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs[i] > 1)
489 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 && call_used_regs[i])
495 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
498 /* If MMX is disabled, disable the registers. */
499 if (! TARGET_MMX)
500 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
502 /* If SSE is disabled, disable the registers. */
503 if (! TARGET_SSE)
504 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
506 /* If the FPU is disabled, disable the registers. */
507 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
510 /* If AVX512F is disabled, disable the registers. */
511 if (! TARGET_AVX512F)
513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
516 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
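/* Illustrative example (hypothetical user code, not part of i386.c): the
   "no_caller_saved_registers" attribute handled above.  Every register
   such a function modifies is saved in its prologue and restored in its
   epilogue, so callers may keep values live in normally call-clobbered
   registers across the call.  */

extern volatile int event_count;

void __attribute__ ((no_caller_saved_registers))
event_hook (void)
{
  event_count++;
}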
520 /* Canonicalize a comparison from one we don't have to one we do have. */
522 static void
523 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524 bool op0_preserve_value)
526 /* The order of operands in x87 ficom compare is forced by combine in
527 simplify_comparison () function. Float operator is treated as RTX_OBJ
528 with a precedence over other operators and is always put in the first
529 place. Swap condition and operands to match ficom instruction. */
530 if (!op0_preserve_value
531 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
533 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
535 /* We are called only for compares that are split to SAHF instruction.
536 Ensure that we have setcc/jcc insn for the swapped condition. */
537 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
539 std::swap (*op0, *op1);
540 *code = (int) scode;
546 /* Hook to determine if one function can safely inline another. */
548 static bool
549 ix86_can_inline_p (tree caller, tree callee)
551 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 554   /* Changes of those flags can be tolerated for always_inline functions.
 555      Let's hope the user knows what they are doing. */
556 const unsigned HOST_WIDE_INT always_inline_safe_mask
557 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
566 if (!callee_tree)
567 callee_tree = target_option_default_node;
568 if (!caller_tree)
569 caller_tree = target_option_default_node;
570 if (callee_tree == caller_tree)
571 return true;
573 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575 bool ret = false;
576 bool always_inline
577 = (DECL_DISREGARD_INLINE_LIMITS (callee)
578 && lookup_attribute ("always_inline",
579 DECL_ATTRIBUTES (callee)));
581 cgraph_node *callee_node = cgraph_node::get (callee);
 582   /* The callee's ISA options should be a subset of the caller's, i.e. an
 583      SSE4 function can inline an SSE2 function, but an SSE2 function can't
 584      inline an SSE4 function. */
585 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586 != callee_opts->x_ix86_isa_flags)
587 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588 != callee_opts->x_ix86_isa_flags2))
589 ret = false;
591 /* See if we have the same non-isa options. */
592 else if ((!always_inline
593 && caller_opts->x_target_flags != callee_opts->x_target_flags)
594 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596 ret = false;
598 /* See if arch, tune, etc. are the same. */
599 else if (caller_opts->arch != callee_opts->arch)
600 ret = false;
602 else if (!always_inline && caller_opts->tune != callee_opts->tune)
603 ret = false;
605 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
 606 	   /* If the callee doesn't use FP expressions, differences in
 607 	      ix86_fpmath can be ignored.  We are called from FEs
 608 	      for multi-versioning call optimization, so beware of
 609 	      ipa_fn_summaries not being available. */
610 && (! ipa_fn_summaries
611 || ipa_fn_summaries->get (callee_node) == NULL
612 || ipa_fn_summaries->get (callee_node)->fp_expressions))
613 ret = false;
615 else if (!always_inline
616 && caller_opts->branch_cost != callee_opts->branch_cost)
617 ret = false;
619 else
620 ret = true;
622 return ret;
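/* Illustrative example (hypothetical user code, not part of i386.c): the
   ISA-subset rule checked above.  The always_inline callee requires AVX2
   while the caller only enables SSE2, so ix86_can_inline_p refuses and
   GCC reports an inlining failure; without always_inline the call would
   simply stay out of line.  */

static inline int __attribute__ ((always_inline, target ("avx2")))
callee_avx2 (int x)
{
  return x * 2;
}

int __attribute__ ((target ("sse2")))
caller_sse2 (int x)
{
  return callee_avx2 (x);	/* target-specific option mismatch */
}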
625 /* Return true if this goes in large data/bss. */
627 static bool
628 ix86_in_large_data_p (tree exp)
630 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631 return false;
633 if (exp == NULL_TREE)
634 return false;
636 /* Functions are never large data. */
637 if (TREE_CODE (exp) == FUNCTION_DECL)
638 return false;
640 /* Automatic variables are never large data. */
641 if (VAR_P (exp) && !is_global_var (exp))
642 return false;
644 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
646 const char *section = DECL_SECTION_NAME (exp);
647 if (strcmp (section, ".ldata") == 0
648 || strcmp (section, ".lbss") == 0)
649 return true;
650 return false;
652 else
654 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
 656       /* If this is an incomplete type with size 0, then we can't put it
 657 	 in data because it might be too big when completed.  Also,
 658 	 int_size_in_bytes returns -1 if the size can vary or is larger than
 659 	 an integer, in which case it is also safer to assume that it goes in
 660 	 large data. */
661 if (size <= 0 || size > ix86_section_threshold)
662 return true;
665 return false;
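/* Illustrative example (hypothetical user code, not part of i386.c): what
   ix86_in_large_data_p classifies.  Compiled with -mcmodel=medium, objects
   larger than -mlarge-data-threshold (65536 bytes by default) are placed
   in the .ldata/.lbss sections; smaller objects stay in the ordinary
   .data/.bss sections.  */

char big_buffer[1 << 20];	/* large: goes to .lbss under -mcmodel=medium */
char small_flag;		/* small: stays in regular .bss */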
668 /* i386-specific section flag to mark large sections. */
669 #define SECTION_LARGE SECTION_MACH_DEP
671 /* Switch to the appropriate section for output of DECL.
672 DECL is either a `VAR_DECL' node or a constant of some sort.
673 RELOC indicates whether forming the initial value of DECL requires
674 link-time relocations. */
676 ATTRIBUTE_UNUSED static section *
677 x86_64_elf_select_section (tree decl, int reloc,
678 unsigned HOST_WIDE_INT align)
680 if (ix86_in_large_data_p (decl))
682 const char *sname = NULL;
683 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684 switch (categorize_decl_for_section (decl, reloc))
686 case SECCAT_DATA:
687 sname = ".ldata";
688 break;
689 case SECCAT_DATA_REL:
690 sname = ".ldata.rel";
691 break;
692 case SECCAT_DATA_REL_LOCAL:
693 sname = ".ldata.rel.local";
694 break;
695 case SECCAT_DATA_REL_RO:
696 sname = ".ldata.rel.ro";
697 break;
698 case SECCAT_DATA_REL_RO_LOCAL:
699 sname = ".ldata.rel.ro.local";
700 break;
701 case SECCAT_BSS:
702 sname = ".lbss";
703 flags |= SECTION_BSS;
704 break;
705 case SECCAT_RODATA:
706 case SECCAT_RODATA_MERGE_STR:
707 case SECCAT_RODATA_MERGE_STR_INIT:
708 case SECCAT_RODATA_MERGE_CONST:
709 sname = ".lrodata";
710 flags &= ~SECTION_WRITE;
711 break;
712 case SECCAT_SRODATA:
713 case SECCAT_SDATA:
714 case SECCAT_SBSS:
715 gcc_unreachable ();
716 case SECCAT_TEXT:
717 case SECCAT_TDATA:
718 case SECCAT_TBSS:
 719 	  /* We don't split these for the medium model.  Place them into
 720 	     default sections and hope for the best. */
721 break;
723 if (sname)
725 /* We might get called with string constants, but get_named_section
726 doesn't like them as they are not DECLs. Also, we need to set
727 flags in that case. */
728 if (!DECL_P (decl))
729 return get_section (sname, flags, NULL);
730 return get_named_section (decl, sname, reloc);
733 return default_elf_select_section (decl, reloc, align);
736 /* Select a set of attributes for section NAME based on the properties
737 of DECL and whether or not RELOC indicates that DECL's initializer
738 might contain runtime relocations. */
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
743 unsigned int flags = default_section_type_flags (decl, name, reloc);
745 if (ix86_in_large_data_p (decl))
746 flags |= SECTION_LARGE;
748 if (decl == NULL_TREE
749 && (strcmp (name, ".ldata.rel.ro") == 0
750 || strcmp (name, ".ldata.rel.ro.local") == 0))
751 flags |= SECTION_RELRO;
753 if (strcmp (name, ".lbss") == 0
754 || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755 || strncmp (name, ".gnu.linkonce.lb.",
756 sizeof (".gnu.linkonce.lb.") - 1) == 0)
757 flags |= SECTION_BSS;
759 return flags;
762 /* Build up a unique section name, expressed as a
763 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764 RELOC indicates whether the initial value of EXP requires
765 link-time relocations. */
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl, int reloc)
770 if (ix86_in_large_data_p (decl))
772 const char *prefix = NULL;
773 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
774 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
776 switch (categorize_decl_for_section (decl, reloc))
778 case SECCAT_DATA:
779 case SECCAT_DATA_REL:
780 case SECCAT_DATA_REL_LOCAL:
781 case SECCAT_DATA_REL_RO:
782 case SECCAT_DATA_REL_RO_LOCAL:
783 prefix = one_only ? ".ld" : ".ldata";
784 break;
785 case SECCAT_BSS:
786 prefix = one_only ? ".lb" : ".lbss";
787 break;
788 case SECCAT_RODATA:
789 case SECCAT_RODATA_MERGE_STR:
790 case SECCAT_RODATA_MERGE_STR_INIT:
791 case SECCAT_RODATA_MERGE_CONST:
792 prefix = one_only ? ".lr" : ".lrodata";
793 break;
794 case SECCAT_SRODATA:
795 case SECCAT_SDATA:
796 case SECCAT_SBSS:
797 gcc_unreachable ();
798 case SECCAT_TEXT:
799 case SECCAT_TDATA:
800 case SECCAT_TBSS:
 801 	  /* We don't split these for the medium model.  Place them into
 802 	     default sections and hope for the best. */
803 break;
805 if (prefix)
807 const char *name, *linkonce;
808 char *string;
810 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811 name = targetm.strip_name_encoding (name);
813 /* If we're using one_only, then there needs to be a .gnu.linkonce
814 prefix to the section name. */
815 linkonce = one_only ? ".gnu.linkonce" : "";
817 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
819 set_decl_section_name (decl, string);
820 return;
823 default_unique_section (decl, reloc);
826 #ifdef COMMON_ASM_OP
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830 #endif
832 /* This says how to output assembler code to declare an
833 uninitialized external linkage data object.
 835    For the medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
 836    directive for large objects. */
837 void
838 x86_elf_aligned_decl_common (FILE *file, tree decl,
839 const char *name, unsigned HOST_WIDE_INT size,
840 int align)
842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843 && size > (unsigned int)ix86_section_threshold)
845 switch_to_section (get_named_section (decl, ".lbss", 0));
846 fputs (LARGECOMM_SECTION_ASM_OP, file);
848 else
849 fputs (COMMON_ASM_OP, file);
850 assemble_name (file, name);
851 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852 size, align / BITS_PER_UNIT);
854 #endif
856 /* Utility function for targets to use in implementing
857 ASM_OUTPUT_ALIGNED_BSS. */
859 void
860 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861 unsigned HOST_WIDE_INT size, int align)
863 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864 && size > (unsigned int)ix86_section_threshold)
865 switch_to_section (get_named_section (decl, ".lbss", 0));
866 else
867 switch_to_section (bss_section);
868 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870 last_assemble_variable_decl = decl;
871 ASM_DECLARE_OBJECT_NAME (file, name, decl);
872 #else
 873   /* The standard thing is to just output a label for the object. */
874 ASM_OUTPUT_LABEL (file, name);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876 ASM_OUTPUT_SKIP (file, size ? size : 1);
879 /* Decide whether we must probe the stack before any space allocation
880 on this target. It's essentially TARGET_STACK_PROBE except when
881 -fstack-check causes the stack to be already probed differently. */
883 bool
884 ix86_target_stack_probe (void)
886 /* Do not probe the stack twice if static stack checking is enabled. */
887 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888 return false;
890 return TARGET_STACK_PROBE;
893 /* Decide whether we can make a sibling call to a function. DECL is the
894 declaration of the function being targeted by the call and EXP is the
895 CALL_EXPR representing the call. */
897 static bool
898 ix86_function_ok_for_sibcall (tree decl, tree exp)
900 tree type, decl_or_type;
901 rtx a, b;
902 bool bind_global = decl && !targetm.binds_local_p (decl);
904 if (ix86_function_naked (current_function_decl))
905 return false;
907 /* Sibling call isn't OK if there are no caller-saved registers
908 since all registers must be preserved before return. */
909 if (cfun->machine->no_caller_saved_registers)
910 return false;
912 /* If we are generating position-independent code, we cannot sibcall
913 optimize direct calls to global functions, as the PLT requires
914 %ebx be live. (Darwin does not have a PLT.) */
915 if (!TARGET_MACHO
916 && !TARGET_64BIT
917 && flag_pic
918 && flag_plt
919 && bind_global)
920 return false;
922 /* If we need to align the outgoing stack, then sibcalling would
923 unalign the stack, which may break the called function. */
924 if (ix86_minimum_incoming_stack_boundary (true)
925 < PREFERRED_STACK_BOUNDARY)
926 return false;
928 if (decl)
930 decl_or_type = decl;
931 type = TREE_TYPE (decl);
933 else
935 /* We're looking at the CALL_EXPR, we need the type of the function. */
936 type = CALL_EXPR_FN (exp); /* pointer expression */
937 type = TREE_TYPE (type); /* pointer type */
938 type = TREE_TYPE (type); /* function type */
939 decl_or_type = type;
942 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
943 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
944 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
945 || (REG_PARM_STACK_SPACE (decl_or_type)
946 != REG_PARM_STACK_SPACE (current_function_decl)))
948 maybe_complain_about_tail_call (exp,
949 "inconsistent size of stack space"
950 " allocated for arguments which are"
951 " passed in registers");
952 return false;
955 /* Check that the return value locations are the same. Like
956 if we are returning floats on the 80387 register stack, we cannot
957 make a sibcall from a function that doesn't return a float to a
958 function that does or, conversely, from a function that does return
959 a float to a function that doesn't; the necessary stack adjustment
960 would not be executed. This is also the place we notice
961 differences in the return value ABI. Note that it is ok for one
962 of the functions to have void return type as long as the return
963 value of the other is passed in a register. */
964 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
965 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
966 cfun->decl, false);
967 if (STACK_REG_P (a) || STACK_REG_P (b))
969 if (!rtx_equal_p (a, b))
970 return false;
972 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
974 else if (!rtx_equal_p (a, b))
975 return false;
977 if (TARGET_64BIT)
979 /* The SYSV ABI has more call-clobbered registers;
980 disallow sibcalls from MS to SYSV. */
981 if (cfun->machine->call_abi == MS_ABI
982 && ix86_function_type_abi (type) == SYSV_ABI)
983 return false;
985 else
987 /* If this call is indirect, we'll need to be able to use a
988 call-clobbered register for the address of the target function.
989 Make sure that all such registers are not used for passing
990 parameters. Note that DLLIMPORT functions and call to global
991 function via GOT slot are indirect. */
992 if (!decl
993 || (bind_global && flag_pic && !flag_plt)
994 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
995 || flag_force_indirect_call)
997 /* Check if regparm >= 3 since arg_reg_available is set to
998 false if regparm == 0. If regparm is 1 or 2, there is
999 always a call-clobbered register available.
1001 ??? The symbol indirect call doesn't need a call-clobbered
1002 register. But we don't know if this is a symbol indirect
1003 call or not here. */
1004 if (ix86_function_regparm (type, decl) >= 3
1005 && !cfun->machine->arg_reg_available)
1006 return false;
1010 /* Otherwise okay. That also includes certain types of indirect calls. */
1011 return true;
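/* Illustrative example (hypothetical user code, not part of i386.c): a
   call in tail position that this hook may allow to become a sibcall (a
   plain jmp).  Whether it actually is converted depends on the checks
   above: 32-bit PIC calls through the PLT, mismatched
   REG_PARM_STACK_SPACE, MS-to-SYSV ABI transitions or x87 return values
   all force a normal call instead.  */

extern int worker (int);

int
wrapper (int x)
{
  return worker (x + 1);	/* tail call: sibcall candidate */
}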
1014 /* This function determines from TYPE the calling-convention. */
1016 unsigned int
1017 ix86_get_callcvt (const_tree type)
1019 unsigned int ret = 0;
1020 bool is_stdarg;
1021 tree attrs;
1023 if (TARGET_64BIT)
1024 return IX86_CALLCVT_CDECL;
1026 attrs = TYPE_ATTRIBUTES (type);
1027 if (attrs != NULL_TREE)
1029 if (lookup_attribute ("cdecl", attrs))
1030 ret |= IX86_CALLCVT_CDECL;
1031 else if (lookup_attribute ("stdcall", attrs))
1032 ret |= IX86_CALLCVT_STDCALL;
1033 else if (lookup_attribute ("fastcall", attrs))
1034 ret |= IX86_CALLCVT_FASTCALL;
1035 else if (lookup_attribute ("thiscall", attrs))
1036 ret |= IX86_CALLCVT_THISCALL;
 1038       /* Regparm isn't allowed for thiscall and fastcall. */
1039 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1041 if (lookup_attribute ("regparm", attrs))
1042 ret |= IX86_CALLCVT_REGPARM;
1043 if (lookup_attribute ("sseregparm", attrs))
1044 ret |= IX86_CALLCVT_SSEREGPARM;
1047 if (IX86_BASE_CALLCVT(ret) != 0)
1048 return ret;
1051 is_stdarg = stdarg_p (type);
1052 if (TARGET_RTD && !is_stdarg)
1053 return IX86_CALLCVT_STDCALL | ret;
1055 if (ret != 0
1056 || is_stdarg
1057 || TREE_CODE (type) != METHOD_TYPE
1058 || ix86_function_type_abi (type) != MS_ABI)
1059 return IX86_CALLCVT_CDECL | ret;
1061 return IX86_CALLCVT_THISCALL;
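/* Illustrative example (hypothetical user code, not part of i386.c): the
   32-bit calling-convention attributes classified above (they are only
   meaningful with -m32).  */

int __attribute__ ((cdecl))	  f_cdecl (int a, int b);    /* caller pops args */
int __attribute__ ((stdcall))	  f_stdcall (int a, int b);  /* callee pops args */
int __attribute__ ((fastcall))	  f_fastcall (int a, int b); /* a in %ecx, b in %edx */
int __attribute__ ((regparm (3))) f_regparm (int a, int b, int c); /* %eax, %edx, %ecx */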
1064 /* Return 0 if the attributes for two types are incompatible, 1 if they
1065 are compatible, and 2 if they are nearly compatible (which causes a
1066 warning to be generated). */
1068 static int
1069 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1071 unsigned int ccvt1, ccvt2;
1073 if (TREE_CODE (type1) != FUNCTION_TYPE
1074 && TREE_CODE (type1) != METHOD_TYPE)
1075 return 1;
1077 ccvt1 = ix86_get_callcvt (type1);
1078 ccvt2 = ix86_get_callcvt (type2);
1079 if (ccvt1 != ccvt2)
1080 return 0;
1081 if (ix86_function_regparm (type1, NULL)
1082 != ix86_function_regparm (type2, NULL))
1083 return 0;
1085 return 1;
1088 /* Return the regparm value for a function with the indicated TYPE and DECL.
1089 DECL may be NULL when calling function indirectly
1090 or considering a libcall. */
1092 static int
1093 ix86_function_regparm (const_tree type, const_tree decl)
1095 tree attr;
1096 int regparm;
1097 unsigned int ccvt;
1099 if (TARGET_64BIT)
1100 return (ix86_function_type_abi (type) == SYSV_ABI
1101 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1102 ccvt = ix86_get_callcvt (type);
1103 regparm = ix86_regparm;
1105 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1107 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1108 if (attr)
1110 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1111 return regparm;
1114 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1115 return 2;
1116 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1117 return 1;
1119 /* Use register calling convention for local functions when possible. */
1120 if (decl
1121 && TREE_CODE (decl) == FUNCTION_DECL)
1123 cgraph_node *target = cgraph_node::get (decl);
1124 if (target)
1125 target = target->function_symbol ();
 1127       /* Caller and callee must agree on the calling convention, so checking
 1128 	 just the 'optimize' setting here would mean that with
 1129 	 __attribute__((optimize (...))) the caller could use the regparm
 1130 	 convention and the callee not, or vice versa.  Instead look at whether
 1131 	 the callee itself is optimized or not. */
1132 if (target && opt_for_fn (target->decl, optimize)
1133 && !(profile_flag && !flag_fentry))
1135 if (target->local && target->can_change_signature)
1137 int local_regparm, globals = 0, regno;
1139 /* Make sure no regparm register is taken by a
1140 fixed register variable. */
1141 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1142 local_regparm++)
1143 if (fixed_regs[local_regparm])
1144 break;
1146 /* We don't want to use regparm(3) for nested functions as
1147 these use a static chain pointer in the third argument. */
1148 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1149 local_regparm = 2;
1151 /* Save a register for the split stack. */
1152 if (flag_split_stack)
1154 if (local_regparm == 3)
1155 local_regparm = 2;
1156 else if (local_regparm == 2
1157 && DECL_STATIC_CHAIN (target->decl))
1158 local_regparm = 1;
 1161 	  /* Each fixed register usage increases register pressure,
 1162 	     so fewer registers should be used for argument passing.
 1163 	     This behavior can be overridden by an explicit
 1164 	     regparm value. */
1165 for (regno = AX_REG; regno <= DI_REG; regno++)
1166 if (fixed_regs[regno])
1167 globals++;
1169 local_regparm
1170 = globals < local_regparm ? local_regparm - globals : 0;
1172 if (local_regparm > regparm)
1173 regparm = local_regparm;
1178 return regparm;
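/* Illustrative example (hypothetical user code, not part of i386.c): the
   local-function case handled above.  With -m32 -O2 a function that is
   not visible outside its translation unit may be given the regparm
   convention automatically, without any attribute, as long as its
   signature can be changed safely.  */

static int
local_helper (int a, int b)	/* arguments may be passed in registers */
{
  return a + b;
}

int
use_local_helper (int x)
{
  return local_helper (x, 3);
}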
 1181 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
 1182    DFmode (2) arguments in SSE registers for a function with the
 1183    indicated TYPE and DECL.  DECL may be NULL when calling a function
 1184    indirectly or considering a libcall.  Return -1 if any FP parameter
 1185    should be rejected with an error.  This is used in situations where we
 1186    imply the SSE calling convention but the function is called from
 1187    another function with SSE disabled.  Otherwise return 0. */
1189 static int
1190 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1192 gcc_assert (!TARGET_64BIT);
1194 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1195 by the sseregparm attribute. */
1196 if (TARGET_SSEREGPARM
1197 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1199 if (!TARGET_SSE)
1201 if (warn)
1203 if (decl)
1204 error ("calling %qD with attribute sseregparm without "
1205 "SSE/SSE2 enabled", decl);
1206 else
1207 error ("calling %qT with attribute sseregparm without "
1208 "SSE/SSE2 enabled", type);
1210 return 0;
1213 return 2;
1216 if (!decl)
1217 return 0;
1219 cgraph_node *target = cgraph_node::get (decl);
1220 if (target)
1221 target = target->function_symbol ();
1223 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1224 (and DFmode for SSE2) arguments in SSE registers. */
1225 if (target
1226 /* TARGET_SSE_MATH */
1227 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1228 && opt_for_fn (target->decl, optimize)
1229 && !(profile_flag && !flag_fentry))
1231 if (target->local && target->can_change_signature)
 1233 	  /* Refuse to produce wrong code when a local function with SSE enabled
 1234 	     is called from an SSE-disabled function.
 1235 	     FIXME: We need a way to detect these cases across ltrans partitions
 1236 	     and avoid using SSE calling conventions on local functions called
 1237 	     from functions with SSE disabled.  For now at least delay the
 1238 	     warning until we know we are going to produce wrong code.
 1239 	     See PR66047. */
1240 if (!TARGET_SSE && warn)
1241 return -1;
1242 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1243 ->x_ix86_isa_flags) ? 2 : 1;
1247 return 0;
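/* Illustrative example (hypothetical user code, not part of i386.c): the
   "sseregparm" attribute handled above.  On ia32 it passes float and
   double arguments in SSE registers; if SSE is disabled, the error above
   is reported instead.  */

double __attribute__ ((sseregparm)) dot2 (double x, double y);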
1250 /* Return true if EAX is live at the start of the function. Used by
1251 ix86_expand_prologue to determine if we need special help before
1252 calling allocate_stack_worker. */
1254 static bool
1255 ix86_eax_live_at_start_p (void)
1257 /* Cheat. Don't bother working forward from ix86_function_regparm
1258 to the function type to whether an actual argument is located in
1259 eax. Instead just look at cfg info, which is still close enough
1260 to correct at this point. This gives false positives for broken
1261 functions that might use uninitialized data that happens to be
1262 allocated in eax, but who cares? */
1263 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1266 static bool
1267 ix86_keep_aggregate_return_pointer (tree fntype)
1269 tree attr;
1271 if (!TARGET_64BIT)
1273 attr = lookup_attribute ("callee_pop_aggregate_return",
1274 TYPE_ATTRIBUTES (fntype));
1275 if (attr)
1276 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1278 /* For 32-bit MS-ABI the default is to keep aggregate
1279 return pointer. */
1280 if (ix86_function_type_abi (fntype) == MS_ABI)
1281 return true;
1283 return KEEP_AGGREGATE_RETURN_POINTER != 0;
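/* Illustrative example (hypothetical user code, not part of i386.c): the
   "callee_pop_aggregate_return" attribute looked up above.  On ia32 it
   controls whether the callee pops the hidden pointer used to return an
   aggregate (1) or leaves that to the caller (0).  */

struct pair { int a, b; };

struct pair __attribute__ ((callee_pop_aggregate_return (0)))
make_pair_caller_pops (int a, int b);

struct pair __attribute__ ((callee_pop_aggregate_return (1)))
make_pair_callee_pops (int a, int b);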
1286 /* Value is the number of bytes of arguments automatically
1287 popped when returning from a subroutine call.
1288 FUNDECL is the declaration node of the function (as a tree),
1289 FUNTYPE is the data type of the function (as a tree),
1290 or for a library call it is an identifier node for the subroutine name.
1291 SIZE is the number of bytes of arguments passed on the stack.
1293 On the 80386, the RTD insn may be used to pop them if the number
1294 of args is fixed, but if the number is variable then the caller
1295 must pop them all. RTD can't be used for library calls now
1296 because the library is compiled with the Unix compiler.
1297 Use of RTD is a selectable option, since it is incompatible with
1298 standard Unix calling sequences. If the option is not selected,
1299 the caller must always pop the args.
1301 The attribute stdcall is equivalent to RTD on a per module basis. */
1303 static poly_int64
1304 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1306 unsigned int ccvt;
1308 /* None of the 64-bit ABIs pop arguments. */
1309 if (TARGET_64BIT)
1310 return 0;
1312 ccvt = ix86_get_callcvt (funtype);
1314 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1315 | IX86_CALLCVT_THISCALL)) != 0
1316 && ! stdarg_p (funtype))
1317 return size;
1319 /* Lose any fake structure return argument if it is passed on the stack. */
1320 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1321 && !ix86_keep_aggregate_return_pointer (funtype))
1323 int nregs = ix86_function_regparm (funtype, fundecl);
1324 if (nregs == 0)
1325 return GET_MODE_SIZE (Pmode);
1328 return 0;
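/* Illustrative example (hypothetical user code, not part of i386.c): the
   callee-pops rule implemented above, for -m32.  A stdcall function with
   a fixed argument list pops its own arguments on return, while making
   it variadic forces the caller-pops (cdecl) behaviour because of the
   stdarg_p check.  */

int __attribute__ ((stdcall)) sum_fixed (int a, int b);	/* callee pops 8 bytes */
int __attribute__ ((stdcall)) sum_variadic (int n, ...);	/* caller pops args */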
1331 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1333 static bool
1334 ix86_legitimate_combined_insn (rtx_insn *insn)
1336 int i;
1338 /* Check operand constraints in case hard registers were propagated
1339 into insn pattern. This check prevents combine pass from
1340 generating insn patterns with invalid hard register operands.
1341 These invalid insns can eventually confuse reload to error out
1342 with a spill failure. See also PRs 46829 and 46843. */
1344 gcc_assert (INSN_CODE (insn) >= 0);
1346 extract_insn (insn);
1347 preprocess_constraints (insn);
1349 int n_operands = recog_data.n_operands;
1350 int n_alternatives = recog_data.n_alternatives;
1351 for (i = 0; i < n_operands; i++)
1353 rtx op = recog_data.operand[i];
1354 machine_mode mode = GET_MODE (op);
1355 const operand_alternative *op_alt;
1356 int offset = 0;
1357 bool win;
1358 int j;
1360 /* A unary operator may be accepted by the predicate, but it
1361 is irrelevant for matching constraints. */
1362 if (UNARY_P (op))
1363 op = XEXP (op, 0);
1365 if (SUBREG_P (op))
1367 if (REG_P (SUBREG_REG (op))
1368 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1369 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1370 GET_MODE (SUBREG_REG (op)),
1371 SUBREG_BYTE (op),
1372 GET_MODE (op));
1373 op = SUBREG_REG (op);
1376 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1377 continue;
1379 op_alt = recog_op_alt;
1381 /* Operand has no constraints, anything is OK. */
1382 win = !n_alternatives;
1384 alternative_mask preferred = get_preferred_alternatives (insn);
1385 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1387 if (!TEST_BIT (preferred, j))
1388 continue;
1389 if (op_alt[i].anything_ok
1390 || (op_alt[i].matches != -1
1391 && operands_match_p
1392 (recog_data.operand[i],
1393 recog_data.operand[op_alt[i].matches]))
1394 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1396 win = true;
1397 break;
1401 if (!win)
1402 return false;
1405 return true;
1408 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1410 static unsigned HOST_WIDE_INT
1411 ix86_asan_shadow_offset (void)
1413 return SUBTARGET_SHADOW_OFFSET;
1416 /* Argument support functions. */
1418 /* Return true when register may be used to pass function parameters. */
1419 bool
1420 ix86_function_arg_regno_p (int regno)
1422 int i;
1423 enum calling_abi call_abi;
1424 const int *parm_regs;
1426 if (!TARGET_64BIT)
1428 if (TARGET_MACHO)
1429 return (regno < REGPARM_MAX
1430 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1431 else
1432 return (regno < REGPARM_MAX
1433 || (TARGET_MMX && MMX_REGNO_P (regno)
1434 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1435 || (TARGET_SSE && SSE_REGNO_P (regno)
1436 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1439 if (TARGET_SSE && SSE_REGNO_P (regno)
1440 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1441 return true;
1443 /* TODO: The function should depend on current function ABI but
1444 builtins.c would need updating then. Therefore we use the
1445 default ABI. */
1446 call_abi = ix86_cfun_abi ();
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (call_abi == SYSV_ABI && regno == AX_REG)
1450 return true;
1452 if (call_abi == MS_ABI)
1453 parm_regs = x86_64_ms_abi_int_parameter_registers;
1454 else
1455 parm_regs = x86_64_int_parameter_registers;
1457 for (i = 0; i < (call_abi == MS_ABI
1458 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1459 if (regno == parm_regs[i])
1460 return true;
1461 return false;
 1464 /* Return true if we do not know how to pass ARG solely in registers. */
1466 static bool
1467 ix86_must_pass_in_stack (const function_arg_info &arg)
1469 if (must_pass_in_stack_var_size_or_pad (arg))
1470 return true;
1472 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1473 The layout_type routine is crafty and tries to trick us into passing
1474 currently unsupported vector types on the stack by using TImode. */
1475 return (!TARGET_64BIT && arg.mode == TImode
1476 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
 1479 /* Return the size, in bytes, of the area reserved for arguments passed
 1480    in registers for the function represented by FNDECL, depending on the
 1481    ABI format used. */
 1482 int
 1483 ix86_reg_parm_stack_space (const_tree fndecl)
1485 enum calling_abi call_abi = SYSV_ABI;
1486 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1487 call_abi = ix86_function_abi (fndecl);
1488 else
1489 call_abi = ix86_function_type_abi (fndecl);
1490 if (TARGET_64BIT && call_abi == MS_ABI)
1491 return 32;
1492 return 0;
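/* Illustrative example (hypothetical user code, not part of i386.c): the
   64-bit integer argument registers checked by ix86_function_arg_regno_p
   above, selectable per function with the ms_abi/sysv_abi attributes.
   SysV uses %rdi, %rsi, %rdx, %rcx, %r8, %r9 and reserves no
   register-parameter stack area, while the MS ABI uses %rcx, %rdx, %r8,
   %r9 and reserves the 32-byte shadow area that ix86_reg_parm_stack_space
   returns, which is the quantity the sibcall REG_PARM_STACK_SPACE check
   earlier compares.  */

long __attribute__ ((sysv_abi)) f_sysv (long a, long b, long c); /* %rdi, %rsi, %rdx */
long __attribute__ ((ms_abi))   f_ms (long a, long b, long c);	 /* %rcx, %rdx, %r8 */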
1495 /* We add this as a workaround in order to use libc_has_function
1496 hook in i386.md. */
1497 bool
1498 ix86_libc_has_function (enum function_class fn_class)
1500 return targetm.libc_has_function (fn_class, NULL_TREE);
 1503 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
 1504    specifying the call ABI used. */
1505 enum calling_abi
1506 ix86_function_type_abi (const_tree fntype)
1508 enum calling_abi abi = ix86_abi;
1510 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1511 return abi;
1513 if (abi == SYSV_ABI
1514 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1516 static int warned;
1517 if (TARGET_X32 && !warned)
1519 error ("X32 does not support %<ms_abi%> attribute");
1520 warned = 1;
1523 abi = MS_ABI;
1525 else if (abi == MS_ABI
1526 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1527 abi = SYSV_ABI;
1529 return abi;
1532 enum calling_abi
1533 ix86_function_abi (const_tree fndecl)
1535 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
 1538 /* Return SYSV_ABI or MS_ABI, depending on cfun,
 1539    specifying the call ABI used. */
1540 enum calling_abi
1541 ix86_cfun_abi (void)
1543 return cfun ? cfun->machine->call_abi : ix86_abi;
1546 bool
1547 ix86_function_ms_hook_prologue (const_tree fn)
1549 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1551 if (decl_function_context (fn) != NULL_TREE)
1552 error_at (DECL_SOURCE_LOCATION (fn),
1553 "%<ms_hook_prologue%> attribute is not compatible "
1554 "with nested function");
1555 else
1556 return true;
1558 return false;
1561 bool
1562 ix86_function_naked (const_tree fn)
1564 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1565 return true;
1567 return false;
1570 /* Write the extra assembler code needed to declare a function properly. */
1572 void
1573 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1574 tree decl)
1576 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1578 if (cfun)
1579 cfun->machine->function_label_emitted = true;
1581 if (is_ms_hook)
1583 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1584 unsigned int filler_cc = 0xcccccccc;
1586 for (i = 0; i < filler_count; i += 4)
1587 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1590 #ifdef SUBTARGET_ASM_UNWIND_INIT
1591 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1592 #endif
1594 ASM_OUTPUT_LABEL (asm_out_file, fname);
1596 /* Output magic byte marker, if hot-patch attribute is set. */
1597 if (is_ms_hook)
1599 if (TARGET_64BIT)
1601 /* leaq [%rsp + 0], %rsp */
1602 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1603 asm_out_file);
1605 else
1607 /* movl.s %edi, %edi
1608 push %ebp
1609 movl.s %esp, %ebp */
1610 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
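/* Illustrative example (hypothetical user code, not part of i386.c): the
   "ms_hook_prologue" attribute handled above.  It emits the 0xCC filler
   before the label and the "movl %edi, %edi; push %ebp; movl %esp, %ebp"
   (or the 64-bit lea) entry sequence, so the function can be hot-patched
   at run time.  */

void __attribute__ ((ms_hook_prologue))
patchable_entry (void)
{
  /* body can be redirected at run time via hot patching */
}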
 1615 /* Implementation of the call ABI switching target hook.  The call
 1616    register sets specific to FNDECL are selected.  See also
 1617    ix86_conditional_register_usage for more details. */
1618 void
1619 ix86_call_abi_override (const_tree fndecl)
1621 cfun->machine->call_abi = ix86_function_abi (fndecl);
 1624 /* Return true if a pseudo register should be created and used to hold
 1625    the GOT address for PIC code. */
1626 bool
1627 ix86_use_pseudo_pic_reg (void)
1629 if ((TARGET_64BIT
1630 && (ix86_cmodel == CM_SMALL_PIC
1631 || TARGET_PECOFF))
1632 || !flag_pic)
1633 return false;
1634 return true;
1637 /* Initialize large model PIC register. */
1639 static void
1640 ix86_init_large_pic_reg (unsigned int tmp_regno)
1642 rtx_code_label *label;
1643 rtx tmp_reg;
1645 gcc_assert (Pmode == DImode);
1646 label = gen_label_rtx ();
1647 emit_label (label);
1648 LABEL_PRESERVE_P (label) = 1;
1649 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1650 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1651 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1652 label));
1653 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1654 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1655 const char *name = LABEL_NAME (label);
1656 PUT_CODE (label, NOTE);
1657 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1658 NOTE_DELETED_LABEL_NAME (label) = name;
1661 /* Create and initialize PIC register if required. */
1662 static void
1663 ix86_init_pic_reg (void)
1665 edge entry_edge;
1666 rtx_insn *seq;
1668 if (!ix86_use_pseudo_pic_reg ())
1669 return;
1671 start_sequence ();
1673 if (TARGET_64BIT)
1675 if (ix86_cmodel == CM_LARGE_PIC)
1676 ix86_init_large_pic_reg (R11_REG);
1677 else
1678 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1680 else
 1682       /* If there is a future mcount call in the function it is more profitable
 1683 	 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1684 rtx reg = crtl->profile
1685 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1686 : pic_offset_table_rtx;
1687 rtx_insn *insn = emit_insn (gen_set_got (reg));
1688 RTX_FRAME_RELATED_P (insn) = 1;
1689 if (crtl->profile)
1690 emit_move_insn (pic_offset_table_rtx, reg);
1691 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1694 seq = get_insns ();
1695 end_sequence ();
1697 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1698 insert_insn_on_edge (seq, entry_edge);
1699 commit_one_edge_insertion (entry_edge);
1702 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1703 for a call to a function whose data type is FNTYPE.
1704 For a library call, FNTYPE is 0. */
1706 void
1707 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1708 tree fntype, /* tree ptr for function decl */
1709 rtx libname, /* SYMBOL_REF of library name or 0 */
1710 tree fndecl,
1711 int caller)
1713 struct cgraph_node *local_info_node = NULL;
1714 struct cgraph_node *target = NULL;
1716 memset (cum, 0, sizeof (*cum));
1718 if (fndecl)
1720 target = cgraph_node::get (fndecl);
1721 if (target)
1723 target = target->function_symbol ();
1724 local_info_node = cgraph_node::local_info_node (target->decl);
1725 cum->call_abi = ix86_function_abi (target->decl);
1727 else
1728 cum->call_abi = ix86_function_abi (fndecl);
1730 else
1731 cum->call_abi = ix86_function_type_abi (fntype);
1733 cum->caller = caller;
1735 /* Set up the number of registers to use for passing arguments. */
1736 cum->nregs = ix86_regparm;
1737 if (TARGET_64BIT)
1739 cum->nregs = (cum->call_abi == SYSV_ABI
1740 ? X86_64_REGPARM_MAX
1741 : X86_64_MS_REGPARM_MAX);
1743 if (TARGET_SSE)
1745 cum->sse_nregs = SSE_REGPARM_MAX;
1746 if (TARGET_64BIT)
1748 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1749 ? X86_64_SSE_REGPARM_MAX
1750 : X86_64_MS_SSE_REGPARM_MAX);
1753 if (TARGET_MMX)
1754 cum->mmx_nregs = MMX_REGPARM_MAX;
1755 cum->warn_avx512f = true;
1756 cum->warn_avx = true;
1757 cum->warn_sse = true;
1758 cum->warn_mmx = true;
 1760   /* Because the type might mismatch between caller and callee, we need to
 1761      use the actual type of the function for local calls.
 1762      FIXME: cgraph_analyze can be told to actually record if a function uses
 1763      va_start, so for local functions maybe_vaarg can be made more aggressive,
 1764      helping K&R code.
 1765      FIXME: once the type system is fixed, we won't need this code anymore. */
1766 if (local_info_node && local_info_node->local
1767 && local_info_node->can_change_signature)
1768 fntype = TREE_TYPE (target->decl);
1769 cum->stdarg = stdarg_p (fntype);
1770 cum->maybe_vaarg = (fntype
1771 ? (!prototype_p (fntype) || stdarg_p (fntype))
1772 : !libname);
1774 cum->decl = fndecl;
1776 cum->warn_empty = !warn_abi || cum->stdarg;
1777 if (!cum->warn_empty && fntype)
1779 function_args_iterator iter;
1780 tree argtype;
1781 bool seen_empty_type = false;
1782 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1784 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1785 break;
1786 if (TYPE_EMPTY_P (argtype))
1787 seen_empty_type = true;
1788 else if (seen_empty_type)
1790 cum->warn_empty = true;
1791 break;
1796 if (!TARGET_64BIT)
1798 /* If there are variable arguments, then we won't pass anything
1799 in registers in 32-bit mode. */
1800 if (stdarg_p (fntype))
1802 cum->nregs = 0;
 1803 	  /* Since in 32-bit mode variable arguments are always passed on
 1804 	     the stack, there is a scratch register available for an indirect
 1805 	     sibcall. */
1806 cfun->machine->arg_reg_available = true;
1807 cum->sse_nregs = 0;
1808 cum->mmx_nregs = 0;
1809 cum->warn_avx512f = false;
1810 cum->warn_avx = false;
1811 cum->warn_sse = false;
1812 cum->warn_mmx = false;
1813 return;
1816 /* Use ecx and edx registers if function has fastcall attribute,
1817 else look for regparm information. */
1818 if (fntype)
1820 unsigned int ccvt = ix86_get_callcvt (fntype);
1821 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1823 cum->nregs = 1;
1824 cum->fastcall = 1; /* Same first register as in fastcall. */
1826 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1828 cum->nregs = 2;
1829 cum->fastcall = 1;
1831 else
1832 cum->nregs = ix86_function_regparm (fntype, fndecl);
1835 /* Set up the number of SSE registers used for passing SFmode
1836 and DFmode arguments. Warn for mismatching ABI. */
1837 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1840 cfun->machine->arg_reg_available = (cum->nregs > 0);
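/* A minimal illustration of the result (hypothetical declarations, not
   taken from this file): on a 32-bit target the code above leaves
   CUM->nregs as

     void __attribute__((fastcall)) f (int a, int b);  // nregs = 2 (ECX, EDX)
     void __attribute__((thiscall)) g (int a);         // nregs = 1 (ECX)
     void __attribute__((regparm (3))) h (int a);      // nregs = 3 (EAX, EDX, ECX)
     void plain (int a);                               // nregs = 0 unless -mregparm

   and a stdarg_p function has nregs, sse_nregs and mmx_nregs forced to 0,
   so variadic arguments always go on the stack in 32-bit mode.  */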
1843 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1844 But in the case of vector types, it is some vector mode.
1846 When we have only some of our vector isa extensions enabled, then there
1847 are some modes for which vector_mode_supported_p is false. For these
1848 modes, the generic vector support in gcc will choose some non-vector mode
1849 in order to implement the type. By computing the natural mode, we'll
1850 select the proper ABI location for the operand and not depend on whatever
1851 the middle-end decides to do with these vector types.
1853 The middle-end can't deal with vector types > 16 bytes. In this
1854 case, we return the original mode and warn about the ABI change if CUM
1855 isn't NULL.
1857 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1858 available for the function return value. */
1860 static machine_mode
1861 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1862 bool in_return)
1864 machine_mode mode = TYPE_MODE (type);
1866 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1868 HOST_WIDE_INT size = int_size_in_bytes (type);
1869 if ((size == 8 || size == 16 || size == 32 || size == 64)
1870 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1871 && TYPE_VECTOR_SUBPARTS (type) > 1)
1873 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1875 /* There are no XFmode vector modes. */
1876 if (innermode == XFmode)
1877 return mode;
1879 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1880 mode = MIN_MODE_VECTOR_FLOAT;
1881 else
1882 mode = MIN_MODE_VECTOR_INT;
1884 /* Get the mode which has this inner mode and number of units. */
1885 FOR_EACH_MODE_FROM (mode, mode)
1886 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1887 && GET_MODE_INNER (mode) == innermode)
1889 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1891 static bool warnedavx512f;
1892 static bool warnedavx512f_ret;
1894 if (cum && cum->warn_avx512f && !warnedavx512f)
1896 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1897 "without AVX512F enabled changes the ABI"))
1898 warnedavx512f = true;
1900 else if (in_return && !warnedavx512f_ret)
1902 if (warning (OPT_Wpsabi, "AVX512F vector return "
1903 "without AVX512F enabled changes the ABI"))
1904 warnedavx512f_ret = true;
1907 return TYPE_MODE (type);
1909 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1911 static bool warnedavx;
1912 static bool warnedavx_ret;
1914 if (cum && cum->warn_avx && !warnedavx)
1916 if (warning (OPT_Wpsabi, "AVX vector argument "
1917 "without AVX enabled changes the ABI"))
1918 warnedavx = true;
1920 else if (in_return && !warnedavx_ret)
1922 if (warning (OPT_Wpsabi, "AVX vector return "
1923 "without AVX enabled changes the ABI"))
1924 warnedavx_ret = true;
1927 return TYPE_MODE (type);
1929 else if (((size == 8 && TARGET_64BIT) || size == 16)
1930 && !TARGET_SSE
1931 && !TARGET_IAMCU)
1933 static bool warnedsse;
1934 static bool warnedsse_ret;
1936 if (cum && cum->warn_sse && !warnedsse)
1938 if (warning (OPT_Wpsabi, "SSE vector argument "
1939 "without SSE enabled changes the ABI"))
1940 warnedsse = true;
1942 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1944 if (warning (OPT_Wpsabi, "SSE vector return "
1945 "without SSE enabled changes the ABI"))
1946 warnedsse_ret = true;
1949 else if ((size == 8 && !TARGET_64BIT)
1950 && (!cfun
1951 || cfun->machine->func_type == TYPE_NORMAL)
1952 && !TARGET_MMX
1953 && !TARGET_IAMCU)
1955 static bool warnedmmx;
1956 static bool warnedmmx_ret;
1958 if (cum && cum->warn_mmx && !warnedmmx)
1960 if (warning (OPT_Wpsabi, "MMX vector argument "
1961 "without MMX enabled changes the ABI"))
1962 warnedmmx = true;
1964 else if (in_return && !warnedmmx_ret)
1966 if (warning (OPT_Wpsabi, "MMX vector return "
1967 "without MMX enabled changes the ABI"))
1968 warnedmmx_ret = true;
1971 return mode;
1974 gcc_unreachable ();
1978 return mode;
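/* A short illustration (hypothetical types, not from this file) of what
   type_natural_mode computes:

     typedef float v4sf __attribute__((vector_size (16)));
     typedef int   v8si __attribute__((vector_size (32)));

   For v4sf the mode loop above finds V4SFmode even when the middle-end had
   to fall back to a non-vector mode because SSE is disabled; the -Wpsabi
   warning then notes that the register assignment differs from a compiler
   built with SSE enabled.  For v8si without AVX (and for 64-byte vectors
   without AVX512F) the original TYPE_MODE is returned instead, again with
   a -Wpsabi warning, because the wider vector mode is unavailable.  */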
1981 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1982 this may not agree with the mode that the type system has chosen for the
1983 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1984 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1986 static rtx
1987 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1988 unsigned int regno)
1990 rtx tmp;
1992 if (orig_mode != BLKmode)
1993 tmp = gen_rtx_REG (orig_mode, regno);
1994 else
1996 tmp = gen_rtx_REG (mode, regno);
1997 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1998 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2001 return tmp;
2004 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2005 of this code is to classify each 8bytes of incoming argument by the register
2006 class and assign registers accordingly. */
2008 /* Return the union class of CLASS1 and CLASS2.
2009 See the x86-64 PS ABI for details. */
2011 static enum x86_64_reg_class
2012 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2014 /* Rule #1: If both classes are equal, this is the resulting class. */
2015 if (class1 == class2)
2016 return class1;
2018 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2019 the other class. */
2020 if (class1 == X86_64_NO_CLASS)
2021 return class2;
2022 if (class2 == X86_64_NO_CLASS)
2023 return class1;
2025 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2026 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2027 return X86_64_MEMORY_CLASS;
2029 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2030 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2031 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2032 return X86_64_INTEGERSI_CLASS;
2033 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2034 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2035 return X86_64_INTEGER_CLASS;
2037 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2038 MEMORY is used. */
2039 if (class1 == X86_64_X87_CLASS
2040 || class1 == X86_64_X87UP_CLASS
2041 || class1 == X86_64_COMPLEX_X87_CLASS
2042 || class2 == X86_64_X87_CLASS
2043 || class2 == X86_64_X87UP_CLASS
2044 || class2 == X86_64_COMPLEX_X87_CLASS)
2045 return X86_64_MEMORY_CLASS;
2047 /* Rule #6: Otherwise class SSE is used. */
2048 return X86_64_SSE_CLASS;
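/* A few consequences of the rules above (worked examples with hypothetical
   types, not code from this file):

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)      == X86_64_SSE_CLASS
     merge_classes (X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS) == X86_64_INTEGER_CLASS
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)     == X86_64_MEMORY_CLASS

   So a union such as

     union u { double d; long l; };

   classifies its single eightbyte as INTEGER (rule #4) and is passed in a
   general-purpose register, while a union containing a long double member
   degrades to MEMORY (rule #5) and is passed on the stack.  */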
2051 /* Classify the argument of type TYPE and mode MODE.
2052 CLASSES will be filled by the register class used to pass each word
2053 of the operand. The number of words is returned. In case the parameter
2054 should be passed in memory, 0 is returned. As a special case for zero
2055 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2057 BIT_OFFSET is used internally for handling records and specifies the
2058 offset in bits modulo 512 to avoid overflow cases.
2060 See the x86-64 PS ABI for details.
2063 static int
2064 classify_argument (machine_mode mode, const_tree type,
2065 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2067 HOST_WIDE_INT bytes
2068 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2069 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2071 /* Variable sized entities are always passed/returned in memory. */
2072 if (bytes < 0)
2073 return 0;
2075 if (mode != VOIDmode)
2077 /* The value of "named" doesn't matter. */
2078 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2079 if (targetm.calls.must_pass_in_stack (arg))
2080 return 0;
2083 if (type && AGGREGATE_TYPE_P (type))
2085 int i;
2086 tree field;
2087 enum x86_64_reg_class subclasses[MAX_CLASSES];
2089 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2090 if (bytes > 64)
2091 return 0;
2093 for (i = 0; i < words; i++)
2094 classes[i] = X86_64_NO_CLASS;
2096 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2097 signal the memory class, so handle them as a special case. */
2098 if (!words)
2100 classes[0] = X86_64_NO_CLASS;
2101 return 1;
2104 /* Classify each field of record and merge classes. */
2105 switch (TREE_CODE (type))
2107 case RECORD_TYPE:
2108 /* And now merge the fields of structure. */
2109 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2111 if (TREE_CODE (field) == FIELD_DECL)
2113 int num;
2115 if (TREE_TYPE (field) == error_mark_node)
2116 continue;
2118 /* Bitfields are always classified as integer. Handle them
2119 early, since later code would consider them to be
2120 misaligned integers. */
2121 if (DECL_BIT_FIELD (field))
2123 for (i = (int_bit_position (field)
2124 + (bit_offset % 64)) / 8 / 8;
2125 i < ((int_bit_position (field) + (bit_offset % 64))
2126 + tree_to_shwi (DECL_SIZE (field))
2127 + 63) / 8 / 8; i++)
2128 classes[i]
2129 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2131 else
2133 int pos;
2135 type = TREE_TYPE (field);
2137 /* Flexible array member is ignored. */
2138 if (TYPE_MODE (type) == BLKmode
2139 && TREE_CODE (type) == ARRAY_TYPE
2140 && TYPE_SIZE (type) == NULL_TREE
2141 && TYPE_DOMAIN (type) != NULL_TREE
2142 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2143 == NULL_TREE))
2145 static bool warned;
2147 if (!warned && warn_psabi)
2149 warned = true;
2150 inform (input_location,
2151 "the ABI of passing struct with"
2152 " a flexible array member has"
2153 " changed in GCC 4.4");
2155 continue;
2157 num = classify_argument (TYPE_MODE (type), type,
2158 subclasses,
2159 (int_bit_position (field)
2160 + bit_offset) % 512);
2161 if (!num)
2162 return 0;
2163 pos = (int_bit_position (field)
2164 + (bit_offset % 64)) / 8 / 8;
2165 for (i = 0; i < num && (i + pos) < words; i++)
2166 classes[i + pos]
2167 = merge_classes (subclasses[i], classes[i + pos]);
2171 break;
2173 case ARRAY_TYPE:
2174 /* Arrays are handled as small records. */
2176 int num;
2177 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2178 TREE_TYPE (type), subclasses, bit_offset);
2179 if (!num)
2180 return 0;
2182 /* The partial classes are now full classes. */
2183 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2184 subclasses[0] = X86_64_SSE_CLASS;
2185 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2186 && !((bit_offset % 64) == 0 && bytes == 4))
2187 subclasses[0] = X86_64_INTEGER_CLASS;
2189 for (i = 0; i < words; i++)
2190 classes[i] = subclasses[i % num];
2192 break;
2194 case UNION_TYPE:
2195 case QUAL_UNION_TYPE:
2196 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2198 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2200 if (TREE_CODE (field) == FIELD_DECL)
2202 int num;
2204 if (TREE_TYPE (field) == error_mark_node)
2205 continue;
2207 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2208 TREE_TYPE (field), subclasses,
2209 bit_offset);
2210 if (!num)
2211 return 0;
2212 for (i = 0; i < num && i < words; i++)
2213 classes[i] = merge_classes (subclasses[i], classes[i]);
2216 break;
2218 default:
2219 gcc_unreachable ();
2222 if (words > 2)
2224 /* When the size is > 16 bytes, everything is passed in
2225 memory unless the first eightbyte is X86_64_SSE_CLASS
2226 and every remaining one is
2227 X86_64_SSEUP_CLASS. */
2228 if (classes[0] != X86_64_SSE_CLASS)
2229 return 0;
2231 for (i = 1; i < words; i++)
2232 if (classes[i] != X86_64_SSEUP_CLASS)
2233 return 0;
2236 /* Final merger cleanup. */
2237 for (i = 0; i < words; i++)
2239 /* If one class is MEMORY, everything should be passed in
2240 memory. */
2241 if (classes[i] == X86_64_MEMORY_CLASS)
2242 return 0;
2244 /* The X86_64_SSEUP_CLASS should be always preceded by
2245 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2246 if (classes[i] == X86_64_SSEUP_CLASS
2247 && classes[i - 1] != X86_64_SSE_CLASS
2248 && classes[i - 1] != X86_64_SSEUP_CLASS)
2250 /* The first one should never be X86_64_SSEUP_CLASS. */
2251 gcc_assert (i != 0);
2252 classes[i] = X86_64_SSE_CLASS;
2255 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2256 everything should be passed in memory. */
2257 if (classes[i] == X86_64_X87UP_CLASS
2258 && (classes[i - 1] != X86_64_X87_CLASS))
2260 static bool warned;
2262 /* The first one should never be X86_64_X87UP_CLASS. */
2263 gcc_assert (i != 0);
2264 if (!warned && warn_psabi)
2266 warned = true;
2267 inform (input_location,
2268 "the ABI of passing union with %<long double%>"
2269 " has changed in GCC 4.4");
2271 return 0;
2274 return words;
2277 /* Compute the alignment needed. We align all types to their natural boundaries,
2278 with the exception of XFmode, which is aligned to 64 bits. */
2279 if (mode != VOIDmode && mode != BLKmode)
2281 int mode_alignment = GET_MODE_BITSIZE (mode);
2283 if (mode == XFmode)
2284 mode_alignment = 128;
2285 else if (mode == XCmode)
2286 mode_alignment = 256;
2287 if (COMPLEX_MODE_P (mode))
2288 mode_alignment /= 2;
2289 /* Misaligned fields are always returned in memory. */
2290 if (bit_offset % mode_alignment)
2291 return 0;
2294 /* for V1xx modes, just use the base mode */
2295 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2296 && GET_MODE_UNIT_SIZE (mode) == bytes)
2297 mode = GET_MODE_INNER (mode);
2299 /* Classification of atomic types. */
2300 switch (mode)
2302 case E_SDmode:
2303 case E_DDmode:
2304 classes[0] = X86_64_SSE_CLASS;
2305 return 1;
2306 case E_TDmode:
2307 classes[0] = X86_64_SSE_CLASS;
2308 classes[1] = X86_64_SSEUP_CLASS;
2309 return 2;
2310 case E_DImode:
2311 case E_SImode:
2312 case E_HImode:
2313 case E_QImode:
2314 case E_CSImode:
2315 case E_CHImode:
2316 case E_CQImode:
2318 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2320 /* Analyze last 128 bits only. */
2321 size = (size - 1) & 0x7f;
2323 if (size < 32)
2325 classes[0] = X86_64_INTEGERSI_CLASS;
2326 return 1;
2328 else if (size < 64)
2330 classes[0] = X86_64_INTEGER_CLASS;
2331 return 1;
2333 else if (size < 64+32)
2335 classes[0] = X86_64_INTEGER_CLASS;
2336 classes[1] = X86_64_INTEGERSI_CLASS;
2337 return 2;
2339 else if (size < 64+64)
2341 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2342 return 2;
2344 else
2345 gcc_unreachable ();
2347 case E_CDImode:
2348 case E_TImode:
2349 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2350 return 2;
2351 case E_COImode:
2352 case E_OImode:
2353 /* OImode shouldn't be used directly. */
2354 gcc_unreachable ();
2355 case E_CTImode:
2356 return 0;
2357 case E_SFmode:
2358 if (!(bit_offset % 64))
2359 classes[0] = X86_64_SSESF_CLASS;
2360 else
2361 classes[0] = X86_64_SSE_CLASS;
2362 return 1;
2363 case E_DFmode:
2364 classes[0] = X86_64_SSEDF_CLASS;
2365 return 1;
2366 case E_XFmode:
2367 classes[0] = X86_64_X87_CLASS;
2368 classes[1] = X86_64_X87UP_CLASS;
2369 return 2;
2370 case E_TFmode:
2371 classes[0] = X86_64_SSE_CLASS;
2372 classes[1] = X86_64_SSEUP_CLASS;
2373 return 2;
2374 case E_SCmode:
2375 classes[0] = X86_64_SSE_CLASS;
2376 if (!(bit_offset % 64))
2377 return 1;
2378 else
2380 static bool warned;
2382 if (!warned && warn_psabi)
2384 warned = true;
2385 inform (input_location,
2386 "the ABI of passing structure with %<complex float%>"
2387 " member has changed in GCC 4.4");
2389 classes[1] = X86_64_SSESF_CLASS;
2390 return 2;
2392 case E_DCmode:
2393 classes[0] = X86_64_SSEDF_CLASS;
2394 classes[1] = X86_64_SSEDF_CLASS;
2395 return 2;
2396 case E_XCmode:
2397 classes[0] = X86_64_COMPLEX_X87_CLASS;
2398 return 1;
2399 case E_TCmode:
2400 /* This mode is larger than 16 bytes. */
2401 return 0;
2402 case E_V8SFmode:
2403 case E_V8SImode:
2404 case E_V32QImode:
2405 case E_V16HImode:
2406 case E_V4DFmode:
2407 case E_V4DImode:
2408 classes[0] = X86_64_SSE_CLASS;
2409 classes[1] = X86_64_SSEUP_CLASS;
2410 classes[2] = X86_64_SSEUP_CLASS;
2411 classes[3] = X86_64_SSEUP_CLASS;
2412 return 4;
2413 case E_V8DFmode:
2414 case E_V16SFmode:
2415 case E_V8DImode:
2416 case E_V16SImode:
2417 case E_V32HImode:
2418 case E_V64QImode:
2419 classes[0] = X86_64_SSE_CLASS;
2420 classes[1] = X86_64_SSEUP_CLASS;
2421 classes[2] = X86_64_SSEUP_CLASS;
2422 classes[3] = X86_64_SSEUP_CLASS;
2423 classes[4] = X86_64_SSEUP_CLASS;
2424 classes[5] = X86_64_SSEUP_CLASS;
2425 classes[6] = X86_64_SSEUP_CLASS;
2426 classes[7] = X86_64_SSEUP_CLASS;
2427 return 8;
2428 case E_V4SFmode:
2429 case E_V4SImode:
2430 case E_V16QImode:
2431 case E_V8HImode:
2432 case E_V2DFmode:
2433 case E_V2DImode:
2434 classes[0] = X86_64_SSE_CLASS;
2435 classes[1] = X86_64_SSEUP_CLASS;
2436 return 2;
2437 case E_V1TImode:
2438 case E_V1DImode:
2439 case E_V2SFmode:
2440 case E_V2SImode:
2441 case E_V4HImode:
2442 case E_V8QImode:
2443 classes[0] = X86_64_SSE_CLASS;
2444 return 1;
2445 case E_BLKmode:
2446 case E_VOIDmode:
2447 return 0;
2448 default:
2449 gcc_assert (VECTOR_MODE_P (mode));
2451 if (bytes > 16)
2452 return 0;
2454 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2456 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2457 classes[0] = X86_64_INTEGERSI_CLASS;
2458 else
2459 classes[0] = X86_64_INTEGER_CLASS;
2460 classes[1] = X86_64_INTEGER_CLASS;
2461 return 1 + (bytes > 8);
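/* Worked examples for the classification above (hypothetical types, not
   from this file), under the 64-bit SysV ABI:

     struct p { double x; double y; };   // 16 bytes
     struct q { long a; int b; };        // 12 bytes
     struct r { long double ld; };       // x87 member

   p is classified as {SSEDF, SSEDF} and travels in two SSE registers;
   q is classified as two INTEGER-class eightbytes and travels in two
   general-purpose registers; r is classified as {X87, X87UP}, which
   examine_argument below turns into a pass-in-memory decision for
   arguments.  Aggregates larger than 64 bytes, or larger than 16 bytes
   without the SSE/SSEUP pattern, yield 0 here and go on the stack.  */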
2465 /* Examine the argument and set the number of registers required in each
2466 class. Return true iff the parameter should be passed in memory. */
2468 static bool
2469 examine_argument (machine_mode mode, const_tree type, int in_return,
2470 int *int_nregs, int *sse_nregs)
2472 enum x86_64_reg_class regclass[MAX_CLASSES];
2473 int n = classify_argument (mode, type, regclass, 0);
2475 *int_nregs = 0;
2476 *sse_nregs = 0;
2478 if (!n)
2479 return true;
2480 for (n--; n >= 0; n--)
2481 switch (regclass[n])
2483 case X86_64_INTEGER_CLASS:
2484 case X86_64_INTEGERSI_CLASS:
2485 (*int_nregs)++;
2486 break;
2487 case X86_64_SSE_CLASS:
2488 case X86_64_SSESF_CLASS:
2489 case X86_64_SSEDF_CLASS:
2490 (*sse_nregs)++;
2491 break;
2492 case X86_64_NO_CLASS:
2493 case X86_64_SSEUP_CLASS:
2494 break;
2495 case X86_64_X87_CLASS:
2496 case X86_64_X87UP_CLASS:
2497 case X86_64_COMPLEX_X87_CLASS:
2498 if (!in_return)
2499 return true;
2500 break;
2501 case X86_64_MEMORY_CLASS:
2502 gcc_unreachable ();
2505 return false;
2508 /* Construct container for the argument used by GCC interface. See
2509 FUNCTION_ARG for the detailed description. */
2511 static rtx
2512 construct_container (machine_mode mode, machine_mode orig_mode,
2513 const_tree type, int in_return, int nintregs, int nsseregs,
2514 const int *intreg, int sse_regno)
2516 /* The following variables hold the static issued_error state. */
2517 static bool issued_sse_arg_error;
2518 static bool issued_sse_ret_error;
2519 static bool issued_x87_ret_error;
2521 machine_mode tmpmode;
2522 int bytes
2523 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2524 enum x86_64_reg_class regclass[MAX_CLASSES];
2525 int n;
2526 int i;
2527 int nexps = 0;
2528 int needed_sseregs, needed_intregs;
2529 rtx exp[MAX_CLASSES];
2530 rtx ret;
2532 n = classify_argument (mode, type, regclass, 0);
2533 if (!n)
2534 return NULL;
2535 if (examine_argument (mode, type, in_return, &needed_intregs,
2536 &needed_sseregs))
2537 return NULL;
2538 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2539 return NULL;
2541 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2542 some less clueful developer tries to use floating-point anyway. */
2543 if (needed_sseregs && !TARGET_SSE)
2545 if (in_return)
2547 if (!issued_sse_ret_error)
2549 error ("SSE register return with SSE disabled");
2550 issued_sse_ret_error = true;
2553 else if (!issued_sse_arg_error)
2555 error ("SSE register argument with SSE disabled");
2556 issued_sse_arg_error = true;
2558 return NULL;
2561 /* Likewise, error if the ABI requires us to return values in the
2562 x87 registers and the user specified -mno-80387. */
2563 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2564 for (i = 0; i < n; i++)
2565 if (regclass[i] == X86_64_X87_CLASS
2566 || regclass[i] == X86_64_X87UP_CLASS
2567 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2569 if (!issued_x87_ret_error)
2571 error ("x87 register return with x87 disabled");
2572 issued_x87_ret_error = true;
2574 return NULL;
2577 /* First construct simple cases. Avoid SCmode, since we want to use
2578 a single register to pass this type. */
2579 if (n == 1 && mode != SCmode)
2580 switch (regclass[0])
2582 case X86_64_INTEGER_CLASS:
2583 case X86_64_INTEGERSI_CLASS:
2584 return gen_rtx_REG (mode, intreg[0]);
2585 case X86_64_SSE_CLASS:
2586 case X86_64_SSESF_CLASS:
2587 case X86_64_SSEDF_CLASS:
2588 if (mode != BLKmode)
2589 return gen_reg_or_parallel (mode, orig_mode,
2590 GET_SSE_REGNO (sse_regno));
2591 break;
2592 case X86_64_X87_CLASS:
2593 case X86_64_COMPLEX_X87_CLASS:
2594 return gen_rtx_REG (mode, FIRST_STACK_REG);
2595 case X86_64_NO_CLASS:
2596 /* Zero sized array, struct or class. */
2597 return NULL;
2598 default:
2599 gcc_unreachable ();
2601 if (n == 2
2602 && regclass[0] == X86_64_SSE_CLASS
2603 && regclass[1] == X86_64_SSEUP_CLASS
2604 && mode != BLKmode)
2605 return gen_reg_or_parallel (mode, orig_mode,
2606 GET_SSE_REGNO (sse_regno));
2607 if (n == 4
2608 && regclass[0] == X86_64_SSE_CLASS
2609 && regclass[1] == X86_64_SSEUP_CLASS
2610 && regclass[2] == X86_64_SSEUP_CLASS
2611 && regclass[3] == X86_64_SSEUP_CLASS
2612 && mode != BLKmode)
2613 return gen_reg_or_parallel (mode, orig_mode,
2614 GET_SSE_REGNO (sse_regno));
2615 if (n == 8
2616 && regclass[0] == X86_64_SSE_CLASS
2617 && regclass[1] == X86_64_SSEUP_CLASS
2618 && regclass[2] == X86_64_SSEUP_CLASS
2619 && regclass[3] == X86_64_SSEUP_CLASS
2620 && regclass[4] == X86_64_SSEUP_CLASS
2621 && regclass[5] == X86_64_SSEUP_CLASS
2622 && regclass[6] == X86_64_SSEUP_CLASS
2623 && regclass[7] == X86_64_SSEUP_CLASS
2624 && mode != BLKmode)
2625 return gen_reg_or_parallel (mode, orig_mode,
2626 GET_SSE_REGNO (sse_regno));
2627 if (n == 2
2628 && regclass[0] == X86_64_X87_CLASS
2629 && regclass[1] == X86_64_X87UP_CLASS)
2630 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2632 if (n == 2
2633 && regclass[0] == X86_64_INTEGER_CLASS
2634 && regclass[1] == X86_64_INTEGER_CLASS
2635 && (mode == CDImode || mode == TImode || mode == BLKmode)
2636 && intreg[0] + 1 == intreg[1])
2638 if (mode == BLKmode)
2640 /* Use TImode for BLKmode values in 2 integer registers. */
2641 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2642 gen_rtx_REG (TImode, intreg[0]),
2643 GEN_INT (0));
2644 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2645 XVECEXP (ret, 0, 0) = exp[0];
2646 return ret;
2648 else
2649 return gen_rtx_REG (mode, intreg[0]);
2652 /* Otherwise figure out the entries of the PARALLEL. */
2653 for (i = 0; i < n; i++)
2655 int pos;
2657 switch (regclass[i])
2659 case X86_64_NO_CLASS:
2660 break;
2661 case X86_64_INTEGER_CLASS:
2662 case X86_64_INTEGERSI_CLASS:
2663 /* Merge TImodes on aligned occasions here too. */
2664 if (i * 8 + 8 > bytes)
2666 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2667 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2668 /* We've requested a size (such as 24 or 40 bits) for
2669 which no integer mode exists. Use DImode. */
2670 tmpmode = DImode;
2672 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2673 tmpmode = SImode;
2674 else
2675 tmpmode = DImode;
2676 exp [nexps++]
2677 = gen_rtx_EXPR_LIST (VOIDmode,
2678 gen_rtx_REG (tmpmode, *intreg),
2679 GEN_INT (i*8));
2680 intreg++;
2681 break;
2682 case X86_64_SSESF_CLASS:
2683 exp [nexps++]
2684 = gen_rtx_EXPR_LIST (VOIDmode,
2685 gen_rtx_REG (SFmode,
2686 GET_SSE_REGNO (sse_regno)),
2687 GEN_INT (i*8));
2688 sse_regno++;
2689 break;
2690 case X86_64_SSEDF_CLASS:
2691 exp [nexps++]
2692 = gen_rtx_EXPR_LIST (VOIDmode,
2693 gen_rtx_REG (DFmode,
2694 GET_SSE_REGNO (sse_regno)),
2695 GEN_INT (i*8));
2696 sse_regno++;
2697 break;
2698 case X86_64_SSE_CLASS:
2699 pos = i;
2700 switch (n)
2702 case 1:
2703 tmpmode = DImode;
2704 break;
2705 case 2:
2706 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2708 tmpmode = TImode;
2709 i++;
2711 else
2712 tmpmode = DImode;
2713 break;
2714 case 4:
2715 gcc_assert (i == 0
2716 && regclass[1] == X86_64_SSEUP_CLASS
2717 && regclass[2] == X86_64_SSEUP_CLASS
2718 && regclass[3] == X86_64_SSEUP_CLASS);
2719 tmpmode = OImode;
2720 i += 3;
2721 break;
2722 case 8:
2723 gcc_assert (i == 0
2724 && regclass[1] == X86_64_SSEUP_CLASS
2725 && regclass[2] == X86_64_SSEUP_CLASS
2726 && regclass[3] == X86_64_SSEUP_CLASS
2727 && regclass[4] == X86_64_SSEUP_CLASS
2728 && regclass[5] == X86_64_SSEUP_CLASS
2729 && regclass[6] == X86_64_SSEUP_CLASS
2730 && regclass[7] == X86_64_SSEUP_CLASS);
2731 tmpmode = XImode;
2732 i += 7;
2733 break;
2734 default:
2735 gcc_unreachable ();
2737 exp [nexps++]
2738 = gen_rtx_EXPR_LIST (VOIDmode,
2739 gen_rtx_REG (tmpmode,
2740 GET_SSE_REGNO (sse_regno)),
2741 GEN_INT (pos*8));
2742 sse_regno++;
2743 break;
2744 default:
2745 gcc_unreachable ();
2749 /* Empty aligned struct, union or class. */
2750 if (nexps == 0)
2751 return NULL;
2753 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2754 for (i = 0; i < nexps; i++)
2755 XVECEXP (ret, 0, i) = exp [i];
2756 return ret;
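/* As an illustration (hypothetical type, not from this file), a value of
   type

     struct s { long a; double d; };

   is classified {INTEGER, SSEDF}, so construct_container builds a PARALLEL
   with two EXPR_LIST entries; when used for a return value they are
   (reg:DI rax) at byte offset 0 and (reg:DF xmm0) at byte offset 8, telling
   the rest of the compiler which piece of the value lives in which hard
   register.  */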
2759 /* Update the data in CUM to advance over an argument of mode MODE
2760 and data type TYPE. (TYPE is null for libcalls where that information
2761 may not be available.)
2763 Return the number of integer registers advanced over. */
2765 static int
2766 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2767 const_tree type, HOST_WIDE_INT bytes,
2768 HOST_WIDE_INT words)
2770 int res = 0;
2771 bool error_p = false;
2773 if (TARGET_IAMCU)
2775 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2776 bytes in registers. */
2777 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2778 goto pass_in_reg;
2779 return res;
2782 switch (mode)
2784 default:
2785 break;
2787 case E_BLKmode:
2788 if (bytes < 0)
2789 break;
2790 /* FALLTHRU */
2792 case E_DImode:
2793 case E_SImode:
2794 case E_HImode:
2795 case E_QImode:
2796 pass_in_reg:
2797 cum->words += words;
2798 cum->nregs -= words;
2799 cum->regno += words;
2800 if (cum->nregs >= 0)
2801 res = words;
2802 if (cum->nregs <= 0)
2804 cum->nregs = 0;
2805 cfun->machine->arg_reg_available = false;
2806 cum->regno = 0;
2808 break;
2810 case E_OImode:
2811 /* OImode shouldn't be used directly. */
2812 gcc_unreachable ();
2814 case E_DFmode:
2815 if (cum->float_in_sse == -1)
2816 error_p = true;
2817 if (cum->float_in_sse < 2)
2818 break;
2819 /* FALLTHRU */
2820 case E_SFmode:
2821 if (cum->float_in_sse == -1)
2822 error_p = true;
2823 if (cum->float_in_sse < 1)
2824 break;
2825 /* FALLTHRU */
2827 case E_V8SFmode:
2828 case E_V8SImode:
2829 case E_V64QImode:
2830 case E_V32HImode:
2831 case E_V16SImode:
2832 case E_V8DImode:
2833 case E_V16SFmode:
2834 case E_V8DFmode:
2835 case E_V32QImode:
2836 case E_V16HImode:
2837 case E_V4DFmode:
2838 case E_V4DImode:
2839 case E_TImode:
2840 case E_V16QImode:
2841 case E_V8HImode:
2842 case E_V4SImode:
2843 case E_V2DImode:
2844 case E_V4SFmode:
2845 case E_V2DFmode:
2846 if (!type || !AGGREGATE_TYPE_P (type))
2848 cum->sse_words += words;
2849 cum->sse_nregs -= 1;
2850 cum->sse_regno += 1;
2851 if (cum->sse_nregs <= 0)
2853 cum->sse_nregs = 0;
2854 cum->sse_regno = 0;
2857 break;
2859 case E_V8QImode:
2860 case E_V4HImode:
2861 case E_V2SImode:
2862 case E_V2SFmode:
2863 case E_V1TImode:
2864 case E_V1DImode:
2865 if (!type || !AGGREGATE_TYPE_P (type))
2867 cum->mmx_words += words;
2868 cum->mmx_nregs -= 1;
2869 cum->mmx_regno += 1;
2870 if (cum->mmx_nregs <= 0)
2872 cum->mmx_nregs = 0;
2873 cum->mmx_regno = 0;
2876 break;
2878 if (error_p)
2880 cum->float_in_sse = 0;
2881 error ("calling %qD with SSE calling convention without "
2882 "SSE/SSE2 enabled", cum->decl);
2883 sorry ("this is a GCC bug that can be worked around by adding "
2884 "attribute used to function called");
2887 return res;
2890 static int
2891 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2892 const_tree type, HOST_WIDE_INT words, bool named)
2894 int int_nregs, sse_nregs;
2896 /* Unnamed 512 and 256 bit vector mode parameters are passed on the stack. */
2897 if (!named && (VALID_AVX512F_REG_MODE (mode)
2898 || VALID_AVX256_REG_MODE (mode)))
2899 return 0;
2901 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2902 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2904 cum->nregs -= int_nregs;
2905 cum->sse_nregs -= sse_nregs;
2906 cum->regno += int_nregs;
2907 cum->sse_regno += sse_nregs;
2908 return int_nregs;
2910 else
2912 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2913 cum->words = ROUND_UP (cum->words, align);
2914 cum->words += words;
2915 return 0;
2919 static int
2920 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2921 HOST_WIDE_INT words)
2923 /* Otherwise, this should be passed indirect. */
2924 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2926 cum->words += words;
2927 if (cum->nregs > 0)
2929 cum->nregs -= 1;
2930 cum->regno += 1;
2931 return 1;
2933 return 0;
2936 /* Update the data in CUM to advance over argument ARG. */
2938 static void
2939 ix86_function_arg_advance (cumulative_args_t cum_v,
2940 const function_arg_info &arg)
2942 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2943 machine_mode mode = arg.mode;
2944 HOST_WIDE_INT bytes, words;
2945 int nregs;
2947 /* The argument of an interrupt handler is a special case and is
2948 handled in ix86_function_arg. */
2949 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2950 return;
2952 bytes = arg.promoted_size_in_bytes ();
2953 words = CEIL (bytes, UNITS_PER_WORD);
2955 if (arg.type)
2956 mode = type_natural_mode (arg.type, NULL, false);
2958 if (TARGET_64BIT)
2960 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2962 if (call_abi == MS_ABI)
2963 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2964 else
2965 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2966 arg.named);
2968 else
2969 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2971 if (!nregs)
2973 /* Track whether there are outgoing arguments on the stack. */
2974 if (cum->caller)
2975 cfun->machine->outgoing_args_on_stack = true;
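/* For example (hypothetical prototype, not from this file), advancing over
   the arguments of

     void f (int a, double b, int c);

   on the 64-bit SysV ABI consumes one integer register for A (EDI), one
   SSE register for B (XMM0) and a second integer register for C (ESI);
   since every argument landed in a register, this hook never marks
   outgoing_args_on_stack for the call.  */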
2979 /* Define where to put the arguments to a function.
2980 Value is zero to push the argument on the stack,
2981 or a hard register in which to store the argument.
2983 MODE is the argument's machine mode.
2984 TYPE is the data type of the argument (as a tree).
2985 This is null for libcalls where that information may
2986 not be available.
2987 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2988 the preceding args and about the function being called.
2989 NAMED is nonzero if this argument is a named parameter
2990 (otherwise it is an extra parameter matching an ellipsis). */
2992 static rtx
2993 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2994 machine_mode orig_mode, const_tree type,
2995 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2997 bool error_p = false;
2999 /* Avoid the AL settings for the Unix64 ABI. */
3000 if (mode == VOIDmode)
3001 return constm1_rtx;
3003 if (TARGET_IAMCU)
3005 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3006 bytes in registers. */
3007 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3008 goto pass_in_reg;
3009 return NULL_RTX;
3012 switch (mode)
3014 default:
3015 break;
3017 case E_BLKmode:
3018 if (bytes < 0)
3019 break;
3020 /* FALLTHRU */
3021 case E_DImode:
3022 case E_SImode:
3023 case E_HImode:
3024 case E_QImode:
3025 pass_in_reg:
3026 if (words <= cum->nregs)
3028 int regno = cum->regno;
3030 /* Fastcall allocates the first two DWORD (SImode) or
3031 smaller arguments to ECX and EDX if they aren't
3032 aggregate types. */
3033 if (cum->fastcall)
3035 if (mode == BLKmode
3036 || mode == DImode
3037 || (type && AGGREGATE_TYPE_P (type)))
3038 break;
3040 /* ECX, not EAX, is the first allocated register. */
3041 if (regno == AX_REG)
3042 regno = CX_REG;
3044 return gen_rtx_REG (mode, regno);
3046 break;
3048 case E_DFmode:
3049 if (cum->float_in_sse == -1)
3050 error_p = true;
3051 if (cum->float_in_sse < 2)
3052 break;
3053 /* FALLTHRU */
3054 case E_SFmode:
3055 if (cum->float_in_sse == -1)
3056 error_p = true;
3057 if (cum->float_in_sse < 1)
3058 break;
3059 /* FALLTHRU */
3060 case E_TImode:
3061 /* In 32bit, we pass TImode in xmm registers. */
3062 case E_V16QImode:
3063 case E_V8HImode:
3064 case E_V4SImode:
3065 case E_V2DImode:
3066 case E_V4SFmode:
3067 case E_V2DFmode:
3068 if (!type || !AGGREGATE_TYPE_P (type))
3070 if (cum->sse_nregs)
3071 return gen_reg_or_parallel (mode, orig_mode,
3072 cum->sse_regno + FIRST_SSE_REG);
3074 break;
3076 case E_OImode:
3077 case E_XImode:
3078 /* OImode and XImode shouldn't be used directly. */
3079 gcc_unreachable ();
3081 case E_V64QImode:
3082 case E_V32HImode:
3083 case E_V16SImode:
3084 case E_V8DImode:
3085 case E_V16SFmode:
3086 case E_V8DFmode:
3087 case E_V8SFmode:
3088 case E_V8SImode:
3089 case E_V32QImode:
3090 case E_V16HImode:
3091 case E_V4DFmode:
3092 case E_V4DImode:
3093 if (!type || !AGGREGATE_TYPE_P (type))
3095 if (cum->sse_nregs)
3096 return gen_reg_or_parallel (mode, orig_mode,
3097 cum->sse_regno + FIRST_SSE_REG);
3099 break;
3101 case E_V8QImode:
3102 case E_V4HImode:
3103 case E_V2SImode:
3104 case E_V2SFmode:
3105 case E_V1TImode:
3106 case E_V1DImode:
3107 if (!type || !AGGREGATE_TYPE_P (type))
3109 if (cum->mmx_nregs)
3110 return gen_reg_or_parallel (mode, orig_mode,
3111 cum->mmx_regno + FIRST_MMX_REG);
3113 break;
3115 if (error_p)
3117 cum->float_in_sse = 0;
3118 error ("calling %qD with SSE calling convention without "
3119 "SSE/SSE2 enabled", cum->decl);
3120 sorry ("this is a GCC bug that can be worked around by adding "
3121 "attribute used to function called");
3124 return NULL_RTX;
3127 static rtx
3128 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3129 machine_mode orig_mode, const_tree type, bool named)
3131 /* Handle a hidden AL argument containing number of registers
3132 for varargs x86-64 functions. */
3133 if (mode == VOIDmode)
3134 return GEN_INT (cum->maybe_vaarg
3135 ? (cum->sse_nregs < 0
3136 ? X86_64_SSE_REGPARM_MAX
3137 : cum->sse_regno)
3138 : -1);
3140 switch (mode)
3142 default:
3143 break;
3145 case E_V8SFmode:
3146 case E_V8SImode:
3147 case E_V32QImode:
3148 case E_V16HImode:
3149 case E_V4DFmode:
3150 case E_V4DImode:
3151 case E_V16SFmode:
3152 case E_V16SImode:
3153 case E_V64QImode:
3154 case E_V32HImode:
3155 case E_V8DFmode:
3156 case E_V8DImode:
3157 /* Unnamed 256 and 512 bit vector mode parameters are passed on the stack. */
3158 if (!named)
3159 return NULL;
3160 break;
3163 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3164 cum->sse_nregs,
3165 &x86_64_int_parameter_registers [cum->regno],
3166 cum->sse_regno);
3169 static rtx
3170 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3171 machine_mode orig_mode, bool named, const_tree type,
3172 HOST_WIDE_INT bytes)
3174 unsigned int regno;
3176 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3177 We use a value of -2 to specify that the current function call is MS ABI. */
3178 if (mode == VOIDmode)
3179 return GEN_INT (-2);
3181 /* If we've run out of registers, it goes on the stack. */
3182 if (cum->nregs == 0)
3183 return NULL_RTX;
3185 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3187 /* Only floating point modes are passed in anything but integer regs. */
3188 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3190 if (named)
3192 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3193 regno = cum->regno + FIRST_SSE_REG;
3195 else
3197 rtx t1, t2;
3199 /* Unnamed floating parameters are passed in both the
3200 SSE and integer registers. */
3201 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3202 t2 = gen_rtx_REG (mode, regno);
3203 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3204 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3205 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3208 /* Handle aggregate types passed in registers. */
3209 if (orig_mode == BLKmode)
3211 if (bytes > 0 && bytes <= 8)
3212 mode = (bytes > 4 ? DImode : SImode);
3213 if (mode == BLKmode)
3214 mode = DImode;
3217 return gen_reg_or_parallel (mode, orig_mode, regno);
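/* For example (hypothetical prototype, not from this file), under the
   Microsoft x64 convention handled here the first four parameters are
   assigned strictly by position:

     void f (int a, double b, int c, double d);
       // a -> RCX, b -> XMM1, c -> R8D, d -> XMM3

   and an unnamed floating-point argument of a variadic function comes back
   as a PARALLEL naming both its SSE and its integer register, so the value
   is made available in both places as that convention requires.  */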
3220 /* Return where to put the arguments to a function.
3221 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3223 ARG describes the argument while CUM gives information about the
3224 preceding args and about the function being called. */
3226 static rtx
3227 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3229 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3230 machine_mode mode = arg.mode;
3231 HOST_WIDE_INT bytes, words;
3232 rtx reg;
3234 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3236 gcc_assert (arg.type != NULL_TREE);
3237 if (POINTER_TYPE_P (arg.type))
3239 /* This is the pointer argument. */
3240 gcc_assert (TYPE_MODE (arg.type) == Pmode);
3241 /* It is at -WORD(AP) in the current frame in interrupt and
3242 exception handlers. */
3243 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3245 else
3247 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3248 && TREE_CODE (arg.type) == INTEGER_TYPE
3249 && TYPE_MODE (arg.type) == word_mode);
3250 /* The error code is the word-mode integer argument at
3251 -2 * WORD(AP) in the current frame of the exception
3252 handler. */
3253 reg = gen_rtx_MEM (word_mode,
3254 plus_constant (Pmode,
3255 arg_pointer_rtx,
3256 -2 * UNITS_PER_WORD));
3258 return reg;
3261 bytes = arg.promoted_size_in_bytes ();
3262 words = CEIL (bytes, UNITS_PER_WORD);
3264 /* To simplify the code below, represent vector types with a vector mode
3265 even if MMX/SSE are not active. */
3266 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3267 mode = type_natural_mode (arg.type, cum, false);
3269 if (TARGET_64BIT)
3271 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3273 if (call_abi == MS_ABI)
3274 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3275 arg.type, bytes);
3276 else
3277 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3279 else
3280 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3282 /* Track whether there are outgoing arguments on the stack. */
3283 if (reg == NULL_RTX && cum->caller)
3284 cfun->machine->outgoing_args_on_stack = true;
3286 return reg;
3289 /* A C expression that indicates when an argument must be passed by
3290 reference. If nonzero for an argument, a copy of that argument is
3291 made in memory and a pointer to the argument is passed instead of
3292 the argument itself. The pointer is passed in whatever way is
3293 appropriate for passing a pointer to that type. */
3295 static bool
3296 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3298 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3300 if (TARGET_64BIT)
3302 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3304 /* See Windows x64 Software Convention. */
3305 if (call_abi == MS_ABI)
3307 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3309 if (tree type = arg.type)
3311 /* Arrays are passed by reference. */
3312 if (TREE_CODE (type) == ARRAY_TYPE)
3313 return true;
3315 if (RECORD_OR_UNION_TYPE_P (type))
3317 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3318 are passed by reference. */
3319 msize = int_size_in_bytes (type);
3323 /* __m128 is passed by reference. */
3324 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3326 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3327 return true;
3330 return false;
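/* For instance (hypothetical types, not from this file), with the MS ABI
   checks above:

     struct s8  { long long x; };   // 8 bytes  -> passed by value
     struct s12 { int a, b, c; };   // 12 bytes -> passed by reference
     __m128 v;                      // 16 bytes -> passed by reference

   On the 64-bit SysV side only variable-sized types are forced through
   this hook; large aggregates are instead sent to memory by the
   classification code above.  */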
3333 /* Return true when TYPE should be 128bit aligned for 32bit argument
3334 passing ABI. XXX: This function is obsolete and is only used for
3335 checking psABI compatibility with previous versions of GCC. */
3337 static bool
3338 ix86_compat_aligned_value_p (const_tree type)
3340 machine_mode mode = TYPE_MODE (type);
3341 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3342 || mode == TDmode
3343 || mode == TFmode
3344 || mode == TCmode)
3345 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3346 return true;
3347 if (TYPE_ALIGN (type) < 128)
3348 return false;
3350 if (AGGREGATE_TYPE_P (type))
3352 /* Walk the aggregates recursively. */
3353 switch (TREE_CODE (type))
3355 case RECORD_TYPE:
3356 case UNION_TYPE:
3357 case QUAL_UNION_TYPE:
3359 tree field;
3361 /* Walk all the structure fields. */
3362 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3364 if (TREE_CODE (field) == FIELD_DECL
3365 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3366 return true;
3368 break;
3371 case ARRAY_TYPE:
3372 /* Just for use if some language passes arrays by value. */
3373 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3374 return true;
3375 break;
3377 default:
3378 gcc_unreachable ();
3381 return false;
3384 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3385 XXX: This function is obsolete and is only used for checking psABI
3386 compatibility with previous versions of GCC. */
3388 static unsigned int
3389 ix86_compat_function_arg_boundary (machine_mode mode,
3390 const_tree type, unsigned int align)
3392 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3393 natural boundaries. */
3394 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3396 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3397 make an exception for SSE modes since these require 128bit
3398 alignment.
3400 The handling here differs from field_alignment. ICC aligns MMX
3401 arguments to 4 byte boundaries, while structure fields are aligned
3402 to 8 byte boundaries. */
3403 if (!type)
3405 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3406 align = PARM_BOUNDARY;
3408 else
3410 if (!ix86_compat_aligned_value_p (type))
3411 align = PARM_BOUNDARY;
3414 if (align > BIGGEST_ALIGNMENT)
3415 align = BIGGEST_ALIGNMENT;
3416 return align;
3419 /* Return true when TYPE should be 128bit aligned for 32bit argument
3420 passing ABI. */
3422 static bool
3423 ix86_contains_aligned_value_p (const_tree type)
3425 machine_mode mode = TYPE_MODE (type);
3427 if (mode == XFmode || mode == XCmode)
3428 return false;
3430 if (TYPE_ALIGN (type) < 128)
3431 return false;
3433 if (AGGREGATE_TYPE_P (type))
3435 /* Walk the aggregates recursively. */
3436 switch (TREE_CODE (type))
3438 case RECORD_TYPE:
3439 case UNION_TYPE:
3440 case QUAL_UNION_TYPE:
3442 tree field;
3444 /* Walk all the structure fields. */
3445 for (field = TYPE_FIELDS (type);
3446 field;
3447 field = DECL_CHAIN (field))
3449 if (TREE_CODE (field) == FIELD_DECL
3450 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3451 return true;
3453 break;
3456 case ARRAY_TYPE:
3457 /* Just for use if some language passes arrays by value. */
3458 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3459 return true;
3460 break;
3462 default:
3463 gcc_unreachable ();
3466 else
3467 return TYPE_ALIGN (type) >= 128;
3469 return false;
3472 /* Gives the alignment boundary, in bits, of an argument with the
3473 specified mode and type. */
3475 static unsigned int
3476 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3478 unsigned int align;
3479 if (type)
3481 /* Since the main variant type is used for the call, convert TYPE
3482 to its main variant. */
3483 type = TYPE_MAIN_VARIANT (type);
3484 align = TYPE_ALIGN (type);
3485 if (TYPE_EMPTY_P (type))
3486 return PARM_BOUNDARY;
3488 else
3489 align = GET_MODE_ALIGNMENT (mode);
3490 if (align < PARM_BOUNDARY)
3491 align = PARM_BOUNDARY;
3492 else
3494 static bool warned;
3495 unsigned int saved_align = align;
3497 if (!TARGET_64BIT)
3499 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3500 if (!type)
3502 if (mode == XFmode || mode == XCmode)
3503 align = PARM_BOUNDARY;
3505 else if (!ix86_contains_aligned_value_p (type))
3506 align = PARM_BOUNDARY;
3508 if (align < 128)
3509 align = PARM_BOUNDARY;
3512 if (warn_psabi
3513 && !warned
3514 && align != ix86_compat_function_arg_boundary (mode, type,
3515 saved_align))
3517 warned = true;
3518 inform (input_location,
3519 "the ABI for passing parameters with %d-byte"
3520 " alignment has changed in GCC 4.6",
3521 align / BITS_PER_UNIT);
3525 return align;
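/* For example (hypothetical declarations, not from this file):

     void f (__m128 v);   // V4SFmode -> 128-bit argument alignment
     void g (double d);   // 32-bit psABI -> PARM_BOUNDARY, i.e. 4 bytes

   On the 32-bit psABI only types that contain a 128-bit-aligned value
   (SSE vectors, _Decimal128, __float128) get more than the default 4-byte
   parameter alignment, and -Wpsabi points out cases where this answer
   changed relative to the pre-GCC 4.6 rules.  */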
3528 /* Return true if N is a possible register number of function value. */
3530 static bool
3531 ix86_function_value_regno_p (const unsigned int regno)
3533 switch (regno)
3535 case AX_REG:
3536 return true;
3537 case DX_REG:
3538 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3539 case DI_REG:
3540 case SI_REG:
3541 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3543 /* Complex values are returned in %st(0)/%st(1) pair. */
3544 case ST0_REG:
3545 case ST1_REG:
3546 /* TODO: The function should depend on current function ABI but
3547 builtins.c would need updating then. Therefore we use the
3548 default ABI. */
3549 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3550 return false;
3551 return TARGET_FLOAT_RETURNS_IN_80387;
3553 /* Complex values are returned in %xmm0/%xmm1 pair. */
3554 case XMM0_REG:
3555 case XMM1_REG:
3556 return TARGET_SSE;
3558 case MM0_REG:
3559 if (TARGET_MACHO || TARGET_64BIT)
3560 return false;
3561 return TARGET_MMX;
3564 return false;
3567 /* Check whether the register REGNO should be zeroed on X86.
3568 When ALL_SSE_ZEROED is true, all SSE registers have already been zeroed
3569 together, so there is no need to zero them again.
3570 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3572 static bool
3573 zero_call_used_regno_p (const unsigned int regno,
3574 bool all_sse_zeroed,
3575 bool need_zero_mmx)
3577 return GENERAL_REGNO_P (regno)
3578 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3579 || MASK_REGNO_P (regno)
3580 || (need_zero_mmx && MMX_REGNO_P (regno));
3583 /* Return the machine_mode that is used to zero register REGNO. */
3585 static machine_mode
3586 zero_call_used_regno_mode (const unsigned int regno)
3588 /* NB: We only need to zero the lower 32 bits for integer registers
3589 and the lower 128 bits for vector registers since the destination is
3590 zero-extended to the full register width. */
3591 if (GENERAL_REGNO_P (regno))
3592 return SImode;
3593 else if (SSE_REGNO_P (regno))
3594 return V4SFmode;
3595 else if (MASK_REGNO_P (regno))
3596 return HImode;
3597 else if (MMX_REGNO_P (regno))
3598 return V4HImode;
3599 else
3600 gcc_unreachable ();
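/* The practical effect (a sketch of the expected assembly, assuming default
   tuning): clearing a call-used GPR on x86-64 emits "xorl %eax, %eax",
   which also clears the upper half of %rax because 32-bit writes
   zero-extend, and clearing an SSE register typically emits
   "xorps %xmm0, %xmm0" (or a single vzeroall when all vector registers can
   be zeroed at once, see zero_all_vector_registers below).  */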
3603 /* Generate an rtx to zero all vector registers together if possible;
3604 otherwise return NULL. */
3606 static rtx
3607 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3609 if (!TARGET_AVX)
3610 return NULL;
3612 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3613 if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
3614 || (TARGET_64BIT
3615 && (REX_SSE_REGNO_P (regno)
3616 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3617 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3618 return NULL;
3620 return gen_avx_vzeroall ();
3623 /* Generate insns to zero all st registers together.
3624 Return true when zeroing instructions are generated.
3625 Assume the number of st registers that are zeroed is num_of_st,
3626 we will emit the following sequence to zero them together:
3627 fldz; \
3628 fldz; \
3630 fldz; \
3631 fstp %%st(0); \
3632 fstp %%st(0); \
3634 fstp %%st(0);
3635 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3636 the stack and mark the stack slots empty.
3638 How to compute the num_of_st:
3639 There is no direct mapping from stack registers to hard register
3640 numbers. If one stack register needs to be cleared, we don't know
3641 where in the stack the value remains. So, if any stack register
3642 needs to be cleared, the whole stack should be cleared. However,
3643 x87 stack registers that hold the return value should be excluded.
3644 x87 returns its value in the top register (the top two for complex
3645 values), so num_of_st should be 7 (or 6) when x87 returns; otherwise it will be 8. */
3648 static bool
3649 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3651 unsigned int num_of_st = 0;
3652 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3653 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3654 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3656 num_of_st++;
3657 break;
3660 if (num_of_st == 0)
3661 return false;
3663 bool return_with_x87 = false;
3664 return_with_x87 = (crtl->return_rtx
3665 && (STACK_REG_P (crtl->return_rtx)));
3667 bool complex_return = false;
3668 complex_return = (crtl->return_rtx
3669 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3671 if (return_with_x87)
3672 if (complex_return)
3673 num_of_st = 6;
3674 else
3675 num_of_st = 7;
3676 else
3677 num_of_st = 8;
3679 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3680 for (unsigned int i = 0; i < num_of_st; i++)
3681 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3683 for (unsigned int i = 0; i < num_of_st; i++)
3685 rtx insn;
3686 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3687 add_reg_note (insn, REG_DEAD, st_reg);
3689 return true;
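/* For instance, in a function with no x87 return value the loops above emit

     fldz; fldz; fldz; fldz; fldz; fldz; fldz; fldz;
     fstp %st(0); fstp %st(0); ... fstp %st(0);      (eight of each)

   which loads eight zeros and then pops them, leaving every x87 stack slot
   both zeroed and marked empty.  With a scalar x87 return value the count
   drops to 7, and to 6 for a complex one, so the register(s) holding the
   return value are left alone.  */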
3693 /* When the routine exits in MMX mode, if any ST register needs
3694 to be zeroed, we should clear all MMX registers except
3695 RET_MMX_REGNO, which holds the return value. */
3696 static bool
3697 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3698 unsigned int ret_mmx_regno)
3700 bool need_zero_all_mm = false;
3701 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3702 if (STACK_REGNO_P (regno)
3703 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3705 need_zero_all_mm = true;
3706 break;
3709 if (!need_zero_all_mm)
3710 return false;
3712 rtx zero_mmx = NULL_RTX;
3713 machine_mode mode = V4HImode;
3714 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3715 if (regno != ret_mmx_regno)
3717 rtx reg = gen_rtx_REG (mode, regno);
3718 if (zero_mmx == NULL_RTX)
3720 zero_mmx = reg;
3721 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3723 else
3724 emit_move_insn (reg, zero_mmx);
3726 return true;
3729 /* TARGET_ZERO_CALL_USED_REGS. */
3730 /* Generate a sequence of instructions that zero registers specified by
3731 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3732 zeroed. */
3733 static HARD_REG_SET
3734 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3736 HARD_REG_SET zeroed_hardregs;
3737 bool all_sse_zeroed = false;
3738 bool all_st_zeroed = false;
3739 bool all_mm_zeroed = false;
3741 CLEAR_HARD_REG_SET (zeroed_hardregs);
3743 /* First, see whether we can zero all vector registers together. */
3744 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3745 if (zero_all_vec_insn)
3747 emit_insn (zero_all_vec_insn);
3748 all_sse_zeroed = true;
3751 /* The MMX and x87 stack registers share one register set, so we follow
3752 these rules to clear them:
3753 MMX exit mode x87 exit mode
3754 -------------|----------------------|---------------
3755 uses x87 reg | clear all MMX | clear all x87
3756 uses MMX reg | clear individual MMX | clear all x87
3757 x87 + MMX | clear all MMX | clear all x87
3759 First, decide which mode (MMX mode or x87 mode) the function
3760 exits with. */
3762 bool exit_with_mmx_mode = (crtl->return_rtx
3763 && (MMX_REG_P (crtl->return_rtx)));
3765 if (!exit_with_mmx_mode)
3766 /* x87 exit mode, we should zero all st registers together. */
3768 all_st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
3769 if (all_st_zeroed)
3770 SET_HARD_REG_BIT (zeroed_hardregs, FIRST_STACK_REG);
3772 else
3773 /* MMX exit mode, check whether we can zero all mm registers. */
3775 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3776 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3777 exit_mmx_regno);
3778 if (all_mm_zeroed)
3779 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3780 if (regno != exit_mmx_regno)
3781 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3784 /* Now, generate instructions to zero all the other registers. */
3786 rtx zero_gpr = NULL_RTX;
3787 rtx zero_vector = NULL_RTX;
3788 rtx zero_mask = NULL_RTX;
3789 rtx zero_mmx = NULL_RTX;
3791 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3793 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3794 continue;
3795 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3796 exit_with_mmx_mode && !all_mm_zeroed))
3797 continue;
3799 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3801 rtx reg, tmp, zero_rtx;
3802 machine_mode mode = zero_call_used_regno_mode (regno);
3804 reg = gen_rtx_REG (mode, regno);
3805 zero_rtx = CONST0_RTX (mode);
3807 if (mode == SImode)
3808 if (zero_gpr == NULL_RTX)
3810 zero_gpr = reg;
3811 tmp = gen_rtx_SET (reg, zero_rtx);
3812 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
3814 rtx clob = gen_rtx_CLOBBER (VOIDmode,
3815 gen_rtx_REG (CCmode,
3816 FLAGS_REG));
3817 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
3818 tmp,
3819 clob));
3821 emit_insn (tmp);
3823 else
3824 emit_move_insn (reg, zero_gpr);
3825 else if (mode == V4SFmode)
3826 if (zero_vector == NULL_RTX)
3828 zero_vector = reg;
3829 tmp = gen_rtx_SET (reg, zero_rtx);
3830 emit_insn (tmp);
3832 else
3833 emit_move_insn (reg, zero_vector);
3834 else if (mode == HImode)
3835 if (zero_mask == NULL_RTX)
3837 zero_mask = reg;
3838 tmp = gen_rtx_SET (reg, zero_rtx);
3839 emit_insn (tmp);
3841 else
3842 emit_move_insn (reg, zero_mask);
3843 else if (mode == V4HImode)
3844 if (zero_mmx == NULL_RTX)
3846 zero_mmx = reg;
3847 tmp = gen_rtx_SET (reg, zero_rtx);
3848 emit_insn (tmp);
3850 else
3851 emit_move_insn (reg, zero_mmx);
3852 else
3853 gcc_unreachable ();
3855 return zeroed_hardregs;
3858 /* Define how to find the value returned by a function.
3859 VALTYPE is the data type of the value (as a tree).
3860 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3861 otherwise, FUNC is 0. */
3863 static rtx
3864 function_value_32 (machine_mode orig_mode, machine_mode mode,
3865 const_tree fntype, const_tree fn)
3867 unsigned int regno;
3869 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3870 we normally prevent this case when mmx is not available. However
3871 some ABIs may require the result to be returned like DImode. */
3872 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3873 regno = FIRST_MMX_REG;
3875 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3876 we prevent this case when sse is not available. However some ABIs
3877 may require the result to be returned like integer TImode. */
3878 else if (mode == TImode
3879 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3880 regno = FIRST_SSE_REG;
3882 /* 32-byte vector modes in %ymm0. */
3883 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3884 regno = FIRST_SSE_REG;
3886 /* 64-byte vector modes in %zmm0. */
3887 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3888 regno = FIRST_SSE_REG;
3890 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3891 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3892 regno = FIRST_FLOAT_REG;
3893 else
3894 /* Most things go in %eax. */
3895 regno = AX_REG;
3897 /* Override FP return register with %xmm0 for local functions when
3898 SSE math is enabled or for functions with sseregparm attribute. */
3899 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3901 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3902 if (sse_level == -1)
3904 error ("calling %qD with SSE calling convention without "
3905 "SSE/SSE2 enabled", fn);
3906 sorry ("this is a GCC bug that can be worked around by adding "
3907 "attribute used to function called");
3909 else if ((sse_level >= 1 && mode == SFmode)
3910 || (sse_level == 2 && mode == DFmode))
3911 regno = FIRST_SSE_REG;
3914 /* OImode shouldn't be used directly. */
3915 gcc_assert (mode != OImode);
3917 return gen_rtx_REG (orig_mode, regno);
3920 static rtx
3921 function_value_64 (machine_mode orig_mode, machine_mode mode,
3922 const_tree valtype)
3924 rtx ret;
3926 /* Handle libcalls, which don't provide a type node. */
3927 if (valtype == NULL)
3929 unsigned int regno;
3931 switch (mode)
3933 case E_SFmode:
3934 case E_SCmode:
3935 case E_DFmode:
3936 case E_DCmode:
3937 case E_TFmode:
3938 case E_SDmode:
3939 case E_DDmode:
3940 case E_TDmode:
3941 regno = FIRST_SSE_REG;
3942 break;
3943 case E_XFmode:
3944 case E_XCmode:
3945 regno = FIRST_FLOAT_REG;
3946 break;
3947 case E_TCmode:
3948 return NULL;
3949 default:
3950 regno = AX_REG;
3953 return gen_rtx_REG (mode, regno);
3955 else if (POINTER_TYPE_P (valtype))
3957 /* Pointers are always returned in word_mode. */
3958 mode = word_mode;
3961 ret = construct_container (mode, orig_mode, valtype, 1,
3962 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3963 x86_64_int_return_registers, 0);
3965 /* For zero sized structures, construct_container returns NULL, but we
3966 need to keep rest of compiler happy by returning meaningful value. */
3967 if (!ret)
3968 ret = gen_rtx_REG (orig_mode, AX_REG);
3970 return ret;
3973 static rtx
3974 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3975 const_tree fntype, const_tree fn, const_tree valtype)
3977 unsigned int regno;
3979 /* Floating point return values in %st(0)
3980 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3981 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3982 && (GET_MODE_SIZE (mode) > 8
3983 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3985 regno = FIRST_FLOAT_REG;
3986 return gen_rtx_REG (orig_mode, regno);
3988 else
3989 return function_value_32 (orig_mode, mode, fntype, fn);
3992 static rtx
3993 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3994 const_tree valtype)
3996 unsigned int regno = AX_REG;
3998 if (TARGET_SSE)
4000 switch (GET_MODE_SIZE (mode))
4002 case 16:
4003 if (valtype != NULL_TREE
4004 && !VECTOR_INTEGER_TYPE_P (valtype)
4006 && !INTEGRAL_TYPE_P (valtype)
4007 && !VECTOR_FLOAT_TYPE_P (valtype))
4008 break;
4009 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4010 && !COMPLEX_MODE_P (mode))
4011 regno = FIRST_SSE_REG;
4012 break;
4013 case 8:
4014 case 4:
4015 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4016 break;
4017 if (mode == SFmode || mode == DFmode)
4018 regno = FIRST_SSE_REG;
4019 break;
4020 default:
4021 break;
4024 return gen_rtx_REG (orig_mode, regno);
4027 static rtx
4028 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4029 machine_mode orig_mode, machine_mode mode)
4031 const_tree fn, fntype;
4033 fn = NULL_TREE;
4034 if (fntype_or_decl && DECL_P (fntype_or_decl))
4035 fn = fntype_or_decl;
4036 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4038 if (ix86_function_type_abi (fntype) == MS_ABI)
4040 if (TARGET_64BIT)
4041 return function_value_ms_64 (orig_mode, mode, valtype);
4042 else
4043 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4045 else if (TARGET_64BIT)
4046 return function_value_64 (orig_mode, mode, valtype);
4047 else
4048 return function_value_32 (orig_mode, mode, fntype, fn);
4051 static rtx
4052 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4054 machine_mode mode, orig_mode;
4056 orig_mode = TYPE_MODE (valtype);
4057 mode = type_natural_mode (valtype, NULL, true);
4058 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4061 /* Pointer function arguments and return values are promoted to
4062 word_mode for normal functions. */
4064 static machine_mode
4065 ix86_promote_function_mode (const_tree type, machine_mode mode,
4066 int *punsignedp, const_tree fntype,
4067 int for_return)
4069 if (cfun->machine->func_type == TYPE_NORMAL
4070 && type != NULL_TREE
4071 && POINTER_TYPE_P (type))
4073 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4074 return word_mode;
4076 return default_promote_function_mode (type, mode, punsignedp, fntype,
4077 for_return);
4080 /* Return true if a structure, union or array with MODE containing FIELD
4081 should be accessed using BLKmode. */
4083 static bool
4084 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4086 /* Union with XFmode must be in BLKmode. */
4087 return (mode == XFmode
4088 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4089 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4093 ix86_libcall_value (machine_mode mode)
4095 return ix86_function_value_1 (NULL, NULL, mode, mode);
4098 /* Return true iff type is returned in memory. */
4100 static bool
4101 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4103 #ifdef SUBTARGET_RETURN_IN_MEMORY
4104 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
4105 #else
4106 const machine_mode mode = type_natural_mode (type, NULL, true);
4107 HOST_WIDE_INT size;
4109 if (TARGET_64BIT)
4111 if (ix86_function_type_abi (fntype) == MS_ABI)
4113 size = int_size_in_bytes (type);
4115 /* __m128 is returned in xmm0. */
4116 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4117 || INTEGRAL_TYPE_P (type)
4118 || VECTOR_FLOAT_TYPE_P (type))
4119 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4120 && !COMPLEX_MODE_P (mode)
4121 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4122 return false;
4124 /* Otherwise, the size must be exactly in [1248]. */
4125 return size != 1 && size != 2 && size != 4 && size != 8;
4127 else
4129 int needed_intregs, needed_sseregs;
4131 return examine_argument (mode, type, 1,
4132 &needed_intregs, &needed_sseregs);
4135 else
4137 size = int_size_in_bytes (type);
4139 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4140 bytes in registers. */
4141 if (TARGET_IAMCU)
4142 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4144 if (mode == BLKmode)
4145 return true;
4147 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4148 return false;
4150 if (VECTOR_MODE_P (mode) || mode == TImode)
4152 /* User-created vectors small enough to fit in EAX. */
4153 if (size < 8)
4154 return false;
4156 /* Unless the ABI prescribes otherwise,
4157 MMX/3dNow values are returned in MM0 if available. */
4159 if (size == 8)
4160 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4162 /* SSE values are returned in XMM0 if available. */
4163 if (size == 16)
4164 return !TARGET_SSE;
4166 /* AVX values are returned in YMM0 if available. */
4167 if (size == 32)
4168 return !TARGET_AVX;
4170 /* AVX512F values are returned in ZMM0 if available. */
4171 if (size == 64)
4172 return !TARGET_AVX512F;
4175 if (mode == XFmode)
4176 return false;
4178 if (size > 12)
4179 return true;
4181 /* OImode shouldn't be used directly. */
4182 gcc_assert (mode != OImode);
4184 return false;
4186 #endif
4190 /* Create the va_list data type. */
4192 static tree
4193 ix86_build_builtin_va_list_64 (void)
4195 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4197 record = lang_hooks.types.make_type (RECORD_TYPE);
4198 type_decl = build_decl (BUILTINS_LOCATION,
4199 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4201 f_gpr = build_decl (BUILTINS_LOCATION,
4202 FIELD_DECL, get_identifier ("gp_offset"),
4203 unsigned_type_node);
4204 f_fpr = build_decl (BUILTINS_LOCATION,
4205 FIELD_DECL, get_identifier ("fp_offset"),
4206 unsigned_type_node);
4207 f_ovf = build_decl (BUILTINS_LOCATION,
4208 FIELD_DECL, get_identifier ("overflow_arg_area"),
4209 ptr_type_node);
4210 f_sav = build_decl (BUILTINS_LOCATION,
4211 FIELD_DECL, get_identifier ("reg_save_area"),
4212 ptr_type_node);
4214 va_list_gpr_counter_field = f_gpr;
4215 va_list_fpr_counter_field = f_fpr;
4217 DECL_FIELD_CONTEXT (f_gpr) = record;
4218 DECL_FIELD_CONTEXT (f_fpr) = record;
4219 DECL_FIELD_CONTEXT (f_ovf) = record;
4220 DECL_FIELD_CONTEXT (f_sav) = record;
4222 TYPE_STUB_DECL (record) = type_decl;
4223 TYPE_NAME (record) = type_decl;
4224 TYPE_FIELDS (record) = f_gpr;
4225 DECL_CHAIN (f_gpr) = f_fpr;
4226 DECL_CHAIN (f_fpr) = f_ovf;
4227 DECL_CHAIN (f_ovf) = f_sav;
4229 layout_type (record);
4231 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4232 NULL_TREE, TYPE_ATTRIBUTES (record));
4234 /* The correct type is an array type of one element. */
4235 return build_array_type (record, build_index_type (size_zero_node));
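/* For reference, the record built above is a sketch of the va_list
   layout that the SysV x86-64 psABI documents (the psABI is the
   authority here; field names match those used in this file):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */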
4238 /* Set up the builtin va_list data type and for 64-bit the additional
4239 calling convention specific va_list data types. */
4241 static tree
4242 ix86_build_builtin_va_list (void)
4244 if (TARGET_64BIT)
4246 /* Initialize ABI specific va_list builtin types.
4248 In lto1, we can encounter two va_list types:
4249 - one as a result of the type-merge across TUs, and
4250 - the one constructed here.
4251 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4252 a type identity check in canonical_va_list_type based on
4253 TYPE_MAIN_VARIANT (which we used to have) will not work.
4254 Instead, we tag each va_list_type_node with its unique attribute, and
4255 look for the attribute in the type identity check in
4256 canonical_va_list_type.
4258 Tagging sysv_va_list_type_node directly with the attribute is
4259 problematic since it's an array of one record, which will degrade into a
4260 pointer to record when used as parameter (see build_va_arg comments for
4261 an example), dropping the attribute in the process. So we tag the
4262 record instead. */
4264 /* For SYSV_ABI we use an array of one record. */
4265 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4267 /* For MS_ABI we use plain pointer to argument area. */
4268 tree char_ptr_type = build_pointer_type (char_type_node);
4269 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4270 TYPE_ATTRIBUTES (char_ptr_type));
4271 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4273 return ((ix86_abi == MS_ABI)
4274 ? ms_va_list_type_node
4275 : sysv_va_list_type_node);
4277 else
4279 /* For i386 we use plain pointer to argument area. */
4280 return build_pointer_type (char_type_node);
4284 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4286 static void
4287 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4289 rtx save_area, mem;
4290 alias_set_type set;
4291 int i, max;
4293 /* GPR size of varargs save area. */
4294 if (cfun->va_list_gpr_size)
4295 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4296 else
4297 ix86_varargs_gpr_size = 0;
4299 /* FPR size of varargs save area. We don't need it if we don't pass
4300 anything in SSE registers. */
4301 if (TARGET_SSE && cfun->va_list_fpr_size)
4302 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4303 else
4304 ix86_varargs_fpr_size = 0;
4306 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4307 return;
4309 save_area = frame_pointer_rtx;
4310 set = get_varargs_alias_set ();
4312 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4313 if (max > X86_64_REGPARM_MAX)
4314 max = X86_64_REGPARM_MAX;
4316 for (i = cum->regno; i < max; i++)
4318 mem = gen_rtx_MEM (word_mode,
4319 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4320 MEM_NOTRAP_P (mem) = 1;
4321 set_mem_alias_set (mem, set);
4322 emit_move_insn (mem,
4323 gen_rtx_REG (word_mode,
4324 x86_64_int_parameter_registers[i]));
4327 if (ix86_varargs_fpr_size)
4329 machine_mode smode;
4330 rtx_code_label *label;
4331 rtx test;
4333 /* Now emit code to save SSE registers. The AX parameter contains the number
4334 of SSE parameter registers used to call this function, though all we
4335 actually check here is the zero/non-zero status. */
4337 label = gen_label_rtx ();
4338 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4339 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4340 label));
4342 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4343 we used movdqa (i.e. TImode) instead? Perhaps even better would
4344 be if we could determine the real mode of the data, via a hook
4345 into pass_stdarg. Ignore all that for now. */
4346 smode = V4SFmode;
4347 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4348 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4350 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4351 if (max > X86_64_SSE_REGPARM_MAX)
4352 max = X86_64_SSE_REGPARM_MAX;
4354 for (i = cum->sse_regno; i < max; ++i)
4356 mem = plus_constant (Pmode, save_area,
4357 i * 16 + ix86_varargs_gpr_size);
4358 mem = gen_rtx_MEM (smode, mem);
4359 MEM_NOTRAP_P (mem) = 1;
4360 set_mem_alias_set (mem, set);
4361 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4363 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4366 emit_label (label);
4370 static void
4371 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4373 alias_set_type set = get_varargs_alias_set ();
4374 int i;
4376 /* Reset to zero, as there might be a sysv va_arg used
4377 before. */
4378 ix86_varargs_gpr_size = 0;
4379 ix86_varargs_fpr_size = 0;
4381 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4383 rtx reg, mem;
4385 mem = gen_rtx_MEM (Pmode,
4386 plus_constant (Pmode, virtual_incoming_args_rtx,
4387 i * UNITS_PER_WORD));
4388 MEM_NOTRAP_P (mem) = 1;
4389 set_mem_alias_set (mem, set);
4391 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4392 emit_move_insn (mem, reg);
4396 static void
4397 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4398 const function_arg_info &arg,
4399 int *, int no_rtl)
4401 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4402 CUMULATIVE_ARGS next_cum;
4403 tree fntype;
4405 /* This argument doesn't appear to be used anymore. Which is good,
4406 because the old code here didn't suppress rtl generation. */
4407 gcc_assert (!no_rtl);
4409 if (!TARGET_64BIT)
4410 return;
4412 fntype = TREE_TYPE (current_function_decl);
4414 /* For varargs, we do not want to skip the dummy va_dcl argument.
4415 For stdargs, we do want to skip the last named argument. */
4416 next_cum = *cum;
4417 if (stdarg_p (fntype))
4418 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4420 if (cum->call_abi == MS_ABI)
4421 setup_incoming_varargs_ms_64 (&next_cum);
4422 else
4423 setup_incoming_varargs_64 (&next_cum);
4426 /* Checks if TYPE is of kind va_list char *. */
4428 static bool
4429 is_va_list_char_pointer (tree type)
4431 tree canonic;
4433 /* For 32-bit it is always true. */
4434 if (!TARGET_64BIT)
4435 return true;
4436 canonic = ix86_canonical_va_list_type (type);
4437 return (canonic == ms_va_list_type_node
4438 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4441 /* Implement va_start. */
4443 static void
4444 ix86_va_start (tree valist, rtx nextarg)
4446 HOST_WIDE_INT words, n_gpr, n_fpr;
4447 tree f_gpr, f_fpr, f_ovf, f_sav;
4448 tree gpr, fpr, ovf, sav, t;
4449 tree type;
4450 rtx ovf_rtx;
4452 if (flag_split_stack
4453 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4455 unsigned int scratch_regno;
4457 /* When we are splitting the stack, we can't refer to the stack
4458 arguments using internal_arg_pointer, because they may be on
4459 the old stack. The split stack prologue will arrange to
4460 leave a pointer to the old stack arguments in a scratch
4461 register, which we here copy to a pseudo-register. The split
4462 stack prologue can't set the pseudo-register directly because
4463 it (the prologue) runs before any registers have been saved. */
4465 scratch_regno = split_stack_prologue_scratch_regno ();
4466 if (scratch_regno != INVALID_REGNUM)
4468 rtx reg;
4469 rtx_insn *seq;
4471 reg = gen_reg_rtx (Pmode);
4472 cfun->machine->split_stack_varargs_pointer = reg;
4474 start_sequence ();
4475 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4476 seq = get_insns ();
4477 end_sequence ();
4479 push_topmost_sequence ();
4480 emit_insn_after (seq, entry_of_function ());
4481 pop_topmost_sequence ();
4485 /* Only 64bit target needs something special. */
4486 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4488 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4489 std_expand_builtin_va_start (valist, nextarg);
4490 else
4492 rtx va_r, next;
4494 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4495 next = expand_binop (ptr_mode, add_optab,
4496 cfun->machine->split_stack_varargs_pointer,
4497 crtl->args.arg_offset_rtx,
4498 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4499 convert_move (va_r, next, 0);
4501 return;
4504 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4505 f_fpr = DECL_CHAIN (f_gpr);
4506 f_ovf = DECL_CHAIN (f_fpr);
4507 f_sav = DECL_CHAIN (f_ovf);
4509 valist = build_simple_mem_ref (valist);
4510 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4511 /* The following should be folded into the MEM_REF offset. */
4512 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4513 f_gpr, NULL_TREE);
4514 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4515 f_fpr, NULL_TREE);
4516 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4517 f_ovf, NULL_TREE);
4518 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4519 f_sav, NULL_TREE);
4521 /* Count number of gp and fp argument registers used. */
4522 words = crtl->args.info.words;
4523 n_gpr = crtl->args.info.regno;
4524 n_fpr = crtl->args.info.sse_regno;
4526 if (cfun->va_list_gpr_size)
4528 type = TREE_TYPE (gpr);
4529 t = build2 (MODIFY_EXPR, type,
4530 gpr, build_int_cst (type, n_gpr * 8));
4531 TREE_SIDE_EFFECTS (t) = 1;
4532 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4535 if (TARGET_SSE && cfun->va_list_fpr_size)
4537 type = TREE_TYPE (fpr);
4538 t = build2 (MODIFY_EXPR, type, fpr,
4539 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4540 TREE_SIDE_EFFECTS (t) = 1;
4541 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4544 /* Find the overflow area. */
4545 type = TREE_TYPE (ovf);
4546 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4547 ovf_rtx = crtl->args.internal_arg_pointer;
4548 else
4549 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4550 t = make_tree (type, ovf_rtx);
4551 if (words != 0)
4552 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4554 t = build2 (MODIFY_EXPR, type, ovf, t);
4555 TREE_SIDE_EFFECTS (t) = 1;
4556 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4558 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4560 /* Find the register save area.
4561 The function prologue saves it right above the stack frame. */
4562 type = TREE_TYPE (sav);
4563 t = make_tree (type, frame_pointer_rtx);
4564 if (!ix86_varargs_gpr_size)
4565 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4567 t = build2 (MODIFY_EXPR, type, sav, t);
4568 TREE_SIDE_EFFECTS (t) = 1;
4569 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
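/* A sketch of what the assignments above amount to in the common case
   (offsets per the SysV x86-64 psABI; WORDS is the number of stack
   words used by named arguments):

     gp_offset         = <named GP regs used> * 8          [0 .. 48]
     fp_offset         = 48 + <named SSE regs used> * 16   [48 .. 176]
     overflow_arg_area = incoming argument pointer + WORDS * 8
     reg_save_area     = start of the register save area  */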
4573 /* Implement va_arg. */
4575 static tree
4576 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4577 gimple_seq *post_p)
4579 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4580 tree f_gpr, f_fpr, f_ovf, f_sav;
4581 tree gpr, fpr, ovf, sav, t;
4582 int size, rsize;
4583 tree lab_false, lab_over = NULL_TREE;
4584 tree addr, t2;
4585 rtx container;
4586 int indirect_p = 0;
4587 tree ptrtype;
4588 machine_mode nat_mode;
4589 unsigned int arg_boundary;
4590 unsigned int type_align;
4592 /* Only 64bit target needs something special. */
4593 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4594 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4596 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4597 f_fpr = DECL_CHAIN (f_gpr);
4598 f_ovf = DECL_CHAIN (f_fpr);
4599 f_sav = DECL_CHAIN (f_ovf);
4601 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4602 valist, f_gpr, NULL_TREE);
4604 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4605 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4606 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4608 indirect_p = pass_va_arg_by_reference (type);
4609 if (indirect_p)
4610 type = build_pointer_type (type);
4611 size = arg_int_size_in_bytes (type);
4612 rsize = CEIL (size, UNITS_PER_WORD);
4614 nat_mode = type_natural_mode (type, NULL, false);
4615 switch (nat_mode)
4617 case E_V8SFmode:
4618 case E_V8SImode:
4619 case E_V32QImode:
4620 case E_V16HImode:
4621 case E_V4DFmode:
4622 case E_V4DImode:
4623 case E_V16SFmode:
4624 case E_V16SImode:
4625 case E_V64QImode:
4626 case E_V32HImode:
4627 case E_V8DFmode:
4628 case E_V8DImode:
4629 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4630 if (!TARGET_64BIT_MS_ABI)
4632 container = NULL;
4633 break;
4635 /* FALLTHRU */
4637 default:
4638 container = construct_container (nat_mode, TYPE_MODE (type),
4639 type, 0, X86_64_REGPARM_MAX,
4640 X86_64_SSE_REGPARM_MAX, intreg,
4641 0);
4642 break;
4645 /* Pull the value out of the saved registers. */
4647 addr = create_tmp_var (ptr_type_node, "addr");
4648 type_align = TYPE_ALIGN (type);
4650 if (container)
4652 int needed_intregs, needed_sseregs;
4653 bool need_temp;
4654 tree int_addr, sse_addr;
4656 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4657 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4659 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4661 need_temp = (!REG_P (container)
4662 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4663 || TYPE_ALIGN (type) > 128));
4665 /* In case we are passing a structure, verify that it is a consecutive block
4666 on the register save area. If not, we need to do moves. */
4667 if (!need_temp && !REG_P (container))
4669 /* Verify that all registers are strictly consecutive */
4670 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4672 int i;
4674 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4676 rtx slot = XVECEXP (container, 0, i);
4677 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4678 || INTVAL (XEXP (slot, 1)) != i * 16)
4679 need_temp = true;
4682 else
4684 int i;
4686 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4688 rtx slot = XVECEXP (container, 0, i);
4689 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4690 || INTVAL (XEXP (slot, 1)) != i * 8)
4691 need_temp = true;
4695 if (!need_temp)
4697 int_addr = addr;
4698 sse_addr = addr;
4700 else
4702 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4703 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4706 /* First ensure that we fit completely in registers. */
4707 if (needed_intregs)
4709 t = build_int_cst (TREE_TYPE (gpr),
4710 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4711 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4712 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4713 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4714 gimplify_and_add (t, pre_p);
4716 if (needed_sseregs)
4718 t = build_int_cst (TREE_TYPE (fpr),
4719 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4720 + X86_64_REGPARM_MAX * 8);
4721 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4722 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4723 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4724 gimplify_and_add (t, pre_p);
4727 /* Compute index to start of area used for integer regs. */
4728 if (needed_intregs)
4730 /* int_addr = gpr + sav; */
4731 t = fold_build_pointer_plus (sav, gpr);
4732 gimplify_assign (int_addr, t, pre_p);
4734 if (needed_sseregs)
4736 /* sse_addr = fpr + sav; */
4737 t = fold_build_pointer_plus (sav, fpr);
4738 gimplify_assign (sse_addr, t, pre_p);
4740 if (need_temp)
4742 int i, prev_size = 0;
4743 tree temp = create_tmp_var (type, "va_arg_tmp");
4745 /* addr = &temp; */
4746 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4747 gimplify_assign (addr, t, pre_p);
4749 for (i = 0; i < XVECLEN (container, 0); i++)
4751 rtx slot = XVECEXP (container, 0, i);
4752 rtx reg = XEXP (slot, 0);
4753 machine_mode mode = GET_MODE (reg);
4754 tree piece_type;
4755 tree addr_type;
4756 tree daddr_type;
4757 tree src_addr, src;
4758 int src_offset;
4759 tree dest_addr, dest;
4760 int cur_size = GET_MODE_SIZE (mode);
4762 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4763 prev_size = INTVAL (XEXP (slot, 1));
4764 if (prev_size + cur_size > size)
4766 cur_size = size - prev_size;
4767 unsigned int nbits = cur_size * BITS_PER_UNIT;
4768 if (!int_mode_for_size (nbits, 1).exists (&mode))
4769 mode = QImode;
4771 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4772 if (mode == GET_MODE (reg))
4773 addr_type = build_pointer_type (piece_type);
4774 else
4775 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4776 true);
4777 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4778 true);
4780 if (SSE_REGNO_P (REGNO (reg)))
4782 src_addr = sse_addr;
4783 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4785 else
4787 src_addr = int_addr;
4788 src_offset = REGNO (reg) * 8;
4790 src_addr = fold_convert (addr_type, src_addr);
4791 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4793 dest_addr = fold_convert (daddr_type, addr);
4794 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4795 if (cur_size == GET_MODE_SIZE (mode))
4797 src = build_va_arg_indirect_ref (src_addr);
4798 dest = build_va_arg_indirect_ref (dest_addr);
4800 gimplify_assign (dest, src, pre_p);
4802 else
4804 tree copy
4805 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4806 3, dest_addr, src_addr,
4807 size_int (cur_size));
4808 gimplify_and_add (copy, pre_p);
4810 prev_size += cur_size;
4814 if (needed_intregs)
4816 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4817 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4818 gimplify_assign (gpr, t, pre_p);
4819 /* The GPR save area guarantees only 8-byte alignment. */
4820 if (!need_temp)
4821 type_align = MIN (type_align, 64);
4824 if (needed_sseregs)
4826 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4827 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4828 gimplify_assign (unshare_expr (fpr), t, pre_p);
4831 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4833 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4836 /* ... otherwise out of the overflow area. */
4838 /* When we align a parameter on the stack for the caller, if the parameter's
4839 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4840 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
4841 with the caller. */
4842 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4843 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4844 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4846 /* Care for on-stack alignment if needed. */
4847 if (arg_boundary <= 64 || size == 0)
4848 t = ovf;
4849 else
4851 HOST_WIDE_INT align = arg_boundary / 8;
4852 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4853 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4854 build_int_cst (TREE_TYPE (t), -align));
4857 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4858 gimplify_assign (addr, t, pre_p);
4860 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4861 gimplify_assign (unshare_expr (ovf), t, pre_p);
4863 if (container)
4864 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4866 type = build_aligned_type (type, type_align);
4867 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4868 addr = fold_convert (ptrtype, addr);
4870 if (indirect_p)
4871 addr = build_va_arg_indirect_ref (addr);
4872 return build_va_arg_indirect_ref (addr);
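/* Schematically, for a plain 'va_arg (ap, int)' the sequence gimplified
   above reduces to something like this (a sketch, ignoring alignment
   and temporaries):

     if (ap->gp_offset >= 48) goto lab_false;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   lab_over:
     result = *(int *) addr;  */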
4875 /* Return true if OPNUM's MEM should be matched
4876 in movabs* patterns. */
4878 bool
4879 ix86_check_movabs (rtx insn, int opnum)
4881 rtx set, mem;
4883 set = PATTERN (insn);
4884 if (GET_CODE (set) == PARALLEL)
4885 set = XVECEXP (set, 0, 0);
4886 gcc_assert (GET_CODE (set) == SET);
4887 mem = XEXP (set, opnum);
4888 while (SUBREG_P (mem))
4889 mem = SUBREG_REG (mem);
4890 gcc_assert (MEM_P (mem));
4891 return volatile_ok || !MEM_VOLATILE_P (mem);
4894 /* Return false if INSN contains a MEM with a non-default address space. */
4895 bool
4896 ix86_check_no_addr_space (rtx insn)
4898 subrtx_var_iterator::array_type array;
4899 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4901 rtx x = *iter;
4902 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4903 return false;
4905 return true;
4908 /* Initialize the table of extra 80387 mathematical constants. */
4910 static void
4911 init_ext_80387_constants (void)
4913 static const char * cst[5] =
4915 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4916 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4917 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4918 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4919 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4921 int i;
4923 for (i = 0; i < 5; i++)
4925 real_from_string (&ext_80387_constants_table[i], cst[i]);
4926 /* Ensure each constant is rounded to XFmode precision. */
4927 real_convert (&ext_80387_constants_table[i],
4928 XFmode, &ext_80387_constants_table[i]);
4931 ext_80387_constants_init = 1;
4934 /* Return non-zero if the constant is something that
4935 can be loaded with a special instruction. */
4938 standard_80387_constant_p (rtx x)
4940 machine_mode mode = GET_MODE (x);
4942 const REAL_VALUE_TYPE *r;
4944 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4945 return -1;
4947 if (x == CONST0_RTX (mode))
4948 return 1;
4949 if (x == CONST1_RTX (mode))
4950 return 2;
4952 r = CONST_DOUBLE_REAL_VALUE (x);
4954 /* For XFmode constants, try to find a special 80387 instruction when
4955 optimizing for size or on those CPUs that benefit from them. */
4956 if (mode == XFmode
4957 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
4959 int i;
4961 if (! ext_80387_constants_init)
4962 init_ext_80387_constants ();
4964 for (i = 0; i < 5; i++)
4965 if (real_identical (r, &ext_80387_constants_table[i]))
4966 return i + 3;
4969 /* Load of the constant -0.0 or -1.0 will be split as
4970 fldz;fchs or fld1;fchs sequence. */
4971 if (real_isnegzero (r))
4972 return 8;
4973 if (real_identical (r, &dconstm1))
4974 return 9;
4976 return 0;
4979 /* Return the opcode of the special instruction to be used to load
4980 the constant X. */
4982 const char *
4983 standard_80387_constant_opcode (rtx x)
4985 switch (standard_80387_constant_p (x))
4987 case 1:
4988 return "fldz";
4989 case 2:
4990 return "fld1";
4991 case 3:
4992 return "fldlg2";
4993 case 4:
4994 return "fldln2";
4995 case 5:
4996 return "fldl2e";
4997 case 6:
4998 return "fldl2t";
4999 case 7:
5000 return "fldpi";
5001 case 8:
5002 case 9:
5003 return "#";
5004 default:
5005 gcc_unreachable ();
5009 /* Return the CONST_DOUBLE representing the 80387 constant that is
5010 loaded by the specified special instruction. The argument IDX
5011 matches the return value from standard_80387_constant_p. */
5014 standard_80387_constant_rtx (int idx)
5016 int i;
5018 if (! ext_80387_constants_init)
5019 init_ext_80387_constants ();
5021 switch (idx)
5023 case 3:
5024 case 4:
5025 case 5:
5026 case 6:
5027 case 7:
5028 i = idx - 3;
5029 break;
5031 default:
5032 gcc_unreachable ();
5035 return const_double_from_real_value (ext_80387_constants_table[i],
5036 XFmode);
5039 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
5040 in supported SSE/AVX vector mode. */
5043 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5045 machine_mode mode;
5047 if (!TARGET_SSE)
5048 return 0;
5050 mode = GET_MODE (x);
5052 if (x == const0_rtx || const0_operand (x, mode))
5053 return 1;
5055 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
5057 /* VOIDmode integer constant, get mode from the predicate. */
5058 if (mode == VOIDmode)
5059 mode = pred_mode;
5061 switch (GET_MODE_SIZE (mode))
5063 case 64:
5064 if (TARGET_AVX512F)
5065 return 2;
5066 break;
5067 case 32:
5068 if (TARGET_AVX2)
5069 return 2;
5070 break;
5071 case 16:
5072 if (TARGET_SSE2)
5073 return 2;
5074 break;
5075 case 0:
5076 /* VOIDmode */
5077 gcc_unreachable ();
5078 default:
5079 break;
5083 return 0;
5086 /* Return the opcode of the special instruction to be used to load
5087 the constant operands[1] into operands[0]. */
5089 const char *
5090 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5092 machine_mode mode;
5093 rtx x = operands[1];
5095 gcc_assert (TARGET_SSE);
5097 mode = GET_MODE (x);
5099 if (x == const0_rtx || const0_operand (x, mode))
5101 switch (get_attr_mode (insn))
5103 case MODE_TI:
5104 if (!EXT_REX_SSE_REG_P (operands[0]))
5105 return "%vpxor\t%0, %d0";
5106 /* FALLTHRU */
5107 case MODE_XI:
5108 case MODE_OI:
5109 if (EXT_REX_SSE_REG_P (operands[0]))
5110 return (TARGET_AVX512VL
5111 ? "vpxord\t%x0, %x0, %x0"
5112 : "vpxord\t%g0, %g0, %g0");
5113 return "vpxor\t%x0, %x0, %x0";
5115 case MODE_V2DF:
5116 if (!EXT_REX_SSE_REG_P (operands[0]))
5117 return "%vxorpd\t%0, %d0";
5118 /* FALLTHRU */
5119 case MODE_V8DF:
5120 case MODE_V4DF:
5121 if (!EXT_REX_SSE_REG_P (operands[0]))
5122 return "vxorpd\t%x0, %x0, %x0";
5123 else if (TARGET_AVX512DQ)
5124 return (TARGET_AVX512VL
5125 ? "vxorpd\t%x0, %x0, %x0"
5126 : "vxorpd\t%g0, %g0, %g0");
5127 else
5128 return (TARGET_AVX512VL
5129 ? "vpxorq\t%x0, %x0, %x0"
5130 : "vpxorq\t%g0, %g0, %g0");
5132 case MODE_V4SF:
5133 if (!EXT_REX_SSE_REG_P (operands[0]))
5134 return "%vxorps\t%0, %d0";
5135 /* FALLTHRU */
5136 case MODE_V16SF:
5137 case MODE_V8SF:
5138 if (!EXT_REX_SSE_REG_P (operands[0]))
5139 return "vxorps\t%x0, %x0, %x0";
5140 else if (TARGET_AVX512DQ)
5141 return (TARGET_AVX512VL
5142 ? "vxorps\t%x0, %x0, %x0"
5143 : "vxorps\t%g0, %g0, %g0");
5144 else
5145 return (TARGET_AVX512VL
5146 ? "vpxord\t%x0, %x0, %x0"
5147 : "vpxord\t%g0, %g0, %g0");
5149 default:
5150 gcc_unreachable ();
5153 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
5155 enum attr_mode insn_mode = get_attr_mode (insn);
5157 switch (insn_mode)
5159 case MODE_XI:
5160 case MODE_V8DF:
5161 case MODE_V16SF:
5162 gcc_assert (TARGET_AVX512F);
5163 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5165 case MODE_OI:
5166 case MODE_V4DF:
5167 case MODE_V8SF:
5168 gcc_assert (TARGET_AVX2);
5169 /* FALLTHRU */
5170 case MODE_TI:
5171 case MODE_V2DF:
5172 case MODE_V4SF:
5173 gcc_assert (TARGET_SSE2);
5174 if (!EXT_REX_SSE_REG_P (operands[0]))
5175 return (TARGET_AVX
5176 ? "vpcmpeqd\t%0, %0, %0"
5177 : "pcmpeqd\t%0, %0");
5178 else if (TARGET_AVX512VL)
5179 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5180 else
5181 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5183 default:
5184 gcc_unreachable ();
5188 gcc_unreachable ();
5191 /* Returns true if INSN can be transformed from a memory load
5192 to a supported FP constant load. */
5194 bool
5195 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5197 rtx src = find_constant_src (insn);
5199 gcc_assert (REG_P (dst));
5201 if (src == NULL
5202 || (SSE_REGNO_P (REGNO (dst))
5203 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5204 || (STACK_REGNO_P (REGNO (dst))
5205 && standard_80387_constant_p (src) < 1))
5206 return false;
5208 return true;
5211 /* Predicate for pre-reload splitters with associated instructions,
5212 which can match any time before the split1 pass (usually combine),
5213 then are unconditionally split in that pass and should not be
5214 matched again afterwards. */
5216 bool
5217 ix86_pre_reload_split (void)
5219 return (can_create_pseudo_p ()
5220 && !(cfun->curr_properties & PROP_rtl_split_insns));
5223 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5224 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5225 TARGET_AVX512VL or the move must be a register-to-register move,
5226 which can be done as a zmm register move. */
5228 static const char *
5229 ix86_get_ssemov (rtx *operands, unsigned size,
5230 enum attr_mode insn_mode, machine_mode mode)
5232 char buf[128];
5233 bool misaligned_p = (misaligned_operand (operands[0], mode)
5234 || misaligned_operand (operands[1], mode));
5235 bool evex_reg_p = (size == 64
5236 || EXT_REX_SSE_REG_P (operands[0])
5237 || EXT_REX_SSE_REG_P (operands[1]));
5238 machine_mode scalar_mode;
5240 const char *opcode = NULL;
5241 enum
5243 opcode_int,
5244 opcode_float,
5245 opcode_double
5246 } type = opcode_int;
5248 switch (insn_mode)
5250 case MODE_V16SF:
5251 case MODE_V8SF:
5252 case MODE_V4SF:
5253 scalar_mode = E_SFmode;
5254 type = opcode_float;
5255 break;
5256 case MODE_V8DF:
5257 case MODE_V4DF:
5258 case MODE_V2DF:
5259 scalar_mode = E_DFmode;
5260 type = opcode_double;
5261 break;
5262 case MODE_XI:
5263 case MODE_OI:
5264 case MODE_TI:
5265 scalar_mode = GET_MODE_INNER (mode);
5266 break;
5267 default:
5268 gcc_unreachable ();
5271 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5272 we can only use zmm register move without memory operand. */
5273 if (evex_reg_p
5274 && !TARGET_AVX512VL
5275 && GET_MODE_SIZE (mode) < 64)
5277 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5278 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5279 AVX512VL is disabled, LRA can still generate reg to
5280 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5281 modes. */
5282 if (memory_operand (operands[0], mode)
5283 || memory_operand (operands[1], mode))
5284 gcc_unreachable ();
5285 size = 64;
5286 switch (type)
5288 case opcode_int:
5289 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5290 break;
5291 case opcode_float:
5292 opcode = misaligned_p ? "vmovups" : "vmovaps";
5293 break;
5294 case opcode_double:
5295 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5296 break;
5299 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5301 switch (scalar_mode)
5303 case E_SFmode:
5304 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5305 break;
5306 case E_DFmode:
5307 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5308 break;
5309 case E_TFmode:
5310 if (evex_reg_p)
5311 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5312 else
5313 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5314 break;
5315 default:
5316 gcc_unreachable ();
5319 else if (SCALAR_INT_MODE_P (scalar_mode))
5321 switch (scalar_mode)
5323 case E_QImode:
5324 if (evex_reg_p)
5325 opcode = (misaligned_p
5326 ? (TARGET_AVX512BW
5327 ? "vmovdqu8"
5328 : "vmovdqu64")
5329 : "vmovdqa64");
5330 else
5331 opcode = (misaligned_p
5332 ? (TARGET_AVX512BW
5333 ? "vmovdqu8"
5334 : "%vmovdqu")
5335 : "%vmovdqa");
5336 break;
5337 case E_HImode:
5338 if (evex_reg_p)
5339 opcode = (misaligned_p
5340 ? (TARGET_AVX512BW
5341 ? "vmovdqu16"
5342 : "vmovdqu64")
5343 : "vmovdqa64");
5344 else
5345 opcode = (misaligned_p
5346 ? (TARGET_AVX512BW
5347 ? "vmovdqu16"
5348 : "%vmovdqu")
5349 : "%vmovdqa");
5350 break;
5351 case E_SImode:
5352 if (evex_reg_p)
5353 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5354 else
5355 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5356 break;
5357 case E_DImode:
5358 case E_TImode:
5359 case E_OImode:
5360 if (evex_reg_p)
5361 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5362 else
5363 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5364 break;
5365 case E_XImode:
5366 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5367 break;
5368 default:
5369 gcc_unreachable ();
5372 else
5373 gcc_unreachable ();
5375 switch (size)
5377 case 64:
5378 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5379 opcode);
5380 break;
5381 case 32:
5382 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5383 opcode);
5384 break;
5385 case 16:
5386 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5387 opcode);
5388 break;
5389 default:
5390 gcc_unreachable ();
5392 output_asm_insn (buf, operands);
5393 return "";
5396 /* Return the template of the TYPE_SSEMOV instruction to move
5397 operands[1] into operands[0]. */
5399 const char *
5400 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5402 machine_mode mode = GET_MODE (operands[0]);
5403 if (get_attr_type (insn) != TYPE_SSEMOV
5404 || mode != GET_MODE (operands[1]))
5405 gcc_unreachable ();
5407 enum attr_mode insn_mode = get_attr_mode (insn);
5409 switch (insn_mode)
5411 case MODE_XI:
5412 case MODE_V8DF:
5413 case MODE_V16SF:
5414 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5416 case MODE_OI:
5417 case MODE_V4DF:
5418 case MODE_V8SF:
5419 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5421 case MODE_TI:
5422 case MODE_V2DF:
5423 case MODE_V4SF:
5424 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5426 case MODE_DI:
5427 /* Handle broken assemblers that require movd instead of movq. */
5428 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5429 && (GENERAL_REG_P (operands[0])
5430 || GENERAL_REG_P (operands[1])))
5431 return "%vmovd\t{%1, %0|%0, %1}";
5432 else
5433 return "%vmovq\t{%1, %0|%0, %1}";
5435 case MODE_SI:
5436 return "%vmovd\t{%1, %0|%0, %1}";
5438 case MODE_DF:
5439 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5440 return "vmovsd\t{%d1, %0|%0, %d1}";
5441 else
5442 return "%vmovsd\t{%1, %0|%0, %1}";
5444 case MODE_SF:
5445 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5446 return "vmovss\t{%d1, %0|%0, %d1}";
5447 else
5448 return "%vmovss\t{%1, %0|%0, %1}";
5450 case MODE_V1DF:
5451 gcc_assert (!TARGET_AVX);
5452 return "movlpd\t{%1, %0|%0, %1}";
5454 case MODE_V2SF:
5455 if (TARGET_AVX && REG_P (operands[0]))
5456 return "vmovlps\t{%1, %d0|%d0, %1}";
5457 else
5458 return "%vmovlps\t{%1, %0|%0, %1}";
5460 default:
5461 gcc_unreachable ();
5465 /* Returns true if OP contains a symbol reference */
5467 bool
5468 symbolic_reference_mentioned_p (rtx op)
5470 const char *fmt;
5471 int i;
5473 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5474 return true;
5476 fmt = GET_RTX_FORMAT (GET_CODE (op));
5477 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5479 if (fmt[i] == 'E')
5481 int j;
5483 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5484 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5485 return true;
5488 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5489 return true;
5492 return false;
5495 /* Return true if it is appropriate to emit `ret' instructions in the
5496 body of a function. Do this only if the epilogue is simple, needing a
5497 couple of insns. Prior to reloading, we can't tell how many registers
5498 must be saved, so return false then. Return false if there is no frame
5499 marker to de-allocate. */
5501 bool
5502 ix86_can_use_return_insn_p (void)
5504 if (ix86_function_naked (current_function_decl))
5505 return false;
5507 /* Don't use `ret' instruction in interrupt handler. */
5508 if (! reload_completed
5509 || frame_pointer_needed
5510 || cfun->machine->func_type != TYPE_NORMAL)
5511 return 0;
5513 /* Don't allow more than 32k pop, since that's all we can do
5514 with one instruction. */
5515 if (crtl->args.pops_args && crtl->args.size >= 32768)
5516 return 0;
5518 struct ix86_frame &frame = cfun->machine->frame;
5519 return (frame.stack_pointer_offset == UNITS_PER_WORD
5520 && (frame.nregs + frame.nsseregs) == 0);
5523 /* Return stack frame size. get_frame_size () returns used stack slots
5524 during compilation, which may be optimized out later. If stack frame
5525 is needed, stack_frame_required should be true. */
5527 static HOST_WIDE_INT
5528 ix86_get_frame_size (void)
5530 if (cfun->machine->stack_frame_required)
5531 return get_frame_size ();
5532 else
5533 return 0;
5536 /* Value should be nonzero if functions must have frame pointers.
5537 Zero means the frame pointer need not be set up (and parms may
5538 be accessed via the stack pointer) in functions that seem suitable. */
5540 static bool
5541 ix86_frame_pointer_required (void)
5543 /* If we accessed previous frames, then the generated code expects
5544 to be able to access the saved ebp value in our frame. */
5545 if (cfun->machine->accesses_prev_frame)
5546 return true;
5548 /* Several x86 os'es need a frame pointer for other reasons,
5549 usually pertaining to setjmp. */
5550 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5551 return true;
5553 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5554 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5555 return true;
5557 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
5558 allocation is 4GB. */
5559 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5560 return true;
5562 /* SSE saves require frame-pointer when stack is misaligned. */
5563 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5564 return true;
5566 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5567 turns off the frame pointer by default. Turn it back on now if
5568 we've not got a leaf function. */
5569 if (TARGET_OMIT_LEAF_FRAME_POINTER
5570 && (!crtl->is_leaf
5571 || ix86_current_function_calls_tls_descriptor))
5572 return true;
5574 /* Several versions of mcount for the x86 assume that there is a
5575 frame, so we cannot allow profiling without a frame pointer. */
5576 if (crtl->profile && !flag_fentry)
5577 return true;
5579 return false;
5582 /* Record that the current function accesses previous call frames. */
5584 void
5585 ix86_setup_frame_addresses (void)
5587 cfun->machine->accesses_prev_frame = 1;
5590 #ifndef USE_HIDDEN_LINKONCE
5591 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5592 # define USE_HIDDEN_LINKONCE 1
5593 # else
5594 # define USE_HIDDEN_LINKONCE 0
5595 # endif
5596 #endif
5598 /* Label count for call and return thunks. It is used to make unique
5599 labels in call and return thunks. */
5600 static int indirectlabelno;
5602 /* True if call thunk function is needed. */
5603 static bool indirect_thunk_needed = false;
5605 /* Bit masks of integer registers, which contain branch target, used
5606 by call thunk functions. */
5607 static int indirect_thunks_used;
5609 /* True if return thunk function is needed. */
5610 static bool indirect_return_needed = false;
5612 /* True if return thunk function via CX is needed. */
5613 static bool indirect_return_via_cx;
5615 #ifndef INDIRECT_LABEL
5616 # define INDIRECT_LABEL "LIND"
5617 #endif
5619 /* Indicate what prefix is needed for an indirect branch. */
5620 enum indirect_thunk_prefix
5622 indirect_thunk_prefix_none,
5623 indirect_thunk_prefix_nt
5626 /* Return the prefix needed for an indirect branch INSN. */
5628 enum indirect_thunk_prefix
5629 indirect_thunk_need_prefix (rtx_insn *insn)
5631 enum indirect_thunk_prefix need_prefix;
5632 if ((cfun->machine->indirect_branch_type
5633 == indirect_branch_thunk_extern)
5634 && ix86_notrack_prefixed_insn_p (insn))
5636 /* NOTRACK prefix is only used with external thunk so that it
5637 can be properly updated to support CET at run-time. */
5638 need_prefix = indirect_thunk_prefix_nt;
5640 else
5641 need_prefix = indirect_thunk_prefix_none;
5642 return need_prefix;
5645 /* Fills in the label name that should be used for the indirect thunk. */
5647 static void
5648 indirect_thunk_name (char name[32], unsigned int regno,
5649 enum indirect_thunk_prefix need_prefix,
5650 bool ret_p)
5652 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5653 gcc_unreachable ();
5655 if (USE_HIDDEN_LINKONCE)
5657 const char *prefix;
5659 if (need_prefix == indirect_thunk_prefix_nt
5660 && regno != INVALID_REGNUM)
5662 /* NOTRACK prefix is only used with external thunk via
5663 register so that NOTRACK prefix can be added to indirect
5664 branch via register to support CET at run-time. */
5665 prefix = "_nt";
5667 else
5668 prefix = "";
5670 const char *ret = ret_p ? "return" : "indirect";
5672 if (regno != INVALID_REGNUM)
5674 const char *reg_prefix;
5675 if (LEGACY_INT_REGNO_P (regno))
5676 reg_prefix = TARGET_64BIT ? "r" : "e";
5677 else
5678 reg_prefix = "";
5679 sprintf (name, "__x86_%s_thunk%s_%s%s",
5680 ret, prefix, reg_prefix, reg_names[regno]);
5682 else
5683 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5685 else
5687 if (regno != INVALID_REGNUM)
5688 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5689 else
5691 if (ret_p)
5692 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5693 else
5694 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5699 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5700 the function address is in REGNO and the call and return thunk looks like:
5702 call L2
5703 L1:
5704 pause
5705 lfence
5706 jmp L1
5707 L2:
5708 mov %REG, (%sp)
5709 ret
5711 Otherwise, the function address is on the top of stack and the
5712 call and return thunk looks like:
5714 call L2
5715 L1:
5716 pause
5717 lfence
5718 jmp L1
5719 L2:
5720 lea WORD_SIZE(%sp), %sp
5721 ret
5722 */
5724 static void
5725 output_indirect_thunk (unsigned int regno)
5727 char indirectlabel1[32];
5728 char indirectlabel2[32];
5730 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5731 indirectlabelno++);
5732 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5733 indirectlabelno++);
5735 /* Call */
5736 fputs ("\tcall\t", asm_out_file);
5737 assemble_name_raw (asm_out_file, indirectlabel2);
5738 fputc ('\n', asm_out_file);
5740 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5742 /* AMD and Intel CPUs each prefer a different instruction as a loop filler.
5743 Using both pause + lfence is a compromise solution. */
5744 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5746 /* Jump. */
5747 fputs ("\tjmp\t", asm_out_file);
5748 assemble_name_raw (asm_out_file, indirectlabel1);
5749 fputc ('\n', asm_out_file);
5751 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5753 /* The above call insn pushed a word to stack. Adjust CFI info. */
5754 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5756 if (! dwarf2out_do_cfi_asm ())
5758 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5759 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5760 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5761 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5763 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5764 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5765 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5766 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5767 dwarf2out_emit_cfi (xcfi);
5770 if (regno != INVALID_REGNUM)
5772 /* MOV. */
5773 rtx xops[2];
5774 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5775 xops[1] = gen_rtx_REG (word_mode, regno);
5776 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5778 else
5780 /* LEA. */
5781 rtx xops[2];
5782 xops[0] = stack_pointer_rtx;
5783 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5784 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5787 fputs ("\tret\n", asm_out_file);
5790 /* Output a function with a call and return thunk for indirect branch.
5791 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5792 Otherwise, the function address is on the top of stack. Thunk is
5793 used for function return if RET_P is true. */
5795 static void
5796 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5797 unsigned int regno, bool ret_p)
5799 char name[32];
5800 tree decl;
5802 /* Create __x86_indirect_thunk. */
5803 indirect_thunk_name (name, regno, need_prefix, ret_p);
5804 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5805 get_identifier (name),
5806 build_function_type_list (void_type_node, NULL_TREE));
5807 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5808 NULL_TREE, void_type_node);
5809 TREE_PUBLIC (decl) = 1;
5810 TREE_STATIC (decl) = 1;
5811 DECL_IGNORED_P (decl) = 1;
5813 #if TARGET_MACHO
5814 if (TARGET_MACHO)
5816 switch_to_section (darwin_sections[picbase_thunk_section]);
5817 fputs ("\t.weak_definition\t", asm_out_file);
5818 assemble_name (asm_out_file, name);
5819 fputs ("\n\t.private_extern\t", asm_out_file);
5820 assemble_name (asm_out_file, name);
5821 putc ('\n', asm_out_file);
5822 ASM_OUTPUT_LABEL (asm_out_file, name);
5823 DECL_WEAK (decl) = 1;
5825 else
5826 #endif
5827 if (USE_HIDDEN_LINKONCE)
5829 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5831 targetm.asm_out.unique_section (decl, 0);
5832 switch_to_section (get_named_section (decl, NULL, 0));
5834 targetm.asm_out.globalize_label (asm_out_file, name);
5835 fputs ("\t.hidden\t", asm_out_file);
5836 assemble_name (asm_out_file, name);
5837 putc ('\n', asm_out_file);
5838 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5840 else
5842 switch_to_section (text_section);
5843 ASM_OUTPUT_LABEL (asm_out_file, name);
5846 DECL_INITIAL (decl) = make_node (BLOCK);
5847 current_function_decl = decl;
5848 allocate_struct_function (decl, false);
5849 init_function_start (decl);
5850 /* We're about to hide the function body from callees of final_* by
5851 emitting it directly; tell them we're a thunk, if they care. */
5852 cfun->is_thunk = true;
5853 first_function_block_is_cold = false;
5854 /* Make sure unwind info is emitted for the thunk if needed. */
5855 final_start_function (emit_barrier (), asm_out_file, 1);
5857 output_indirect_thunk (regno);
5859 final_end_function ();
5860 init_insn_lengths ();
5861 free_after_compilation (cfun);
5862 set_cfun (NULL);
5863 current_function_decl = NULL;
5866 static int pic_labels_used;
5868 /* Fills in the label name that should be used for a pc thunk for
5869 the given register. */
5871 static void
5872 get_pc_thunk_name (char name[32], unsigned int regno)
5874 gcc_assert (!TARGET_64BIT);
5876 if (USE_HIDDEN_LINKONCE)
5877 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5878 else
5879 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5883 /* This function generates code for -fpic that loads %ebx with
5884 the return address of the caller and then returns. */
5886 static void
5887 ix86_code_end (void)
5889 rtx xops[2];
5890 unsigned int regno;
5892 if (indirect_return_needed)
5893 output_indirect_thunk_function (indirect_thunk_prefix_none,
5894 INVALID_REGNUM, true);
5895 if (indirect_return_via_cx)
5896 output_indirect_thunk_function (indirect_thunk_prefix_none,
5897 CX_REG, true);
5898 if (indirect_thunk_needed)
5899 output_indirect_thunk_function (indirect_thunk_prefix_none,
5900 INVALID_REGNUM, false);
5902 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5904 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5905 if ((indirect_thunks_used & (1 << i)))
5906 output_indirect_thunk_function (indirect_thunk_prefix_none,
5907 regno, false);
5910 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5912 char name[32];
5913 tree decl;
5915 if ((indirect_thunks_used & (1 << regno)))
5916 output_indirect_thunk_function (indirect_thunk_prefix_none,
5917 regno, false);
5919 if (!(pic_labels_used & (1 << regno)))
5920 continue;
5922 get_pc_thunk_name (name, regno);
5924 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5925 get_identifier (name),
5926 build_function_type_list (void_type_node, NULL_TREE));
5927 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5928 NULL_TREE, void_type_node);
5929 TREE_PUBLIC (decl) = 1;
5930 TREE_STATIC (decl) = 1;
5931 DECL_IGNORED_P (decl) = 1;
5933 #if TARGET_MACHO
5934 if (TARGET_MACHO)
5936 switch_to_section (darwin_sections[picbase_thunk_section]);
5937 fputs ("\t.weak_definition\t", asm_out_file);
5938 assemble_name (asm_out_file, name);
5939 fputs ("\n\t.private_extern\t", asm_out_file);
5940 assemble_name (asm_out_file, name);
5941 putc ('\n', asm_out_file);
5942 ASM_OUTPUT_LABEL (asm_out_file, name);
5943 DECL_WEAK (decl) = 1;
5945 else
5946 #endif
5947 if (USE_HIDDEN_LINKONCE)
5949 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5951 targetm.asm_out.unique_section (decl, 0);
5952 switch_to_section (get_named_section (decl, NULL, 0));
5954 targetm.asm_out.globalize_label (asm_out_file, name);
5955 fputs ("\t.hidden\t", asm_out_file);
5956 assemble_name (asm_out_file, name);
5957 putc ('\n', asm_out_file);
5958 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5960 else
5962 switch_to_section (text_section);
5963 ASM_OUTPUT_LABEL (asm_out_file, name);
5966 DECL_INITIAL (decl) = make_node (BLOCK);
5967 current_function_decl = decl;
5968 allocate_struct_function (decl, false);
5969 init_function_start (decl);
5970 /* We're about to hide the function body from callees of final_* by
5971 emitting it directly; tell them we're a thunk, if they care. */
5972 cfun->is_thunk = true;
5973 first_function_block_is_cold = false;
5974 /* Make sure unwind info is emitted for the thunk if needed. */
5975 final_start_function (emit_barrier (), asm_out_file, 1);
5977 /* Pad stack IP move with 4 instructions (two NOPs count
5978 as one instruction). */
5979 if (TARGET_PAD_SHORT_FUNCTION)
5981 int i = 8;
5983 while (i--)
5984 fputs ("\tnop\n", asm_out_file);
5987 xops[0] = gen_rtx_REG (Pmode, regno);
5988 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5989 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5990 output_asm_insn ("%!ret", NULL);
5991 final_end_function ();
5992 init_insn_lengths ();
5993 free_after_compilation (cfun);
5994 set_cfun (NULL);
5995 current_function_decl = NULL;
5998 if (flag_split_stack)
5999 file_end_indicate_split_stack ();
6002 /* Emit code for the SET_GOT patterns. */
6004 const char *
6005 output_set_got (rtx dest, rtx label)
6007 rtx xops[3];
6009 xops[0] = dest;
6011 if (TARGET_VXWORKS_RTP && flag_pic)
6013 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6014 xops[2] = gen_rtx_MEM (Pmode,
6015 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6016 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6018 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6019 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6020 an unadorned address. */
6021 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6022 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6023 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6024 return "";
6027 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6029 if (flag_pic)
6031 char name[32];
6032 get_pc_thunk_name (name, REGNO (dest));
6033 pic_labels_used |= 1 << REGNO (dest);
6035 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6036 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6037 output_asm_insn ("%!call\t%X2", xops);
6039 #if TARGET_MACHO
6040 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6041 This is what will be referenced by the Mach-O PIC subsystem. */
6042 if (machopic_should_output_picbase_label () || !label)
6043 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6045 /* When we are restoring the pic base at the site of a nonlocal label,
6046 and we decided to emit the pic base above, we will still output a
6047 local label used for calculating the correction offset (even though
6048 the offset will be 0 in that case). */
6049 if (label)
6050 targetm.asm_out.internal_label (asm_out_file, "L",
6051 CODE_LABEL_NUMBER (label));
6052 #endif
6054 else
6056 if (TARGET_MACHO)
6057 /* We don't need a pic base, we're not producing pic. */
6058 gcc_unreachable ();
6060 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6061 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6062 targetm.asm_out.internal_label (asm_out_file, "L",
6063 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6066 if (!TARGET_MACHO)
6067 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6069 return "";
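/* For reference, in the common 32-bit non-Mach-O PIC case the sequence
   printed above is roughly

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   (register and thunk name depend on DEST); the add of
   _GLOBAL_OFFSET_TABLE_ resolves through the GOTPC relocation, leaving
   DEST pointing at the GOT.  */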
6072 /* Generate a "push" pattern for input ARG. */
6075 gen_push (rtx arg)
6077 struct machine_function *m = cfun->machine;
6079 if (m->fs.cfa_reg == stack_pointer_rtx)
6080 m->fs.cfa_offset += UNITS_PER_WORD;
6081 m->fs.sp_offset += UNITS_PER_WORD;
6083 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6084 arg = gen_rtx_REG (word_mode, REGNO (arg));
6086 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6087 gen_rtx_PRE_DEC (Pmode,
6088 stack_pointer_rtx)),
6089 arg);
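/* For example, on 64-bit targets (word_mode == DImode) gen_push applied to
   %rbx yields the pattern

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))

   and, as a side effect, bumps fs.sp_offset (and fs.cfa_offset while the
   stack pointer is still the CFA register) by UNITS_PER_WORD.  */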
6092 /* Generate a "pop" pattern for input ARG. */
6095 gen_pop (rtx arg)
6097 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6098 arg = gen_rtx_REG (word_mode, REGNO (arg));
6100 return gen_rtx_SET (arg,
6101 gen_rtx_MEM (word_mode,
6102 gen_rtx_POST_INC (Pmode,
6103 stack_pointer_rtx)));
6106 /* Return >= 0 if there is an unused call-clobbered register available
6107 for the entire function. */
6109 static unsigned int
6110 ix86_select_alt_pic_regnum (void)
6112 if (ix86_use_pseudo_pic_reg ())
6113 return INVALID_REGNUM;
6115 if (crtl->is_leaf
6116 && !crtl->profile
6117 && !ix86_current_function_calls_tls_descriptor)
6119 int i, drap;
6120 /* Can't use the same register for both PIC and DRAP. */
6121 if (crtl->drap_reg)
6122 drap = REGNO (crtl->drap_reg);
6123 else
6124 drap = -1;
6125 for (i = 2; i >= 0; --i)
6126 if (i != drap && !df_regs_ever_live_p (i))
6127 return i;
6130 return INVALID_REGNUM;
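/* Hard registers 2, 1 and 0 scanned above are %ecx, %edx and %eax in this
   port's register numbering, i.e. the call-clobbered integer registers of
   the 32-bit ABI, so a leaf function can often keep the GOT pointer in one
   of them instead of clobbering a call-saved register.  */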
6133 /* Return true if REGNO is used by the epilogue. */
6135 bool
6136 ix86_epilogue_uses (int regno)
6138 /* If there are no caller-saved registers, we preserve all registers,
6139 except for MMX and x87 registers which aren't supported when saving
6140 and restoring registers. Don't explicitly save SP register since
6141 it is always preserved. */
6142 return (epilogue_completed
6143 && cfun->machine->no_caller_saved_registers
6144 && !fixed_regs[regno]
6145 && !STACK_REGNO_P (regno)
6146 && !MMX_REGNO_P (regno));
6149 /* Return nonzero if register REGNO can be used as a scratch register
6150 in peephole2. */
6152 static bool
6153 ix86_hard_regno_scratch_ok (unsigned int regno)
6155 /* If there are no caller-saved registers, we can't use any register
6156 as a scratch register after epilogue and use REGNO as scratch
6157 register only if it has been used before to avoid saving and
6158 restoring it. */
6159 return (!cfun->machine->no_caller_saved_registers
6160 || (!epilogue_completed
6161 && df_regs_ever_live_p (regno)));
6164 /* Return TRUE if we need to save REGNO. */
6166 bool
6167 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6169 /* If there are no caller-saved registers, we preserve all registers,
6170 except for MMX and x87 registers which aren't supported when saving
6171 and restoring registers. Don't explicitly save SP register since
6172 it is always preserved. */
6173 if (cfun->machine->no_caller_saved_registers)
6175 /* Don't preserve registers used for function return value. */
6176 rtx reg = crtl->return_rtx;
6177 if (reg)
6179 unsigned int i = REGNO (reg);
6180 unsigned int nregs = REG_NREGS (reg);
6181 while (nregs-- > 0)
6182 if ((i + nregs) == regno)
6183 return false;
6186 return (df_regs_ever_live_p (regno)
6187 && !fixed_regs[regno]
6188 && !STACK_REGNO_P (regno)
6189 && !MMX_REGNO_P (regno)
6190 && (regno != HARD_FRAME_POINTER_REGNUM
6191 || !frame_pointer_needed));
6194 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6195 && pic_offset_table_rtx)
6197 if (ix86_use_pseudo_pic_reg ())
6199 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6200 _mcount in prologue. */
6201 if (!TARGET_64BIT && flag_pic && crtl->profile)
6202 return true;
6204 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6205 || crtl->profile
6206 || crtl->calls_eh_return
6207 || crtl->uses_const_pool
6208 || cfun->has_nonlocal_label)
6209 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6212 if (crtl->calls_eh_return && maybe_eh_return)
6214 unsigned i;
6215 for (i = 0; ; i++)
6217 unsigned test = EH_RETURN_DATA_REGNO (i);
6218 if (test == INVALID_REGNUM)
6219 break;
6220 if (test == regno)
6221 return true;
6225 if (ignore_outlined && cfun->machine->call_ms2sysv)
6227 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6228 + xlogue_layout::MIN_REGS;
6229 if (xlogue_layout::is_stub_managed_reg (regno, count))
6230 return false;
6233 if (crtl->drap_reg
6234 && regno == REGNO (crtl->drap_reg)
6235 && !cfun->machine->no_drap_save_restore)
6236 return true;
6238 return (df_regs_ever_live_p (regno)
6239 && !call_used_or_fixed_reg_p (regno)
6240 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6243 /* Return the number of saved general purpose registers. */
6245 static int
6246 ix86_nsaved_regs (void)
6248 int nregs = 0;
6249 int regno;
6251 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6252 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6253 nregs ++;
6254 return nregs;
6257 /* Return number of saved SSE registers. */
6259 static int
6260 ix86_nsaved_sseregs (void)
6262 int nregs = 0;
6263 int regno;
6265 if (!TARGET_64BIT_MS_ABI)
6266 return 0;
6267 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6268 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6269 nregs ++;
6270 return nregs;
6273 /* Given FROM and TO register numbers, say whether this elimination is
6274 allowed. If stack alignment is needed, we can only replace argument
6275 pointer with hard frame pointer, or replace frame pointer with stack
6276 pointer. Otherwise, frame pointer elimination is automatically
6277 handled and all other eliminations are valid. */
6279 static bool
6280 ix86_can_eliminate (const int from, const int to)
6282 if (stack_realign_fp)
6283 return ((from == ARG_POINTER_REGNUM
6284 && to == HARD_FRAME_POINTER_REGNUM)
6285 || (from == FRAME_POINTER_REGNUM
6286 && to == STACK_POINTER_REGNUM));
6287 else
6288 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6291 /* Return the offset between two registers, one to be eliminated, and the other
6292 its replacement, at the start of a routine. */
6294 HOST_WIDE_INT
6295 ix86_initial_elimination_offset (int from, int to)
6297 struct ix86_frame &frame = cfun->machine->frame;
6299 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6300 return frame.hard_frame_pointer_offset;
6301 else if (from == FRAME_POINTER_REGNUM
6302 && to == HARD_FRAME_POINTER_REGNUM)
6303 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6304 else
6306 gcc_assert (to == STACK_POINTER_REGNUM);
6308 if (from == ARG_POINTER_REGNUM)
6309 return frame.stack_pointer_offset;
6311 gcc_assert (from == FRAME_POINTER_REGNUM);
6312 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6316 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6317 void warn_once_call_ms2sysv_xlogues (const char *feature)
6319 static bool warned_once = false;
6320 if (!warned_once)
6322 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6323 feature);
6324 warned_once = true;
6328 /* Return the probing interval for -fstack-clash-protection. */
6330 static HOST_WIDE_INT
6331 get_probe_interval (void)
6333 if (flag_stack_clash_protection)
6334 return (HOST_WIDE_INT_1U
6335 << param_stack_clash_protection_probe_interval);
6336 else
6337 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
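/* With the usual defaults both exponents are 12, so either way this
   returns 4096 and the generated code touches the stack at least once
   per 4 KiB page.  */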
6340 /* When using -fsplit-stack, the allocation routines set a field in
6341 the TCB to the bottom of the stack plus this much space, measured
6342 in bytes. */
6344 #define SPLIT_STACK_AVAILABLE 256
6346 /* Fill structure ix86_frame about frame of currently computed function. */
6348 static void
6349 ix86_compute_frame_layout (void)
6351 struct ix86_frame *frame = &cfun->machine->frame;
6352 struct machine_function *m = cfun->machine;
6353 unsigned HOST_WIDE_INT stack_alignment_needed;
6354 HOST_WIDE_INT offset;
6355 unsigned HOST_WIDE_INT preferred_alignment;
6356 HOST_WIDE_INT size = ix86_get_frame_size ();
6357 HOST_WIDE_INT to_allocate;
6359 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6360 * ms_abi functions that call a sysv function. We now need to prune away
6361 * cases where it should be disabled. */
6362 if (TARGET_64BIT && m->call_ms2sysv)
6364 gcc_assert (TARGET_64BIT_MS_ABI);
6365 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6366 gcc_assert (!TARGET_SEH);
6367 gcc_assert (TARGET_SSE);
6368 gcc_assert (!ix86_using_red_zone ());
6370 if (crtl->calls_eh_return)
6372 gcc_assert (!reload_completed);
6373 m->call_ms2sysv = false;
6374 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6377 else if (ix86_static_chain_on_stack)
6379 gcc_assert (!reload_completed);
6380 m->call_ms2sysv = false;
6381 warn_once_call_ms2sysv_xlogues ("static call chains");
6384 /* Finally, compute which registers the stub will manage. */
6385 else
6387 unsigned count = xlogue_layout::count_stub_managed_regs ();
6388 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6389 m->call_ms2sysv_pad_in = 0;
6393 frame->nregs = ix86_nsaved_regs ();
6394 frame->nsseregs = ix86_nsaved_sseregs ();
6396 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6397 except for function prologues, leaf functions and when the default
6398 incoming stack boundary is overridden at the command line or via the
6399 force_align_arg_pointer attribute.
6401 Darwin's ABI specifies 128-bit alignment for both the 32- and 64-bit variants
6402 at call sites, including profile function calls. */
6404 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6405 && crtl->preferred_stack_boundary < 128)
6406 && (!crtl->is_leaf || cfun->calls_alloca != 0
6407 || ix86_current_function_calls_tls_descriptor
6408 || (TARGET_MACHO && crtl->profile)
6409 || ix86_incoming_stack_boundary < 128))
6411 crtl->preferred_stack_boundary = 128;
6412 crtl->stack_alignment_needed = 128;
6415 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6416 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6418 gcc_assert (!size || stack_alignment_needed);
6419 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6420 gcc_assert (preferred_alignment <= stack_alignment_needed);
6422 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6423 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6424 if (TARGET_64BIT && m->call_ms2sysv)
6426 gcc_assert (stack_alignment_needed >= 16);
6427 gcc_assert (!frame->nsseregs);
6430 /* For SEH we have to limit the amount of code movement into the prologue.
6431 At present we do this via a BLOCKAGE, at which point there's very little
6432 scheduling that can be done, which means that there's very little point
6433 in doing anything except PUSHs. */
6434 if (TARGET_SEH)
6435 m->use_fast_prologue_epilogue = false;
6436 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6438 int count = frame->nregs;
6439 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6441 /* The fast prologue uses move instead of push to save registers. This
6442 is significantly longer, but also executes faster as modern hardware
6443 can execute the moves in parallel, but can't do that for push/pop.
6445 Be careful about choosing what prologue to emit: When function takes
6446 many instructions to execute we may use slow version as well as in
6447 case function is known to be outside hot spot (this is known with
6448 feedback only). Weight the size of function by number of registers
6449 to save as it is cheap to use one or two push instructions but very
6450 slow to use many of them.
6452 Calling this hook multiple times with the same frame requirements
6453 must produce the same layout, since the RA might otherwise be
6454 unable to reach a fixed point or might fail its final sanity checks.
6455 This means that once we've assumed that a function does or doesn't
6456 have a particular size, we have to stick to that assumption
6457 regardless of how the function has changed since. */
6458 if (count)
6459 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6460 if (node->frequency < NODE_FREQUENCY_NORMAL
6461 || (flag_branch_probabilities
6462 && node->frequency < NODE_FREQUENCY_HOT))
6463 m->use_fast_prologue_epilogue = false;
6464 else
6466 if (count != frame->expensive_count)
6468 frame->expensive_count = count;
6469 frame->expensive_p = expensive_function_p (count);
6471 m->use_fast_prologue_epilogue = !frame->expensive_p;
6475 frame->save_regs_using_mov
6476 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6478 /* Skip return address and error code in exception handler. */
6479 offset = INCOMING_FRAME_SP_OFFSET;
6481 /* Skip pushed static chain. */
6482 if (ix86_static_chain_on_stack)
6483 offset += UNITS_PER_WORD;
6485 /* Skip saved base pointer. */
6486 if (frame_pointer_needed)
6487 offset += UNITS_PER_WORD;
6488 frame->hfp_save_offset = offset;
6490 /* The traditional frame pointer location is at the top of the frame. */
6491 frame->hard_frame_pointer_offset = offset;
6493 /* Register save area */
6494 offset += frame->nregs * UNITS_PER_WORD;
6495 frame->reg_save_offset = offset;
6497 /* On SEH target, registers are pushed just before the frame pointer
6498 location. */
6499 if (TARGET_SEH)
6500 frame->hard_frame_pointer_offset = offset;
6502 /* Calculate the size of the va-arg area (not including padding, if any). */
6503 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6505 /* Also adjust stack_realign_offset for the largest alignment of
6506 stack slot actually used. */
6507 if (stack_realign_fp
6508 || (cfun->machine->max_used_stack_alignment != 0
6509 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6511 /* We may need a 16-byte aligned stack for the remainder of the
6512 register save area, but the stack frame for the local function
6513 may require a greater alignment if using AVX/2/512. In order
6514 to avoid wasting space, we first calculate the space needed for
6515 the rest of the register saves, add that to the stack pointer,
6516 and then realign the stack to the boundary of the start of the
6517 frame for the local function. */
6518 HOST_WIDE_INT space_needed = 0;
6519 HOST_WIDE_INT sse_reg_space_needed = 0;
6521 if (TARGET_64BIT)
6523 if (m->call_ms2sysv)
6525 m->call_ms2sysv_pad_in = 0;
6526 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6529 else if (frame->nsseregs)
6530 /* The only ABI that has saved SSE registers (Win64) also has a
6531 16-byte aligned default stack. However, many programs violate
6532 the ABI, and Wine64 forces stack realignment to compensate. */
6533 space_needed = frame->nsseregs * 16;
6535 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6537 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6538 rounding to be pedantic. */
6539 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6541 else
6542 space_needed = frame->va_arg_size;
6544 /* Record the allocation size required prior to the realignment AND. */
6545 frame->stack_realign_allocate = space_needed;
6547 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6548 before this point are not directly comparable with values below
6549 this point. Use sp_valid_at to determine if the stack pointer is
6550 valid for a given offset, fp_valid_at for the frame pointer, or
6551 choose_baseaddr to have a base register chosen for you.
6553 Note that the result of (frame->stack_realign_offset
6554 & (stack_alignment_needed - 1)) may not equal zero. */
6555 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6556 frame->stack_realign_offset = offset - space_needed;
6557 frame->sse_reg_save_offset = frame->stack_realign_offset
6558 + sse_reg_space_needed;
6560 else
6562 frame->stack_realign_offset = offset;
6564 if (TARGET_64BIT && m->call_ms2sysv)
6566 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6567 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6570 /* Align and set SSE register save area. */
6571 else if (frame->nsseregs)
6573 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6574 required and the DRAP re-alignment boundary is at least 16 bytes,
6575 then we want the SSE register save area properly aligned. */
6576 if (ix86_incoming_stack_boundary >= 128
6577 || (stack_realign_drap && stack_alignment_needed >= 16))
6578 offset = ROUND_UP (offset, 16);
6579 offset += frame->nsseregs * 16;
6581 frame->sse_reg_save_offset = offset;
6582 offset += frame->va_arg_size;
6585 /* Align start of frame for local function. When a function call
6586 is removed, it may become a leaf function. But if arguments may
6587 be passed on the stack, we need to align the stack when there is no
6588 tail call. */
6589 if (m->call_ms2sysv
6590 || frame->va_arg_size != 0
6591 || size != 0
6592 || !crtl->is_leaf
6593 || (!crtl->tail_call_emit
6594 && cfun->machine->outgoing_args_on_stack)
6595 || cfun->calls_alloca
6596 || ix86_current_function_calls_tls_descriptor)
6597 offset = ROUND_UP (offset, stack_alignment_needed);
6599 /* Frame pointer points here. */
6600 frame->frame_pointer_offset = offset;
6602 offset += size;
6604 /* Add outgoing arguments area. Can be skipped if we eliminated
6605 all the function calls as dead code.
6606 Skipping is however impossible when function calls alloca. Alloca
6607 expander assumes that last crtl->outgoing_args_size
6608 of stack frame are unused. */
6609 if (ACCUMULATE_OUTGOING_ARGS
6610 && (!crtl->is_leaf || cfun->calls_alloca
6611 || ix86_current_function_calls_tls_descriptor))
6613 offset += crtl->outgoing_args_size;
6614 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6616 else
6617 frame->outgoing_arguments_size = 0;
6619 /* Align stack boundary. Only needed if we're calling another function
6620 or using alloca. */
6621 if (!crtl->is_leaf || cfun->calls_alloca
6622 || ix86_current_function_calls_tls_descriptor)
6623 offset = ROUND_UP (offset, preferred_alignment);
6625 /* We've reached end of stack frame. */
6626 frame->stack_pointer_offset = offset;
6628 /* Size prologue needs to allocate. */
6629 to_allocate = offset - frame->sse_reg_save_offset;
6631 if ((!to_allocate && frame->nregs <= 1)
6632 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6633 /* If static stack checking is enabled and done with probes,
6634 the registers need to be saved before allocating the frame. */
6635 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6636 /* If stack clash probing needs a loop, then it needs a
6637 scratch register. But the returned register is only guaranteed
6638 to be safe to use after register saves are complete. So if
6639 stack clash protections are enabled and the allocated frame is
6640 larger than the probe interval, then use pushes to save
6641 callee saved registers. */
6642 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6643 frame->save_regs_using_mov = false;
6645 if (ix86_using_red_zone ()
6646 && crtl->sp_is_unchanging
6647 && crtl->is_leaf
6648 && !ix86_pc_thunk_call_expanded
6649 && !ix86_current_function_calls_tls_descriptor)
6651 frame->red_zone_size = to_allocate;
6652 if (frame->save_regs_using_mov)
6653 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6654 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6655 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6657 else
6658 frame->red_zone_size = 0;
6659 frame->stack_pointer_offset -= frame->red_zone_size;
6661 /* The SEH frame pointer location is near the bottom of the frame.
6662 This is enforced by the fact that the difference between the
6663 stack pointer and the frame pointer is limited to 240 bytes in
6664 the unwind data structure. */
6665 if (TARGET_SEH)
6667 HOST_WIDE_INT diff;
6669 /* If we can leave the frame pointer where it is, do so. Also, returns
6670 the establisher frame for __builtin_frame_address (0). */
6671 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6672 if (diff <= SEH_MAX_FRAME_SIZE
6673 && (diff > 240 || (diff & 15) != 0)
6674 && !crtl->accesses_prior_frames)
6676 /* Ideally we'd determine what portion of the local stack frame
6677 (within the constraint of the lowest 240) is most heavily used.
6678 But without that complication, simply bias the frame pointer
6679 by 128 bytes so as to maximize the amount of the local stack
6680 frame that is addressable with 8-bit offsets. */
6681 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
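/* A rough sketch of the typical (non-SEH) layout just computed, with the
   named offsets measured downwards from the CFA:

	CFA
	  return address (plus error code for exception handlers)
	  pushed static chain, saved frame pointer
	hard_frame_pointer_offset
	  GP register save area
	reg_save_offset
	  realignment padding and/or SSE register save area
	sse_reg_save_offset
	  va_arg register save area, alignment padding
	frame_pointer_offset
	  local variables
	  outgoing arguments area
	stack_pointer_offset

   with the usable red zone, if any, subtracted from stack_pointer_offset
   at the very end.  */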
6686 /* This is semi-inlined memory_address_length, but simplified
6687 since we know that we're always dealing with reg+offset, and
6688 to avoid having to create and discard all that rtl. */
6690 static inline int
6691 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6693 int len = 4;
6695 if (offset == 0)
6697 /* EBP and R13 cannot be encoded without an offset. */
6698 len = (regno == BP_REG || regno == R13_REG);
6700 else if (IN_RANGE (offset, -128, 127))
6701 len = 1;
6703 /* ESP and R12 must be encoded with a SIB byte. */
6704 if (regno == SP_REG || regno == R12_REG)
6705 len++;
6707 return len;
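/* For example: 16(%rax) and -8(%rbp) both need one displacement byte,
   0(%rbp) still needs one because %rbp/%r13 cannot be encoded without a
   displacement, 16(%rsp) needs two (disp8 plus the mandatory SIB byte
   for %rsp/%r12), and 512(%rax) needs a full 4-byte displacement.  */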
6710 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6711 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6713 static bool
6714 sp_valid_at (HOST_WIDE_INT cfa_offset)
6716 const struct machine_frame_state &fs = cfun->machine->fs;
6717 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6719 /* Validate that the cfa_offset isn't in a "no-man's land". */
6720 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6721 return false;
6723 return fs.sp_valid;
6726 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6727 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6729 static inline bool
6730 fp_valid_at (HOST_WIDE_INT cfa_offset)
6732 const struct machine_frame_state &fs = cfun->machine->fs;
6733 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6735 /* Validate that the cfa_offset isn't in a "no-man's land". */
6736 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6737 return false;
6739 return fs.fp_valid;
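/* Taken together, when the stack pointer has been re-aligned these two
   predicates split the save area in two: slots at
   cfa_offset <= sp_realigned_offset can only be addressed through the
   frame pointer, slots at cfa_offset > sp_realigned_fp_last only through
   the re-aligned stack pointer, and slots in between may use either
   (always subject to the overall fp_valid/sp_valid flags).  */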
6742 /* Choose a base register based upon alignment requested, speed and/or
6743 size. */
6745 static void
6746 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6747 HOST_WIDE_INT &base_offset,
6748 unsigned int align_requested, unsigned int *align)
6750 const struct machine_function *m = cfun->machine;
6751 unsigned int hfp_align;
6752 unsigned int drap_align;
6753 unsigned int sp_align;
6754 bool hfp_ok = fp_valid_at (cfa_offset);
6755 bool drap_ok = m->fs.drap_valid;
6756 bool sp_ok = sp_valid_at (cfa_offset);
6758 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6760 /* Filter out any registers that don't meet the requested alignment
6761 criteria. */
6762 if (align_requested)
6764 if (m->fs.realigned)
6765 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6766 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6767 notes (which we would need to use a realigned stack pointer),
6768 so disable on SEH targets. */
6769 else if (m->fs.sp_realigned)
6770 sp_align = crtl->stack_alignment_needed;
6772 hfp_ok = hfp_ok && hfp_align >= align_requested;
6773 drap_ok = drap_ok && drap_align >= align_requested;
6774 sp_ok = sp_ok && sp_align >= align_requested;
6777 if (m->use_fast_prologue_epilogue)
6779 /* Choose the base register most likely to allow the most scheduling
6780 opportunities. Generally FP is valid throughout the function,
6781 while DRAP must be reloaded within the epilogue. But choose either
6782 over the SP due to increased encoding size. */
6784 if (hfp_ok)
6786 base_reg = hard_frame_pointer_rtx;
6787 base_offset = m->fs.fp_offset - cfa_offset;
6789 else if (drap_ok)
6791 base_reg = crtl->drap_reg;
6792 base_offset = 0 - cfa_offset;
6794 else if (sp_ok)
6796 base_reg = stack_pointer_rtx;
6797 base_offset = m->fs.sp_offset - cfa_offset;
6800 else
6802 HOST_WIDE_INT toffset;
6803 int len = 16, tlen;
6805 /* Choose the base register with the smallest address encoding.
6806 With a tie, choose FP > DRAP > SP. */
6807 if (sp_ok)
6809 base_reg = stack_pointer_rtx;
6810 base_offset = m->fs.sp_offset - cfa_offset;
6811 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6813 if (drap_ok)
6815 toffset = 0 - cfa_offset;
6816 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6817 if (tlen <= len)
6819 base_reg = crtl->drap_reg;
6820 base_offset = toffset;
6821 len = tlen;
6824 if (hfp_ok)
6826 toffset = m->fs.fp_offset - cfa_offset;
6827 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6828 if (tlen <= len)
6830 base_reg = hard_frame_pointer_rtx;
6831 base_offset = toffset;
6836 /* Set the align return value. */
6837 if (align)
6839 if (base_reg == stack_pointer_rtx)
6840 *align = sp_align;
6841 else if (base_reg == crtl->drap_reg)
6842 *align = drap_align;
6843 else if (base_reg == hard_frame_pointer_rtx)
6844 *align = hfp_align;
6848 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6849 the alignment of address. If ALIGN is non-null, it should point to
6850 an alignment value (in bits) that is preferred or zero and will
6851 receive the alignment of the base register that was selected,
6852 irrespective of whether or not CFA_OFFSET is a multiple of that
6853 alignment value. If it is possible for the base register offset to be
6854 non-immediate then SCRATCH_REGNO should specify a scratch register to
6855 use.
6857 The valid base registers are taken from CFUN->MACHINE->FS. */
6859 static rtx
6860 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6861 unsigned int scratch_regno = INVALID_REGNUM)
6863 rtx base_reg = NULL;
6864 HOST_WIDE_INT base_offset = 0;
6866 /* If a specific alignment is requested, try to get a base register
6867 with that alignment first. */
6868 if (align && *align)
6869 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6871 if (!base_reg)
6872 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6874 gcc_assert (base_reg != NULL);
6876 rtx base_offset_rtx = GEN_INT (base_offset);
6878 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6880 gcc_assert (scratch_regno != INVALID_REGNUM);
6882 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6883 emit_move_insn (scratch_reg, base_offset_rtx);
6885 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6888 return plus_constant (Pmode, base_reg, base_offset);
6891 /* Emit code to save registers in the prologue. */
6893 static void
6894 ix86_emit_save_regs (void)
6896 unsigned int regno;
6897 rtx_insn *insn;
6899 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6900 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6902 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6903 RTX_FRAME_RELATED_P (insn) = 1;
6907 /* Emit a single register save at CFA - CFA_OFFSET. */
6909 static void
6910 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6911 HOST_WIDE_INT cfa_offset)
6913 struct machine_function *m = cfun->machine;
6914 rtx reg = gen_rtx_REG (mode, regno);
6915 rtx mem, addr, base, insn;
6916 unsigned int align = GET_MODE_ALIGNMENT (mode);
6918 addr = choose_baseaddr (cfa_offset, &align);
6919 mem = gen_frame_mem (mode, addr);
6921 /* The location alignment depends upon the base register. */
6922 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6923 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6924 set_mem_align (mem, align);
6926 insn = emit_insn (gen_rtx_SET (mem, reg));
6927 RTX_FRAME_RELATED_P (insn) = 1;
6929 base = addr;
6930 if (GET_CODE (base) == PLUS)
6931 base = XEXP (base, 0);
6932 gcc_checking_assert (REG_P (base));
6934 /* When saving registers into a re-aligned local stack frame, avoid
6935 any tricky guessing by dwarf2out. */
6936 if (m->fs.realigned)
6938 gcc_checking_assert (stack_realign_drap);
6940 if (regno == REGNO (crtl->drap_reg))
6942 /* A bit of a hack. We force the DRAP register to be saved in
6943 the re-aligned stack frame, which provides us with a copy
6944 of the CFA that will last past the prologue. Install it. */
6945 gcc_checking_assert (cfun->machine->fs.fp_valid);
6946 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6947 cfun->machine->fs.fp_offset - cfa_offset);
6948 mem = gen_rtx_MEM (mode, addr);
6949 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6951 else
6953 /* The frame pointer is a stable reference within the
6954 aligned frame. Use it. */
6955 gcc_checking_assert (cfun->machine->fs.fp_valid);
6956 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6957 cfun->machine->fs.fp_offset - cfa_offset);
6958 mem = gen_rtx_MEM (mode, addr);
6959 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6963 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6964 && cfa_offset >= m->fs.sp_realigned_offset)
6966 gcc_checking_assert (stack_realign_fp);
6967 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6970 /* The memory may not be relative to the current CFA register,
6971 which means that we may need to generate a new pattern for
6972 use by the unwind info. */
6973 else if (base != m->fs.cfa_reg)
6975 addr = plus_constant (Pmode, m->fs.cfa_reg,
6976 m->fs.cfa_offset - cfa_offset);
6977 mem = gen_rtx_MEM (mode, addr);
6978 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6982 /* Emit code to save registers using MOV insns.
6983 First register is stored at CFA - CFA_OFFSET. */
6984 static void
6985 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6987 unsigned int regno;
6989 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6990 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6992 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6993 cfa_offset -= UNITS_PER_WORD;
6997 /* Emit code to save SSE registers using MOV insns.
6998 First register is stored at CFA - CFA_OFFSET. */
6999 static void
7000 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7002 unsigned int regno;
7004 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7005 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7007 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7008 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7012 static GTY(()) rtx queued_cfa_restores;
7014 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7015 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7016 Don't add the note if the previously saved value will be left untouched
7017 within stack red-zone till return, as unwinders can find the same value
7018 in the register and on the stack. */
7020 static void
7021 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7023 if (!crtl->shrink_wrapped
7024 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7025 return;
7027 if (insn)
7029 add_reg_note (insn, REG_CFA_RESTORE, reg);
7030 RTX_FRAME_RELATED_P (insn) = 1;
7032 else
7033 queued_cfa_restores
7034 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7037 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7039 static void
7040 ix86_add_queued_cfa_restore_notes (rtx insn)
7042 rtx last;
7043 if (!queued_cfa_restores)
7044 return;
7045 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7047 XEXP (last, 1) = REG_NOTES (insn);
7048 REG_NOTES (insn) = queued_cfa_restores;
7049 queued_cfa_restores = NULL_RTX;
7050 RTX_FRAME_RELATED_P (insn) = 1;
7053 /* Expand prologue or epilogue stack adjustment.
7054 The pattern exists to put a dependency on all ebp-based memory accesses.
7055 STYLE should be negative if instructions should be marked as frame related,
7056 zero if %r11 register is live and cannot be freely used and positive
7057 otherwise. */
7059 static rtx
7060 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7061 int style, bool set_cfa)
7063 struct machine_function *m = cfun->machine;
7064 rtx addend = offset;
7065 rtx insn;
7066 bool add_frame_related_expr = false;
7068 if (!x86_64_immediate_operand (offset, Pmode))
7070 /* r11 is used by indirect sibcall return as well, set before the
7071 epilogue and used after the epilogue. */
7072 if (style)
7073 addend = gen_rtx_REG (Pmode, R11_REG);
7074 else
7076 gcc_assert (src != hard_frame_pointer_rtx
7077 && dest != hard_frame_pointer_rtx);
7078 addend = hard_frame_pointer_rtx;
7080 emit_insn (gen_rtx_SET (addend, offset));
7081 if (style < 0)
7082 add_frame_related_expr = true;
7085 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7086 (Pmode, dest, src, addend));
7087 if (style >= 0)
7088 ix86_add_queued_cfa_restore_notes (insn);
7090 if (set_cfa)
7092 rtx r;
7094 gcc_assert (m->fs.cfa_reg == src);
7095 m->fs.cfa_offset += INTVAL (offset);
7096 m->fs.cfa_reg = dest;
7098 r = gen_rtx_PLUS (Pmode, src, offset);
7099 r = gen_rtx_SET (dest, r);
7100 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7101 RTX_FRAME_RELATED_P (insn) = 1;
7103 else if (style < 0)
7105 RTX_FRAME_RELATED_P (insn) = 1;
7106 if (add_frame_related_expr)
7108 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7109 r = gen_rtx_SET (dest, r);
7110 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7114 if (dest == stack_pointer_rtx)
7116 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7117 bool valid = m->fs.sp_valid;
7118 bool realigned = m->fs.sp_realigned;
7120 if (src == hard_frame_pointer_rtx)
7122 valid = m->fs.fp_valid;
7123 realigned = false;
7124 ooffset = m->fs.fp_offset;
7126 else if (src == crtl->drap_reg)
7128 valid = m->fs.drap_valid;
7129 realigned = false;
7130 ooffset = 0;
7132 else
7134 /* Else there are two possibilities: SP itself, which we set
7135 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7136 taken care of by hand along the eh_return path. */
7137 gcc_checking_assert (src == stack_pointer_rtx
7138 || offset == const0_rtx);
7141 m->fs.sp_offset = ooffset - INTVAL (offset);
7142 m->fs.sp_valid = valid;
7143 m->fs.sp_realigned = realigned;
7145 return insn;
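/* Typical prologue use, as in the stack-probing code further down:
   allocate SIZE bytes and keep the CFA notes consistent while the stack
   pointer is still the CFA register:

	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-size), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);  */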
7148 /* Find an available register to be used as dynamic realign argument
7149 pointer register. Such a register will be written in the prologue and
7150 used at the beginning of the body, so it must not be
7151 1. parameter passing register.
7152 2. GOT pointer.
7153 We reuse static-chain register if it is available. Otherwise, we
7154 use DI for i386 and R13 for x86-64. We chose R13 since it has
7155 shorter encoding.
7157 Return: the regno of chosen register. */
7159 static unsigned int
7160 find_drap_reg (void)
7162 tree decl = cfun->decl;
7164 /* Always use callee-saved register if there are no caller-saved
7165 registers. */
7166 if (TARGET_64BIT)
7168 /* Use R13 for a nested function or a function that needs a static chain.
7169 Since a function with a tail call may use any caller-saved
7170 register in the epilogue, DRAP must not use a caller-saved
7171 register in that case. */
7172 if (DECL_STATIC_CHAIN (decl)
7173 || cfun->machine->no_caller_saved_registers
7174 || crtl->tail_call_emit)
7175 return R13_REG;
7177 return R10_REG;
7179 else
7181 /* Use DI for a nested function or a function that needs a static chain.
7182 Since a function with a tail call may use any caller-saved
7183 register in the epilogue, DRAP must not use a caller-saved
7184 register in that case. */
7185 if (DECL_STATIC_CHAIN (decl)
7186 || cfun->machine->no_caller_saved_registers
7187 || crtl->tail_call_emit)
7188 return DI_REG;
7190 /* Reuse static chain register if it isn't used for parameter
7191 passing. */
7192 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7194 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7195 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7196 return CX_REG;
7198 return DI_REG;
7202 /* Return minimum incoming stack alignment. */
7204 static unsigned int
7205 ix86_minimum_incoming_stack_boundary (bool sibcall)
7207 unsigned int incoming_stack_boundary;
7209 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7210 if (cfun->machine->func_type != TYPE_NORMAL)
7211 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7212 /* Prefer the one specified at command line. */
7213 else if (ix86_user_incoming_stack_boundary)
7214 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7215 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7216 if -mstackrealign is used, this path is not used for the sibcall check,
7217 and the estimated stack alignment is 128 bits. */
7218 else if (!sibcall
7219 && ix86_force_align_arg_pointer
7220 && crtl->stack_alignment_estimated == 128)
7221 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7222 else
7223 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7225 /* Incoming stack alignment can be changed on individual functions
7226 via force_align_arg_pointer attribute. We use the smallest
7227 incoming stack boundary. */
7228 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7229 && lookup_attribute ("force_align_arg_pointer",
7230 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7231 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7233 /* The incoming stack frame has to be aligned at least at
7234 parm_stack_boundary. */
7235 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7236 incoming_stack_boundary = crtl->parm_stack_boundary;
7238 /* Stack at entrance of main is aligned by runtime. We use the
7239 smallest incoming stack boundary. */
7240 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7241 && DECL_NAME (current_function_decl)
7242 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7243 && DECL_FILE_SCOPE_P (current_function_decl))
7244 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7246 return incoming_stack_boundary;
7249 /* Update incoming stack boundary and estimated stack alignment. */
7251 static void
7252 ix86_update_stack_boundary (void)
7254 ix86_incoming_stack_boundary
7255 = ix86_minimum_incoming_stack_boundary (false);
7257 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7258 if (TARGET_64BIT
7259 && cfun->stdarg
7260 && crtl->stack_alignment_estimated < 128)
7261 crtl->stack_alignment_estimated = 128;
7263 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7264 if (ix86_tls_descriptor_calls_expanded_in_cfun
7265 && crtl->preferred_stack_boundary < 128)
7266 crtl->preferred_stack_boundary = 128;
7269 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7270 needed or an rtx for DRAP otherwise. */
7272 static rtx
7273 ix86_get_drap_rtx (void)
7275 /* We must use DRAP if there are outgoing arguments on the stack or
7276 the stack pointer register is clobbered by an asm statement and
7277 ACCUMULATE_OUTGOING_ARGS is false. */
7278 if (ix86_force_drap
7279 || ((cfun->machine->outgoing_args_on_stack
7280 || crtl->sp_is_clobbered_by_asm)
7281 && !ACCUMULATE_OUTGOING_ARGS))
7282 crtl->need_drap = true;
7284 if (stack_realign_drap)
7286 /* Assign DRAP to vDRAP and return vDRAP. */
7287 unsigned int regno = find_drap_reg ();
7288 rtx drap_vreg;
7289 rtx arg_ptr;
7290 rtx_insn *seq, *insn;
7292 arg_ptr = gen_rtx_REG (Pmode, regno);
7293 crtl->drap_reg = arg_ptr;
7295 start_sequence ();
7296 drap_vreg = copy_to_reg (arg_ptr);
7297 seq = get_insns ();
7298 end_sequence ();
7300 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7301 if (!optimize)
7303 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7304 RTX_FRAME_RELATED_P (insn) = 1;
7306 return drap_vreg;
7308 else
7309 return NULL;
7312 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7314 static rtx
7315 ix86_internal_arg_pointer (void)
7317 return virtual_incoming_args_rtx;
7320 struct scratch_reg {
7321 rtx reg;
7322 bool saved;
7325 /* Return a short-lived scratch register for use on function entry.
7326 In 32-bit mode, it is valid only after the registers are saved
7327 in the prologue. This register must be released by means of
7328 release_scratch_register_on_entry once it is dead. */
7330 static void
7331 get_scratch_register_on_entry (struct scratch_reg *sr)
7333 int regno;
7335 sr->saved = false;
7337 if (TARGET_64BIT)
7339 /* We always use R11 in 64-bit mode. */
7340 regno = R11_REG;
7342 else
7344 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7345 bool fastcall_p
7346 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7347 bool thiscall_p
7348 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7349 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7350 int regparm = ix86_function_regparm (fntype, decl);
7351 int drap_regno
7352 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7354 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7355 for the static chain register. */
7356 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7357 && drap_regno != AX_REG)
7358 regno = AX_REG;
7359 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7360 for the static chain register. */
7361 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7362 regno = AX_REG;
7363 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7364 regno = DX_REG;
7365 /* ecx is the static chain register. */
7366 else if (regparm < 3 && !fastcall_p && !thiscall_p
7367 && !static_chain_p
7368 && drap_regno != CX_REG)
7369 regno = CX_REG;
7370 else if (ix86_save_reg (BX_REG, true, false))
7371 regno = BX_REG;
7372 /* esi is the static chain register. */
7373 else if (!(regparm == 3 && static_chain_p)
7374 && ix86_save_reg (SI_REG, true, false))
7375 regno = SI_REG;
7376 else if (ix86_save_reg (DI_REG, true, false))
7377 regno = DI_REG;
7378 else
7380 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7381 sr->saved = true;
7385 sr->reg = gen_rtx_REG (Pmode, regno);
7386 if (sr->saved)
7388 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7389 RTX_FRAME_RELATED_P (insn) = 1;
7393 /* Release a scratch register obtained from the preceding function.
7395 If RELEASE_VIA_POP is true, we just pop the register off the stack
7396 to release it. This is what non-Linux systems use with -fstack-check.
7398 Otherwise we use OFFSET to locate the saved register and the
7399 allocated stack space becomes part of the local frame and is
7400 deallocated by the epilogue. */
7402 static void
7403 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7404 bool release_via_pop)
7406 if (sr->saved)
7408 if (release_via_pop)
7410 struct machine_function *m = cfun->machine;
7411 rtx x, insn = emit_insn (gen_pop (sr->reg));
7413 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7414 RTX_FRAME_RELATED_P (insn) = 1;
7415 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7416 x = gen_rtx_SET (stack_pointer_rtx, x);
7417 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7418 m->fs.sp_offset -= UNITS_PER_WORD;
7420 else
7422 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7423 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7424 emit_insn (x);
7429 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7431 If INT_REGISTERS_SAVED is true, then integer registers have already been
7432 pushed on the stack.
7434 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7435 beyond SIZE bytes.
7437 This assumes no knowledge of the current probing state, i.e. it is never
7438 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7439 a suitable probe. */
7441 static void
7442 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7443 const bool int_registers_saved,
7444 const bool protection_area)
7446 struct machine_function *m = cfun->machine;
7448 /* If this function does not statically allocate stack space, then
7449 no probes are needed. */
7450 if (!size)
7452 /* However, the allocation of space via pushes for register
7453 saves could be viewed as allocating space, but without the
7454 need to probe. */
7455 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7456 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7457 else
7458 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7459 return;
7462 /* If we are a noreturn function, then we have to consider the
7463 possibility that we're called via a jump rather than a call.
7465 Thus we don't have the implicit probe generated by saving the
7466 return address into the stack at the call. Thus, the stack
7467 pointer could be anywhere in the guard page. The safe thing
7468 to do is emit a probe now.
7470 The probe can be avoided if we have already emitted any callee
7471 register saves into the stack or have a frame pointer (which will
7472 have been saved as well). Those saves will function as implicit
7473 probes.
7475 ?!? This should be revamped to work like aarch64 and s390 where
7476 we track the offset from the most recent probe. Normally that
7477 offset would be zero. For a noreturn function we would reset
7478 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7479 we just probe when we cross PROBE_INTERVAL. */
7480 if (TREE_THIS_VOLATILE (cfun->decl)
7481 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7483 /* We can safely use any register here since we're just going to push
7484 its value and immediately pop it back. But we do try and avoid
7485 argument passing registers so as not to introduce dependencies in
7486 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7487 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7488 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7489 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7490 m->fs.sp_offset -= UNITS_PER_WORD;
7491 if (m->fs.cfa_reg == stack_pointer_rtx)
7493 m->fs.cfa_offset -= UNITS_PER_WORD;
7494 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7495 x = gen_rtx_SET (stack_pointer_rtx, x);
7496 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7497 RTX_FRAME_RELATED_P (insn_push) = 1;
7498 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7499 x = gen_rtx_SET (stack_pointer_rtx, x);
7500 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7501 RTX_FRAME_RELATED_P (insn_pop) = 1;
7503 emit_insn (gen_blockage ());
7506 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7507 const int dope = 4 * UNITS_PER_WORD;
7509 /* If there is protection area, take it into account in the size. */
7510 if (protection_area)
7511 size += probe_interval + dope;
7513 /* If we allocate less than the size of the guard statically,
7514 then no probing is necessary, but we do need to allocate
7515 the stack. */
7516 else if (size < (1 << param_stack_clash_protection_guard_size))
7518 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7519 GEN_INT (-size), -1,
7520 m->fs.cfa_reg == stack_pointer_rtx);
7521 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7522 return;
7525 /* We're allocating a large enough stack frame that we need to
7526 emit probes. Either emit them inline or in a loop depending
7527 on the size. */
7528 if (size <= 4 * probe_interval)
7530 HOST_WIDE_INT i;
7531 for (i = probe_interval; i <= size; i += probe_interval)
7533 /* Allocate PROBE_INTERVAL bytes. */
7534 rtx insn
7535 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7536 GEN_INT (-probe_interval), -1,
7537 m->fs.cfa_reg == stack_pointer_rtx);
7538 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7540 /* And probe at *sp. */
7541 emit_stack_probe (stack_pointer_rtx);
7542 emit_insn (gen_blockage ());
7545 /* We need to allocate space for the residual, but we do not need
7546 to probe the residual... */
7547 HOST_WIDE_INT residual = (i - probe_interval - size);
7548 if (residual)
7550 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7551 GEN_INT (residual), -1,
7552 m->fs.cfa_reg == stack_pointer_rtx);
7554 /* ...except if there is a protection area to maintain. */
7555 if (protection_area)
7556 emit_stack_probe (stack_pointer_rtx);
7559 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7561 else
7563 /* We expect the GP registers to be saved when probes are used
7564 as the probing sequences might need a scratch register and
7565 the routine to allocate one assumes the integer registers
7566 have already been saved. */
7567 gcc_assert (int_registers_saved);
7569 struct scratch_reg sr;
7570 get_scratch_register_on_entry (&sr);
7572 /* If we needed to save a register, then account for any space
7573 that was pushed (we are not going to pop the register when
7574 we do the restore). */
7575 if (sr.saved)
7576 size -= UNITS_PER_WORD;
7578 /* Step 1: round SIZE down to a multiple of the interval. */
7579 HOST_WIDE_INT rounded_size = size & -probe_interval;
7581 /* Step 2: compute final value of the loop counter. Use lea if
7582 possible. */
7583 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7584 rtx insn;
7585 if (address_no_seg_operand (addr, Pmode))
7586 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7587 else
7589 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7590 insn = emit_insn (gen_rtx_SET (sr.reg,
7591 gen_rtx_PLUS (Pmode, sr.reg,
7592 stack_pointer_rtx)));
7594 if (m->fs.cfa_reg == stack_pointer_rtx)
7596 add_reg_note (insn, REG_CFA_DEF_CFA,
7597 plus_constant (Pmode, sr.reg,
7598 m->fs.cfa_offset + rounded_size));
7599 RTX_FRAME_RELATED_P (insn) = 1;
7602 /* Step 3: the loop. */
7603 rtx size_rtx = GEN_INT (rounded_size);
7604 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7605 size_rtx));
7606 if (m->fs.cfa_reg == stack_pointer_rtx)
7608 m->fs.cfa_offset += rounded_size;
7609 add_reg_note (insn, REG_CFA_DEF_CFA,
7610 plus_constant (Pmode, stack_pointer_rtx,
7611 m->fs.cfa_offset));
7612 RTX_FRAME_RELATED_P (insn) = 1;
7614 m->fs.sp_offset += rounded_size;
7615 emit_insn (gen_blockage ());
7617 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7618 is equal to ROUNDED_SIZE. */
7620 if (size != rounded_size)
7622 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7623 GEN_INT (rounded_size - size), -1,
7624 m->fs.cfa_reg == stack_pointer_rtx);
7626 if (protection_area)
7627 emit_stack_probe (stack_pointer_rtx);
7630 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7632 /* This does not deallocate the space reserved for the scratch
7633 register. That will be deallocated in the epilogue. */
7634 release_scratch_register_on_entry (&sr, size, false);
7637 /* Adjust back to account for the protection area. */
7638 if (protection_area)
7639 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7640 GEN_INT (probe_interval + dope), -1,
7641 m->fs.cfa_reg == stack_pointer_rtx);
7643 /* Make sure nothing is scheduled before we are done. */
7644 emit_insn (gen_blockage ());
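/* To summarize, the function above picks one of three strategies: a plain
   allocation with no probes when the frame is smaller than the guard,
   a straight-line sequence of sub/probe pairs when at most four probe
   intervals are needed, and the adjust_stack_and_probe loop with a
   scratch register for anything larger, in each case followed by the
   residual allocation and, when requested, the protection-area probe and
   adjustment.  */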
7647 /* Adjust the stack pointer up to REG while probing it. */
7649 const char *
7650 output_adjust_stack_and_probe (rtx reg)
7652 static int labelno = 0;
7653 char loop_lab[32];
7654 rtx xops[2];
7656 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7658 /* Loop. */
7659 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7661 /* SP = SP - PROBE_INTERVAL. */
7662 xops[0] = stack_pointer_rtx;
7663 xops[1] = GEN_INT (get_probe_interval ());
7664 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7666 /* Probe at SP. */
7667 xops[1] = const0_rtx;
7668 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7670 /* Test if SP == LAST_ADDR. */
7671 xops[0] = stack_pointer_rtx;
7672 xops[1] = reg;
7673 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7675 /* Branch. */
7676 fputs ("\tjne\t", asm_out_file);
7677 assemble_name_raw (asm_out_file, loop_lab);
7678 fputc ('\n', asm_out_file);
7680 return "";
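/* With the default 4 KiB probe interval and the usual %r11 scratch
   register, the loop printed above looks roughly like

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0

   in 64-bit AT&T syntax.  */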
7683 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7684 inclusive. These are offsets from the current stack pointer.
7686 INT_REGISTERS_SAVED is true if integer registers have already been
7687 pushed on the stack. */
7689 static void
7690 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7691 const bool int_registers_saved)
7693 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7695 /* See if we have a constant small number of probes to generate. If so,
7696 that's the easy case. The run-time loop is made up of 6 insns in the
7697 generic case while the compile-time loop is made up of n insns for n #
7698 of intervals. */
7699 if (size <= 6 * probe_interval)
7701 HOST_WIDE_INT i;
7703 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7704 it exceeds SIZE. If only one probe is needed, this will not
7705 generate any code. Then probe at FIRST + SIZE. */
7706 for (i = probe_interval; i < size; i += probe_interval)
7707 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7708 -(first + i)));
7710 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7711 -(first + size)));
7714 /* Otherwise, do the same as above, but in a loop. Note that we must be
7715 extra careful with variables wrapping around because we might be at
7716 the very top (or the very bottom) of the address space and we have
7717 to be able to handle this case properly; in particular, we use an
7718 equality test for the loop condition. */
7719 else
7721 /* We expect the GP registers to be saved when probes are used
7722 as the probing sequences might need a scratch register and
7723 the routine to allocate one assumes the integer registers
7724 have already been saved. */
7725 gcc_assert (int_registers_saved);
7727 HOST_WIDE_INT rounded_size, last;
7728 struct scratch_reg sr;
7730 get_scratch_register_on_entry (&sr);
7733 /* Step 1: round SIZE to the previous multiple of the interval. */
7735 rounded_size = ROUND_DOWN (size, probe_interval);
7738 /* Step 2: compute initial and final value of the loop counter. */
7740 /* TEST_OFFSET = FIRST. */
7741 emit_move_insn (sr.reg, GEN_INT (-first));
7743 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7744 last = first + rounded_size;
7747 /* Step 3: the loop
7751 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7752 probe at TEST_ADDR
7754 while (TEST_ADDR != LAST_ADDR)
7756 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7757 until it is equal to ROUNDED_SIZE. */
7759 emit_insn
7760 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7763 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7764 that SIZE is equal to ROUNDED_SIZE. */
7766 if (size != rounded_size)
7767 emit_stack_probe (plus_constant (Pmode,
7768 gen_rtx_PLUS (Pmode,
7769 stack_pointer_rtx,
7770 sr.reg),
7771 rounded_size - size));
7773 release_scratch_register_on_entry (&sr, size, true);
7776 /* Make sure nothing is scheduled before we are done. */
7777 emit_insn (gen_blockage ());
7780 /* Probe a range of stack addresses from REG to END, inclusive. These are
7781 offsets from the current stack pointer. */
7783 const char *
7784 output_probe_stack_range (rtx reg, rtx end)
7786 static int labelno = 0;
7787 char loop_lab[32];
7788 rtx xops[3];
7790 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7792 /* Loop. */
7793 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7795 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7796 xops[0] = reg;
7797 xops[1] = GEN_INT (get_probe_interval ());
7798 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7800 /* Probe at TEST_ADDR. */
7801 xops[0] = stack_pointer_rtx;
7802 xops[1] = reg;
7803 xops[2] = const0_rtx;
7804 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7806 /* Test if TEST_ADDR == LAST_ADDR. */
7807 xops[0] = reg;
7808 xops[1] = end;
7809 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7811 /* Branch. */
7812 fputs ("\tjne\t", asm_out_file);
7813 assemble_name_raw (asm_out_file, loop_lab);
7814 fputc ('\n', asm_out_file);
7816 return "";
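/* For reference only (a sketch, not output copied from the compiler): on a
   64-bit target with the default 4 KiB interval, and assuming the scratch
   register was allocated to %r11 and LAST_ADDR ended up in %rax, the loop
   emitted above looks roughly like:

	.LPSRL0:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	%rax, %r11
		jne	.LPSRL0
*/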
7819 /* Set stack_frame_required to false if stack frame isn't required.
7820 Update STACK_ALIGNMENT to the largest alignment, in bits, of any stack
7821 slot used, if a stack frame is required and CHECK_STACK_SLOT is true. */
7823 static void
7824 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7825 bool check_stack_slot)
7827 HARD_REG_SET set_up_by_prologue, prologue_used;
7828 basic_block bb;
7830 CLEAR_HARD_REG_SET (prologue_used);
7831 CLEAR_HARD_REG_SET (set_up_by_prologue);
7832 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7833 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7834 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7835 HARD_FRAME_POINTER_REGNUM);
7837 /* The preferred stack alignment is the minimum stack alignment. */
7838 if (stack_alignment > crtl->preferred_stack_boundary)
7839 stack_alignment = crtl->preferred_stack_boundary;
7841 bool require_stack_frame = false;
7843 FOR_EACH_BB_FN (bb, cfun)
7845 rtx_insn *insn;
7846 FOR_BB_INSNS (bb, insn)
7847 if (NONDEBUG_INSN_P (insn)
7848 && requires_stack_frame_p (insn, prologue_used,
7849 set_up_by_prologue))
7851 require_stack_frame = true;
7853 if (check_stack_slot)
7855 /* Find the maximum stack alignment. */
7856 subrtx_iterator::array_type array;
7857 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7858 if (MEM_P (*iter)
7859 && (reg_mentioned_p (stack_pointer_rtx,
7860 *iter)
7861 || reg_mentioned_p (frame_pointer_rtx,
7862 *iter)))
7864 unsigned int alignment = MEM_ALIGN (*iter);
7865 if (alignment > stack_alignment)
7866 stack_alignment = alignment;
7872 cfun->machine->stack_frame_required = require_stack_frame;
7875 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7876 will guide prologue/epilogue to be generated in correct form. */
7878 static void
7879 ix86_finalize_stack_frame_flags (void)
7881 /* Check whether stack realignment is really needed after reload, and
7882 store the result in cfun. */
7883 unsigned int incoming_stack_boundary
7884 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7885 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7886 unsigned int stack_alignment
7887 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7888 ? crtl->max_used_stack_slot_alignment
7889 : crtl->stack_alignment_needed);
7890 unsigned int stack_realign
7891 = (incoming_stack_boundary < stack_alignment);
7892 bool recompute_frame_layout_p = false;
7894 if (crtl->stack_realign_finalized)
7896 /* After stack_realign_needed is finalized, we can no longer
7897 change it. */
7898 gcc_assert (crtl->stack_realign_needed == stack_realign);
7899 return;
7902 /* It is always safe to compute max_used_stack_alignment. We
7903 compute it only if a 128-bit aligned load/store may be generated
7904 on a misaligned stack slot, which would lead to a segfault. */
7905 bool check_stack_slot
7906 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7907 ix86_find_max_used_stack_alignment (stack_alignment,
7908 check_stack_slot);
7910 /* If the only reason for frame_pointer_needed is that we conservatively
7911 assumed stack realignment might be needed or -fno-omit-frame-pointer
7912 is used, but in the end nothing that needed the stack alignment was
7913 spilled and there was no stack access, clear frame_pointer_needed and
7914 say we don't need stack realignment. */
7915 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7916 && frame_pointer_needed
7917 && crtl->is_leaf
7918 && crtl->sp_is_unchanging
7919 && !ix86_current_function_calls_tls_descriptor
7920 && !crtl->accesses_prior_frames
7921 && !cfun->calls_alloca
7922 && !crtl->calls_eh_return
7923 /* See ira_setup_eliminable_regset for the rationale. */
7924 && !(STACK_CHECK_MOVING_SP
7925 && flag_stack_check
7926 && flag_exceptions
7927 && cfun->can_throw_non_call_exceptions)
7928 && !ix86_frame_pointer_required ()
7929 && ix86_get_frame_size () == 0
7930 && ix86_nsaved_sseregs () == 0
7931 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7933 if (cfun->machine->stack_frame_required)
7935 /* Stack frame is required. If stack alignment needed is less
7936 than incoming stack boundary, don't realign stack. */
7937 stack_realign = incoming_stack_boundary < stack_alignment;
7938 if (!stack_realign)
7940 crtl->max_used_stack_slot_alignment
7941 = incoming_stack_boundary;
7942 crtl->stack_alignment_needed
7943 = incoming_stack_boundary;
7944 /* Also update preferred_stack_boundary for leaf
7945 functions. */
7946 crtl->preferred_stack_boundary
7947 = incoming_stack_boundary;
7950 else
7952 /* If drap has been set, but it actually isn't live at the
7953 start of the function, there is no reason to set it up. */
7954 if (crtl->drap_reg)
7956 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7957 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7958 REGNO (crtl->drap_reg)))
7960 crtl->drap_reg = NULL_RTX;
7961 crtl->need_drap = false;
7964 else
7965 cfun->machine->no_drap_save_restore = true;
7967 frame_pointer_needed = false;
7968 stack_realign = false;
7969 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7970 crtl->stack_alignment_needed = incoming_stack_boundary;
7971 crtl->stack_alignment_estimated = incoming_stack_boundary;
7972 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7973 crtl->preferred_stack_boundary = incoming_stack_boundary;
7974 df_finish_pass (true);
7975 df_scan_alloc (NULL);
7976 df_scan_blocks ();
7977 df_compute_regs_ever_live (true);
7978 df_analyze ();
7980 if (flag_var_tracking)
7982 /* Since frame pointer is no longer available, replace it with
7983 stack pointer - UNITS_PER_WORD in debug insns. */
7984 df_ref ref, next;
7985 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7986 ref; ref = next)
7988 next = DF_REF_NEXT_REG (ref);
7989 if (!DF_REF_INSN_INFO (ref))
7990 continue;
7992 /* Make sure the next ref is for a different instruction,
7993 so that we're not affected by the rescan. */
7994 rtx_insn *insn = DF_REF_INSN (ref);
7995 while (next && DF_REF_INSN (next) == insn)
7996 next = DF_REF_NEXT_REG (next);
7998 if (DEBUG_INSN_P (insn))
8000 bool changed = false;
8001 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8003 rtx *loc = DF_REF_LOC (ref);
8004 if (*loc == hard_frame_pointer_rtx)
8006 *loc = plus_constant (Pmode,
8007 stack_pointer_rtx,
8008 -UNITS_PER_WORD);
8009 changed = true;
8012 if (changed)
8013 df_insn_rescan (insn);
8018 recompute_frame_layout_p = true;
8021 else if (crtl->max_used_stack_slot_alignment >= 128
8022 && cfun->machine->stack_frame_required)
8024 /* We don't need to realign the stack. max_used_stack_alignment is
8025 used to decide how the stack frame should be aligned. This is
8026 independent of any psABI and of 32-bit vs 64-bit. */
8027 cfun->machine->max_used_stack_alignment
8028 = stack_alignment / BITS_PER_UNIT;
8031 if (crtl->stack_realign_needed != stack_realign)
8032 recompute_frame_layout_p = true;
8033 crtl->stack_realign_needed = stack_realign;
8034 crtl->stack_realign_finalized = true;
8035 if (recompute_frame_layout_p)
8036 ix86_compute_frame_layout ();
8039 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8041 static void
8042 ix86_elim_entry_set_got (rtx reg)
8044 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8045 rtx_insn *c_insn = BB_HEAD (bb);
8046 if (!NONDEBUG_INSN_P (c_insn))
8047 c_insn = next_nonnote_nondebug_insn (c_insn);
8048 if (c_insn && NONJUMP_INSN_P (c_insn))
8050 rtx pat = PATTERN (c_insn);
8051 if (GET_CODE (pat) == PARALLEL)
8053 rtx vec = XVECEXP (pat, 0, 0);
8054 if (GET_CODE (vec) == SET
8055 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8056 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8057 delete_insn (c_insn);
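/* Return a SET rtx that stores REG into the frame slot at FRAME_REG + OFFSET
   when STORE is true, or loads REG from that slot when STORE is false.  */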
8062 static rtx
8063 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8065 rtx addr, mem;
8067 if (offset)
8068 addr = plus_constant (Pmode, frame_reg, offset);
8069 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8070 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8073 static inline rtx
8074 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8076 return gen_frame_set (reg, frame_reg, offset, false);
8079 static inline rtx
8080 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8082 return gen_frame_set (reg, frame_reg, offset, true);
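/* Emit the out-of-line save of the MS-to-SysV clobbered registers described
   by FRAME: set up RAX as the stub's base pointer and emit a single PARALLEL
   that uses the xlogue save stub together with the individual frame stores.  */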
8085 static void
8086 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8088 struct machine_function *m = cfun->machine;
8089 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8090 + m->call_ms2sysv_extra_regs;
8091 rtvec v = rtvec_alloc (ncregs + 1);
8092 unsigned int align, i, vi = 0;
8093 rtx_insn *insn;
8094 rtx sym, addr;
8095 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8096 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8098 /* AL should only be live with sysv_abi. */
8099 gcc_assert (!ix86_eax_live_at_start_p ());
8100 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8102 /* Set up RAX as the stub's base pointer. We use stack_realign_offset so
8103 the address is correct whether or not we've actually realigned the stack. */
8104 align = GET_MODE_ALIGNMENT (V4SFmode);
8105 addr = choose_baseaddr (frame.stack_realign_offset
8106 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8107 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8109 emit_insn (gen_rtx_SET (rax, addr));
8111 /* Get the stub symbol. */
8112 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8113 : XLOGUE_STUB_SAVE);
8114 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8116 for (i = 0; i < ncregs; ++i)
8118 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8119 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8120 r.regno);
8121 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8124 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8126 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8127 RTX_FRAME_RELATED_P (insn) = true;
8130 /* Generate and return an insn body to AND X with Y. */
8132 static rtx_insn *
8133 gen_and2_insn (rtx x, rtx y)
8135 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8137 gcc_assert (insn_operand_matches (icode, 0, x));
8138 gcc_assert (insn_operand_matches (icode, 1, x));
8139 gcc_assert (insn_operand_matches (icode, 2, y));
8141 return GEN_FCN (icode) (x, x, y);
8144 /* Expand the prologue into a bunch of separate insns. */
8146 void
8147 ix86_expand_prologue (void)
8149 struct machine_function *m = cfun->machine;
8150 rtx insn, t;
8151 HOST_WIDE_INT allocate;
8152 bool int_registers_saved;
8153 bool sse_registers_saved;
8154 bool save_stub_call_needed;
8155 rtx static_chain = NULL_RTX;
8157 if (ix86_function_naked (current_function_decl))
8159 if (flag_stack_usage_info)
8160 current_function_static_stack_size = 0;
8161 return;
8164 ix86_finalize_stack_frame_flags ();
8166 /* DRAP should not coexist with stack_realign_fp */
8167 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8169 memset (&m->fs, 0, sizeof (m->fs));
8171 /* Initialize CFA state for before the prologue. */
8172 m->fs.cfa_reg = stack_pointer_rtx;
8173 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8175 /* Track SP offset to the CFA. We continue tracking this after we've
8176 swapped the CFA register away from SP. In the case of re-alignment
8177 this is fudged; we're interested to offsets within the local frame. */
8178 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8179 m->fs.sp_valid = true;
8180 m->fs.sp_realigned = false;
8182 const struct ix86_frame &frame = cfun->machine->frame;
8184 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8186 /* We should have already generated an error for any use of
8187 ms_hook on a nested function. */
8188 gcc_checking_assert (!ix86_static_chain_on_stack);
8190 /* Check if profiling is active and we shall use profiling before
8191 prologue variant. If so sorry. */
8192 if (crtl->profile && flag_fentry != 0)
8193 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8194 "with %<-mfentry%> for 32-bit");
8196 /* In ix86_asm_output_function_label we emitted:
8197 8b ff movl.s %edi,%edi
8198 55 push %ebp
8199 8b ec movl.s %esp,%ebp
8201 This matches the hookable function prologue in Win32 API
8202 functions in Microsoft Windows XP Service Pack 2 and newer.
8203 Wine uses this to enable Windows apps to hook the Win32 API
8204 functions provided by Wine.
8206 What that means is that we've already set up the frame pointer. */
8208 if (frame_pointer_needed
8209 && !(crtl->drap_reg && crtl->stack_realign_needed))
8211 rtx push, mov;
8213 /* We've decided to use the frame pointer already set up.
8214 Describe this to the unwinder by pretending that both
8215 push and mov insns happen right here.
8217 Putting the unwind info here at the end of the ms_hook
8218 is done so that we can make absolutely certain we get
8219 the required byte sequence at the start of the function,
8220 rather than relying on an assembler that can produce
8221 the exact encoding required.
8223 However it does mean (in the unpatched case) that we have
8224 a 1 insn window where the asynchronous unwind info is
8225 incorrect. However, if we placed the unwind info at
8226 its correct location we would have incorrect unwind info
8227 in the patched case. Which is probably all moot since
8228 I don't expect Wine generates dwarf2 unwind info for the
8229 system libraries that use this feature. */
8231 insn = emit_insn (gen_blockage ());
8233 push = gen_push (hard_frame_pointer_rtx);
8234 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8235 stack_pointer_rtx);
8236 RTX_FRAME_RELATED_P (push) = 1;
8237 RTX_FRAME_RELATED_P (mov) = 1;
8239 RTX_FRAME_RELATED_P (insn) = 1;
8240 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8241 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8243 /* Note that gen_push incremented m->fs.cfa_offset, even
8244 though we didn't emit the push insn here. */
8245 m->fs.cfa_reg = hard_frame_pointer_rtx;
8246 m->fs.fp_offset = m->fs.cfa_offset;
8247 m->fs.fp_valid = true;
8249 else
8251 /* The frame pointer is not needed so pop %ebp again.
8252 This leaves us with a pristine state. */
8253 emit_insn (gen_pop (hard_frame_pointer_rtx));
8257 /* The first insn of a function that accepts its static chain on the
8258 stack is to push the register that would be filled in by a direct
8259 call. This insn will be skipped by the trampoline. */
8260 else if (ix86_static_chain_on_stack)
8262 static_chain = ix86_static_chain (cfun->decl, false);
8263 insn = emit_insn (gen_push (static_chain));
8264 emit_insn (gen_blockage ());
8266 /* We don't want to interpret this push insn as a register save,
8267 only as a stack adjustment. The real copy of the register as
8268 a save will be done later, if needed. */
8269 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8270 t = gen_rtx_SET (stack_pointer_rtx, t);
8271 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8272 RTX_FRAME_RELATED_P (insn) = 1;
8275 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8276 DRAP is needed and stack realignment is really needed after reload. */
8277 if (stack_realign_drap)
8279 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8281 /* Can't use DRAP in interrupt function. */
8282 if (cfun->machine->func_type != TYPE_NORMAL)
8283 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8284 "in interrupt service routine. This may be worked "
8285 "around by avoiding functions with aggregate return.");
8287 /* Only need to push parameter pointer reg if it is caller saved. */
8288 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8290 /* Push arg pointer reg */
8291 insn = emit_insn (gen_push (crtl->drap_reg));
8292 RTX_FRAME_RELATED_P (insn) = 1;
8295 /* Grab the argument pointer. */
8296 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8297 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8298 RTX_FRAME_RELATED_P (insn) = 1;
8299 m->fs.cfa_reg = crtl->drap_reg;
8300 m->fs.cfa_offset = 0;
8302 /* Align the stack. */
8303 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8304 GEN_INT (-align_bytes)));
8305 RTX_FRAME_RELATED_P (insn) = 1;
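/* For example (a sketch, not specific to any ABI): if the needed alignment
   is 32 bytes, align_bytes is 32 and the AND mask is -32, i.e. ...ffffffe0,
   which rounds the stack pointer down to the next 32-byte boundary.  */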
8307 /* Replicate the return address on the stack so that return
8308 address can be reached via (argp - 1) slot. This is needed
8309 to implement macro RETURN_ADDR_RTX and intrinsic function
8310 expand_builtin_return_addr etc. */
8311 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8312 t = gen_frame_mem (word_mode, t);
8313 insn = emit_insn (gen_push (t));
8314 RTX_FRAME_RELATED_P (insn) = 1;
8316 /* For the purposes of frame and register save area addressing,
8317 we've started over with a new frame. */
8318 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8319 m->fs.realigned = true;
8321 if (static_chain)
8323 /* Replicate static chain on the stack so that static chain
8324 can be reached via (argp - 2) slot. This is needed for
8325 nested function with stack realignment. */
8326 insn = emit_insn (gen_push (static_chain));
8327 RTX_FRAME_RELATED_P (insn) = 1;
8331 int_registers_saved = (frame.nregs == 0);
8332 sse_registers_saved = (frame.nsseregs == 0);
8333 save_stub_call_needed = (m->call_ms2sysv);
8334 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8336 if (frame_pointer_needed && !m->fs.fp_valid)
8338 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8339 slower on all targets. Also sdb didn't like it. */
8340 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8341 RTX_FRAME_RELATED_P (insn) = 1;
8343 /* Push registers now, before setting the frame pointer
8344 on SEH target. */
8345 if (!int_registers_saved
8346 && TARGET_SEH
8347 && !frame.save_regs_using_mov)
8349 ix86_emit_save_regs ();
8350 int_registers_saved = true;
8351 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8354 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8356 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8357 RTX_FRAME_RELATED_P (insn) = 1;
8359 if (m->fs.cfa_reg == stack_pointer_rtx)
8360 m->fs.cfa_reg = hard_frame_pointer_rtx;
8361 m->fs.fp_offset = m->fs.sp_offset;
8362 m->fs.fp_valid = true;
8366 if (!int_registers_saved)
8368 /* If saving registers via PUSH, do so now. */
8369 if (!frame.save_regs_using_mov)
8371 ix86_emit_save_regs ();
8372 int_registers_saved = true;
8373 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8376 /* When using the red zone we may start register saving before allocating
8377 the stack frame, saving one cycle of the prologue. However, avoid
8378 doing this if we have to probe the stack; at least on x86_64 the
8379 stack probe can turn into a call that clobbers a red zone location. */
8380 else if (ix86_using_red_zone ()
8381 && (! TARGET_STACK_PROBE
8382 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8384 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8385 int_registers_saved = true;
8389 if (stack_realign_fp)
8391 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8392 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8394 /* Record last valid frame pointer offset. */
8395 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8397 /* The computation of the size of the re-aligned stack frame means
8398 that we must allocate the size of the register save area before
8399 performing the actual alignment. Otherwise we cannot guarantee
8400 that there's enough storage above the realignment point. */
8401 allocate = frame.reg_save_offset - m->fs.sp_offset
8402 + frame.stack_realign_allocate;
8403 if (allocate)
8404 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8405 GEN_INT (-allocate), -1, false);
8407 /* Align the stack. */
8408 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8409 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8410 m->fs.sp_realigned_offset = m->fs.sp_offset
8411 - frame.stack_realign_allocate;
8412 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8413 Beyond this point, stack access should be done via choose_baseaddr or
8414 by using sp_valid_at and fp_valid_at to determine the correct base
8415 register. Henceforth, any CFA offset should be thought of as logical
8416 and not physical. */
8417 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8418 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8419 m->fs.sp_realigned = true;
8421 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8422 is needed to describe where a register is saved using a realigned
8423 stack pointer, so we need to invalidate the stack pointer for that
8424 target. */
8425 if (TARGET_SEH)
8426 m->fs.sp_valid = false;
8428 /* If SP offset is non-immediate after allocation of the stack frame,
8429 then emit SSE saves or stub call prior to allocating the rest of the
8430 stack frame. This is less efficient for the out-of-line stub because
8431 we can't combine allocations across the call barrier, but it's better
8432 than using a scratch register. */
8433 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8434 - m->fs.sp_realigned_offset),
8435 Pmode))
8437 if (!sse_registers_saved)
8439 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8440 sse_registers_saved = true;
8442 else if (save_stub_call_needed)
8444 ix86_emit_outlined_ms2sysv_save (frame);
8445 save_stub_call_needed = false;
8450 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8452 if (flag_stack_usage_info)
8454 /* We start to count from ARG_POINTER. */
8455 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8457 /* If it was realigned, take into account the fake frame. */
8458 if (stack_realign_drap)
8460 if (ix86_static_chain_on_stack)
8461 stack_size += UNITS_PER_WORD;
8463 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8464 stack_size += UNITS_PER_WORD;
8466 /* This over-estimates by 1 minimal-stack-alignment-unit but
8467 mitigates that by counting in the new return address slot. */
8468 current_function_dynamic_stack_size
8469 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8472 current_function_static_stack_size = stack_size;
8475 /* On SEH target with very large frame size, allocate an area to save
8476 SSE registers (as the very large allocation won't be described). */
8477 if (TARGET_SEH
8478 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8479 && !sse_registers_saved)
8481 HOST_WIDE_INT sse_size
8482 = frame.sse_reg_save_offset - frame.reg_save_offset;
8484 gcc_assert (int_registers_saved);
8486 /* No need to do stack checking as the area will be immediately
8487 written. */
8488 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8489 GEN_INT (-sse_size), -1,
8490 m->fs.cfa_reg == stack_pointer_rtx);
8491 allocate -= sse_size;
8492 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8493 sse_registers_saved = true;
8496 /* If stack clash protection is requested, then probe the stack. */
8497 if (allocate >= 0 && flag_stack_clash_protection)
8499 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8500 allocate = 0;
8503 /* The stack has already been decremented by the instruction calling us
8504 so probe if the size is non-negative to preserve the protection area. */
8505 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8507 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8509 if (STACK_CHECK_MOVING_SP)
8511 if (crtl->is_leaf
8512 && !cfun->calls_alloca
8513 && allocate <= probe_interval)
8516 else
8518 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8519 allocate = 0;
8523 else
8525 HOST_WIDE_INT size = allocate;
8527 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8528 size = 0x80000000 - get_stack_check_protect () - 1;
8530 if (TARGET_STACK_PROBE)
8532 if (crtl->is_leaf && !cfun->calls_alloca)
8534 if (size > probe_interval)
8535 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8537 else
8538 ix86_emit_probe_stack_range (0,
8539 size + get_stack_check_protect (),
8540 int_registers_saved);
8542 else
8544 if (crtl->is_leaf && !cfun->calls_alloca)
8546 if (size > probe_interval
8547 && size > get_stack_check_protect ())
8548 ix86_emit_probe_stack_range (get_stack_check_protect (),
8549 (size
8550 - get_stack_check_protect ()),
8551 int_registers_saved);
8553 else
8554 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8555 int_registers_saved);
8560 if (allocate == 0)
8562 else if (!ix86_target_stack_probe ()
8563 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8565 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8566 GEN_INT (-allocate), -1,
8567 m->fs.cfa_reg == stack_pointer_rtx);
8569 else
8571 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8572 rtx r10 = NULL;
8573 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8574 bool eax_live = ix86_eax_live_at_start_p ();
8575 bool r10_live = false;
8577 if (TARGET_64BIT)
8578 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8580 if (eax_live)
8582 insn = emit_insn (gen_push (eax));
8583 allocate -= UNITS_PER_WORD;
8584 /* Note that SEH directives need to continue tracking the stack
8585 pointer even after the frame pointer has been set up. */
8586 if (sp_is_cfa_reg || TARGET_SEH)
8588 if (sp_is_cfa_reg)
8589 m->fs.cfa_offset += UNITS_PER_WORD;
8590 RTX_FRAME_RELATED_P (insn) = 1;
8591 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8592 gen_rtx_SET (stack_pointer_rtx,
8593 plus_constant (Pmode,
8594 stack_pointer_rtx,
8595 -UNITS_PER_WORD)));
8599 if (r10_live)
8601 r10 = gen_rtx_REG (Pmode, R10_REG);
8602 insn = emit_insn (gen_push (r10));
8603 allocate -= UNITS_PER_WORD;
8604 if (sp_is_cfa_reg || TARGET_SEH)
8606 if (sp_is_cfa_reg)
8607 m->fs.cfa_offset += UNITS_PER_WORD;
8608 RTX_FRAME_RELATED_P (insn) = 1;
8609 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8610 gen_rtx_SET (stack_pointer_rtx,
8611 plus_constant (Pmode,
8612 stack_pointer_rtx,
8613 -UNITS_PER_WORD)));
8617 emit_move_insn (eax, GEN_INT (allocate));
8618 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8620 /* Use the fact that AX still contains ALLOCATE. */
8621 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8622 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8624 if (sp_is_cfa_reg || TARGET_SEH)
8626 if (sp_is_cfa_reg)
8627 m->fs.cfa_offset += allocate;
8628 RTX_FRAME_RELATED_P (insn) = 1;
8629 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8630 gen_rtx_SET (stack_pointer_rtx,
8631 plus_constant (Pmode, stack_pointer_rtx,
8632 -allocate)));
8634 m->fs.sp_offset += allocate;
8636 /* Use stack_pointer_rtx for relative addressing so that code works for
8637 realigned stack. But this means that we need a blockage to prevent
8638 stores based on the frame pointer from being scheduled before. */
8639 if (r10_live && eax_live)
8641 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8642 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8643 gen_frame_mem (word_mode, t));
8644 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8645 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8646 gen_frame_mem (word_mode, t));
8647 emit_insn (gen_memory_blockage ());
8649 else if (eax_live || r10_live)
8651 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8652 emit_move_insn (gen_rtx_REG (word_mode,
8653 (eax_live ? AX_REG : R10_REG)),
8654 gen_frame_mem (word_mode, t));
8655 emit_insn (gen_memory_blockage ());
8658 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8660 /* If we haven't already set up the frame pointer, do so now. */
8661 if (frame_pointer_needed && !m->fs.fp_valid)
8663 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8664 GEN_INT (frame.stack_pointer_offset
8665 - frame.hard_frame_pointer_offset));
8666 insn = emit_insn (insn);
8667 RTX_FRAME_RELATED_P (insn) = 1;
8668 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8670 if (m->fs.cfa_reg == stack_pointer_rtx)
8671 m->fs.cfa_reg = hard_frame_pointer_rtx;
8672 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8673 m->fs.fp_valid = true;
8676 if (!int_registers_saved)
8677 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8678 if (!sse_registers_saved)
8679 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8680 else if (save_stub_call_needed)
8681 ix86_emit_outlined_ms2sysv_save (frame);
8683 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT in
8684 the prologue. */
8685 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8687 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8688 insn = emit_insn (gen_set_got (pic));
8689 RTX_FRAME_RELATED_P (insn) = 1;
8690 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8691 emit_insn (gen_prologue_use (pic));
8692 /* Delete an already emitted SET_GOT if it exists and is allocated to
8693 REAL_PIC_OFFSET_TABLE_REGNUM. */
8694 ix86_elim_entry_set_got (pic);
8697 if (crtl->drap_reg && !crtl->stack_realign_needed)
8699 /* vDRAP is set up, but after reload it turns out stack realignment
8700 isn't necessary; here we emit prologue code to set up DRAP
8701 without the stack realign adjustment. */
8702 t = choose_baseaddr (0, NULL);
8703 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8706 /* Prevent instructions from being scheduled into register save push
8707 sequence when access to the redzone area is done through frame pointer.
8708 The offset between the frame pointer and the stack pointer is calculated
8709 relative to the value of the stack pointer at the end of the function
8710 prologue, and moving instructions that access redzone area via frame
8711 pointer inside push sequence violates this assumption. */
8712 if (frame_pointer_needed && frame.red_zone_size)
8713 emit_insn (gen_memory_blockage ());
8715 /* SEH requires that the prologue end within 256 bytes of the start of
8716 the function. Prevent instruction schedules that would extend that.
8717 Further, prevent alloca modifications to the stack pointer from being
8718 combined with prologue modifications. */
8719 if (TARGET_SEH)
8720 emit_insn (gen_prologue_use (stack_pointer_rtx));
8723 /* Emit code to restore REG using a POP insn. */
8725 static void
8726 ix86_emit_restore_reg_using_pop (rtx reg)
8728 struct machine_function *m = cfun->machine;
8729 rtx_insn *insn = emit_insn (gen_pop (reg));
8731 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8732 m->fs.sp_offset -= UNITS_PER_WORD;
8734 if (m->fs.cfa_reg == crtl->drap_reg
8735 && REGNO (reg) == REGNO (crtl->drap_reg))
8737 /* Previously we'd represented the CFA as an expression
8738 like *(%ebp - 8). We've just popped that value from
8739 the stack, which means we need to reset the CFA to
8740 the drap register. This will remain until we restore
8741 the stack pointer. */
8742 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8743 RTX_FRAME_RELATED_P (insn) = 1;
8745 /* This means that the DRAP register is valid for addressing too. */
8746 m->fs.drap_valid = true;
8747 return;
8750 if (m->fs.cfa_reg == stack_pointer_rtx)
8752 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8753 x = gen_rtx_SET (stack_pointer_rtx, x);
8754 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8755 RTX_FRAME_RELATED_P (insn) = 1;
8757 m->fs.cfa_offset -= UNITS_PER_WORD;
8760 /* When the frame pointer is the CFA, and we pop it, we are
8761 swapping back to the stack pointer as the CFA. This happens
8762 for stack frames that don't allocate other data, so we assume
8763 the stack pointer is now pointing at the return address, i.e.
8764 the function entry state, which makes the offset be 1 word. */
8765 if (reg == hard_frame_pointer_rtx)
8767 m->fs.fp_valid = false;
8768 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8770 m->fs.cfa_reg = stack_pointer_rtx;
8771 m->fs.cfa_offset -= UNITS_PER_WORD;
8773 add_reg_note (insn, REG_CFA_DEF_CFA,
8774 plus_constant (Pmode, stack_pointer_rtx,
8775 m->fs.cfa_offset));
8776 RTX_FRAME_RELATED_P (insn) = 1;
8781 /* Emit code to restore saved registers using POP insns. */
8783 static void
8784 ix86_emit_restore_regs_using_pop (void)
8786 unsigned int regno;
8788 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8789 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8790 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8793 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
8794 omit the emit and only attach the notes to INSN. */
8796 static void
8797 ix86_emit_leave (rtx_insn *insn)
8799 struct machine_function *m = cfun->machine;
8801 if (!insn)
8802 insn = emit_insn (gen_leave (word_mode));
8804 ix86_add_queued_cfa_restore_notes (insn);
8806 gcc_assert (m->fs.fp_valid);
8807 m->fs.sp_valid = true;
8808 m->fs.sp_realigned = false;
8809 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8810 m->fs.fp_valid = false;
8812 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8814 m->fs.cfa_reg = stack_pointer_rtx;
8815 m->fs.cfa_offset = m->fs.sp_offset;
8817 add_reg_note (insn, REG_CFA_DEF_CFA,
8818 plus_constant (Pmode, stack_pointer_rtx,
8819 m->fs.sp_offset));
8820 RTX_FRAME_RELATED_P (insn) = 1;
8822 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8823 m->fs.fp_offset);
8826 /* Emit code to restore saved registers using MOV insns.
8827 First register is restored from CFA - CFA_OFFSET. */
8828 static void
8829 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8830 bool maybe_eh_return)
8832 struct machine_function *m = cfun->machine;
8833 unsigned int regno;
8835 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8836 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8838 rtx reg = gen_rtx_REG (word_mode, regno);
8839 rtx mem;
8840 rtx_insn *insn;
8842 mem = choose_baseaddr (cfa_offset, NULL);
8843 mem = gen_frame_mem (word_mode, mem);
8844 insn = emit_move_insn (reg, mem);
8846 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8848 /* Previously we'd represented the CFA as an expression
8849 like *(%ebp - 8). We've just reloaded that value from
8850 the stack, which means we need to reset the CFA to
8851 the drap register. This will remain until we restore
8852 the stack pointer. */
8853 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8854 RTX_FRAME_RELATED_P (insn) = 1;
8856 /* This means that the DRAP register is valid for addressing. */
8857 m->fs.drap_valid = true;
8859 else
8860 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8862 cfa_offset -= UNITS_PER_WORD;
8866 /* Emit code to restore saved SSE registers using MOV insns.
8867 First register is restored from CFA - CFA_OFFSET. */
8868 static void
8869 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8870 bool maybe_eh_return)
8872 unsigned int regno;
8874 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8875 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8877 rtx reg = gen_rtx_REG (V4SFmode, regno);
8878 rtx mem;
8879 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8881 mem = choose_baseaddr (cfa_offset, &align);
8882 mem = gen_rtx_MEM (V4SFmode, mem);
8884 /* The location alignment depends upon the base register. */
8885 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8886 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8887 set_mem_align (mem, align);
8888 emit_insn (gen_rtx_SET (reg, mem));
8890 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8892 cfa_offset -= GET_MODE_SIZE (V4SFmode);
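/* Restore the MS-to-SysV clobbered registers that were saved by the
   out-of-line stub described by FRAME.  If USE_CALL, call the restore stub;
   otherwise emit a tail jump to it (using the HFP stub variants when a frame
   pointer is in use).  STYLE is passed to the final stack adjustment.  */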
8896 static void
8897 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8898 bool use_call, int style)
8900 struct machine_function *m = cfun->machine;
8901 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8902 + m->call_ms2sysv_extra_regs;
8903 rtvec v;
8904 unsigned int elems_needed, align, i, vi = 0;
8905 rtx_insn *insn;
8906 rtx sym, tmp;
8907 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8908 rtx r10 = NULL_RTX;
8909 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8910 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8911 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8912 rtx rsi_frame_load = NULL_RTX;
8913 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8914 enum xlogue_stub stub;
8916 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8918 /* If using a realigned stack, we should never start with padding. */
8919 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8921 /* Setup RSI as the stub's base pointer. */
8922 align = GET_MODE_ALIGNMENT (V4SFmode);
8923 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8924 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8926 emit_insn (gen_rtx_SET (rsi, tmp));
8928 /* Get a symbol for the stub. */
8929 if (frame_pointer_needed)
8930 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8931 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8932 else
8933 stub = use_call ? XLOGUE_STUB_RESTORE
8934 : XLOGUE_STUB_RESTORE_TAIL;
8935 sym = xlogue.get_stub_rtx (stub);
8937 elems_needed = ncregs;
8938 if (use_call)
8939 elems_needed += 1;
8940 else
8941 elems_needed += frame_pointer_needed ? 5 : 3;
8942 v = rtvec_alloc (elems_needed);
8944 /* We call the epilogue stub when we need to pop incoming args or when a
8945 sibling call will be the tail call. Otherwise, we emit a jmp to the
8946 epilogue stub and the stub itself is the tail call. */
8947 if (use_call)
8948 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8949 else
8951 RTVEC_ELT (v, vi++) = ret_rtx;
8952 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8953 if (frame_pointer_needed)
8955 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8956 gcc_assert (m->fs.fp_valid);
8957 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8959 tmp = plus_constant (DImode, rbp, 8);
8960 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8961 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8962 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8963 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8965 else
8967 /* If no hard frame pointer, we set R10 to the SP restore value. */
8968 gcc_assert (!m->fs.fp_valid);
8969 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8970 gcc_assert (m->fs.sp_valid);
8972 r10 = gen_rtx_REG (DImode, R10_REG);
8973 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
8974 emit_insn (gen_rtx_SET (r10, tmp));
8976 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8980 /* Generate frame load insns and restore notes. */
8981 for (i = 0; i < ncregs; ++i)
8983 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8984 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8985 rtx reg, frame_load;
8987 reg = gen_rtx_REG (mode, r.regno);
8988 frame_load = gen_frame_load (reg, rsi, r.offset);
8990 /* Save RSI frame load insn & note to add last. */
8991 if (r.regno == SI_REG)
8993 gcc_assert (!rsi_frame_load);
8994 rsi_frame_load = frame_load;
8995 rsi_restore_offset = r.offset;
8997 else
8999 RTVEC_ELT (v, vi++) = frame_load;
9000 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9004 /* Add RSI frame load & restore note at the end. */
9005 gcc_assert (rsi_frame_load);
9006 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9007 RTVEC_ELT (v, vi++) = rsi_frame_load;
9008 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9009 rsi_restore_offset);
9011 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9012 if (!use_call && !frame_pointer_needed)
9014 gcc_assert (m->fs.sp_valid);
9015 gcc_assert (!m->fs.sp_realigned);
9017 /* At this point, R10 should point to frame.stack_realign_offset. */
9018 if (m->fs.cfa_reg == stack_pointer_rtx)
9019 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9020 m->fs.sp_offset = frame.stack_realign_offset;
9023 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9024 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9025 if (use_call)
9026 insn = emit_insn (tmp);
9027 else
9029 insn = emit_jump_insn (tmp);
9030 JUMP_LABEL (insn) = ret_rtx;
9032 if (frame_pointer_needed)
9033 ix86_emit_leave (insn);
9034 else
9036 /* Need CFA adjust note. */
9037 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9038 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9042 RTX_FRAME_RELATED_P (insn) = true;
9043 ix86_add_queued_cfa_restore_notes (insn);
9045 /* If we're not doing a tail-call, we need to adjust the stack. */
9046 if (use_call && m->fs.sp_valid)
9048 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9049 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9050 GEN_INT (dealloc), style,
9051 m->fs.cfa_reg == stack_pointer_rtx);
9055 /* Restore function stack, frame, and registers. */
9057 void
9058 ix86_expand_epilogue (int style)
9060 struct machine_function *m = cfun->machine;
9061 struct machine_frame_state frame_state_save = m->fs;
9062 bool restore_regs_via_mov;
9063 bool using_drap;
9064 bool restore_stub_is_tail = false;
9066 if (ix86_function_naked (current_function_decl))
9068 /* The program should not reach this point. */
9069 emit_insn (gen_ud2 ());
9070 return;
9073 ix86_finalize_stack_frame_flags ();
9074 const struct ix86_frame &frame = cfun->machine->frame;
9076 m->fs.sp_realigned = stack_realign_fp;
9077 m->fs.sp_valid = stack_realign_fp
9078 || !frame_pointer_needed
9079 || crtl->sp_is_unchanging;
9080 gcc_assert (!m->fs.sp_valid
9081 || m->fs.sp_offset == frame.stack_pointer_offset);
9083 /* The FP must be valid if the frame pointer is present. */
9084 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9085 gcc_assert (!m->fs.fp_valid
9086 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9088 /* We must have *some* valid pointer to the stack frame. */
9089 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9091 /* The DRAP is never valid at this point. */
9092 gcc_assert (!m->fs.drap_valid);
9094 /* See the comment about red zone and frame
9095 pointer usage in ix86_expand_prologue. */
9096 if (frame_pointer_needed && frame.red_zone_size)
9097 emit_insn (gen_memory_blockage ());
9099 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9100 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9102 /* Determine the CFA offset of the end of the red-zone. */
9103 m->fs.red_zone_offset = 0;
9104 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9106 /* The red zone begins below the return address and the error code
9107 in an exception handler. */
9108 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9110 /* When the register save area is in the aligned portion of
9111 the stack, determine the maximum runtime displacement that
9112 matches up with the aligned frame. */
9113 if (stack_realign_drap)
9114 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9115 + UNITS_PER_WORD);
9118 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9120 /* Special care must be taken for the normal return case of a function
9121 using eh_return: the eax and edx registers are marked as saved, but
9122 not restored along this path. Adjust the save location to match. */
9123 if (crtl->calls_eh_return && style != 2)
9124 reg_save_offset -= 2 * UNITS_PER_WORD;
9126 /* EH_RETURN requires the use of moves to function properly. */
9127 if (crtl->calls_eh_return)
9128 restore_regs_via_mov = true;
9129 /* SEH requires the use of pops to identify the epilogue. */
9130 else if (TARGET_SEH)
9131 restore_regs_via_mov = false;
9132 /* If we're only restoring one register and sp cannot be used then
9133 use a move instruction to restore the register, since it's
9134 less work than reloading sp and popping the register. */
9135 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9136 restore_regs_via_mov = true;
9137 else if (TARGET_EPILOGUE_USING_MOVE
9138 && cfun->machine->use_fast_prologue_epilogue
9139 && (frame.nregs > 1
9140 || m->fs.sp_offset != reg_save_offset))
9141 restore_regs_via_mov = true;
9142 else if (frame_pointer_needed
9143 && !frame.nregs
9144 && m->fs.sp_offset != reg_save_offset)
9145 restore_regs_via_mov = true;
9146 else if (frame_pointer_needed
9147 && TARGET_USE_LEAVE
9148 && cfun->machine->use_fast_prologue_epilogue
9149 && frame.nregs == 1)
9150 restore_regs_via_mov = true;
9151 else
9152 restore_regs_via_mov = false;
9154 if (restore_regs_via_mov || frame.nsseregs)
9156 /* Ensure that the entire register save area is addressable via
9157 the stack pointer, if we will restore SSE regs via sp. */
9158 if (TARGET_64BIT
9159 && m->fs.sp_offset > 0x7fffffff
9160 && sp_valid_at (frame.stack_realign_offset + 1)
9161 && (frame.nsseregs + frame.nregs) != 0)
9163 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9164 GEN_INT (m->fs.sp_offset
9165 - frame.sse_reg_save_offset),
9166 style,
9167 m->fs.cfa_reg == stack_pointer_rtx);
9171 /* If there are any SSE registers to restore, then we have to do it
9172 via moves, since there's obviously no pop for SSE regs. */
9173 if (frame.nsseregs)
9174 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9175 style == 2);
9177 if (m->call_ms2sysv)
9179 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9181 /* We cannot use a tail-call for the stub if:
9182 1. We have to pop incoming args,
9183 2. We have additional int regs to restore, or
9184 3. A sibling call will be the tail-call, or
9185 4. We are emitting an eh_return_internal epilogue.
9187 TODO: Item 4 has not yet been tested!
9189 If any of the above are true, we will call the stub rather than
9190 jump to it. */
9191 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9192 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9195 /* If we are using an out-of-line stub that is a tail call, then... */
9196 if (m->call_ms2sysv && restore_stub_is_tail)
9198 /* TODO: paranoid tests. (remove eventually) */
9199 gcc_assert (m->fs.sp_valid);
9200 gcc_assert (!m->fs.sp_realigned);
9201 gcc_assert (!m->fs.fp_valid);
9202 gcc_assert (!m->fs.realigned);
9203 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9204 gcc_assert (!crtl->drap_reg);
9205 gcc_assert (!frame.nregs);
9207 else if (restore_regs_via_mov)
9209 rtx t;
9211 if (frame.nregs)
9212 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9214 /* eh_return epilogues need %ecx added to the stack pointer. */
9215 if (style == 2)
9217 rtx sa = EH_RETURN_STACKADJ_RTX;
9218 rtx_insn *insn;
9220 /* %ecx can't be used for both DRAP register and eh_return. */
9221 if (crtl->drap_reg)
9222 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
9224 /* regparm nested functions don't work with eh_return. */
9225 gcc_assert (!ix86_static_chain_on_stack);
9227 if (frame_pointer_needed)
9229 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9230 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9231 emit_insn (gen_rtx_SET (sa, t));
9233 /* NB: eh_return epilogues must restore the frame pointer
9234 in word_mode, since the upper 32 bits of the RBP register
9235 can have any value. */
9236 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9237 rtx frame_reg = gen_rtx_REG (word_mode,
9238 HARD_FRAME_POINTER_REGNUM);
9239 insn = emit_move_insn (frame_reg, t);
9241 /* Note that we use SA as a temporary CFA, as the return
9242 address is at the proper place relative to it. We
9243 pretend this happens at the FP restore insn because
9244 prior to this insn the FP would be stored at the wrong
9245 offset relative to SA, and after this insn we have no
9246 other reasonable register to use for the CFA. We don't
9247 bother resetting the CFA to the SP for the duration of
9248 the return insn, unless the control flow instrumentation
9249 is done. In this case the SP is used later and we have
9250 to reset CFA to SP. */
9251 add_reg_note (insn, REG_CFA_DEF_CFA,
9252 plus_constant (Pmode, sa, UNITS_PER_WORD));
9253 ix86_add_queued_cfa_restore_notes (insn);
9254 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9255 RTX_FRAME_RELATED_P (insn) = 1;
9257 m->fs.cfa_reg = sa;
9258 m->fs.cfa_offset = UNITS_PER_WORD;
9259 m->fs.fp_valid = false;
9261 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9262 const0_rtx, style,
9263 flag_cf_protection);
9265 else
9267 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9268 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9269 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9270 ix86_add_queued_cfa_restore_notes (insn);
9272 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9273 if (m->fs.cfa_offset != UNITS_PER_WORD)
9275 m->fs.cfa_offset = UNITS_PER_WORD;
9276 add_reg_note (insn, REG_CFA_DEF_CFA,
9277 plus_constant (Pmode, stack_pointer_rtx,
9278 UNITS_PER_WORD));
9279 RTX_FRAME_RELATED_P (insn) = 1;
9282 m->fs.sp_offset = UNITS_PER_WORD;
9283 m->fs.sp_valid = true;
9284 m->fs.sp_realigned = false;
9287 else
9289 /* SEH requires that the function end with (1) a stack adjustment
9290 if necessary, (2) a sequence of pops, and (3) a return or
9291 jump instruction. Prevent insns from the function body from
9292 being scheduled into this sequence. */
9293 if (TARGET_SEH)
9295 /* Prevent a catch region from being adjacent to the standard
9296 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9297 nor several other flags that would be interesting to test are
9298 set up yet. */
9299 if (flag_non_call_exceptions)
9300 emit_insn (gen_nops (const1_rtx));
9301 else
9302 emit_insn (gen_blockage ());
9305 /* First step is to deallocate the stack frame so that we can
9306 pop the registers. If the stack pointer was realigned, it needs
9307 to be restored now. Also do it on SEH target for very large
9308 frame as the emitted instructions aren't allowed by the ABI
9309 in epilogues. */
9310 if (!m->fs.sp_valid || m->fs.sp_realigned
9311 || (TARGET_SEH
9312 && (m->fs.sp_offset - reg_save_offset
9313 >= SEH_MAX_FRAME_SIZE)))
9315 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9316 GEN_INT (m->fs.fp_offset
9317 - reg_save_offset),
9318 style, false);
9320 else if (m->fs.sp_offset != reg_save_offset)
9322 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9323 GEN_INT (m->fs.sp_offset
9324 - reg_save_offset),
9325 style,
9326 m->fs.cfa_reg == stack_pointer_rtx);
9329 ix86_emit_restore_regs_using_pop ();
9332 /* If we used a frame pointer and haven't already got rid of it,
9333 then do so now. */
9334 if (m->fs.fp_valid)
9336 /* If the stack pointer is valid and pointing at the frame
9337 pointer store address, then we only need a pop. */
9338 if (sp_valid_at (frame.hfp_save_offset)
9339 && m->fs.sp_offset == frame.hfp_save_offset)
9340 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9341 /* Leave results in shorter dependency chains on CPUs that are
9342 able to grok it fast. */
9343 else if (TARGET_USE_LEAVE
9344 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9345 || !cfun->machine->use_fast_prologue_epilogue)
9346 ix86_emit_leave (NULL);
9347 else
9349 pro_epilogue_adjust_stack (stack_pointer_rtx,
9350 hard_frame_pointer_rtx,
9351 const0_rtx, style, !using_drap);
9352 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9356 if (using_drap)
9358 int param_ptr_offset = UNITS_PER_WORD;
9359 rtx_insn *insn;
9361 gcc_assert (stack_realign_drap);
9363 if (ix86_static_chain_on_stack)
9364 param_ptr_offset += UNITS_PER_WORD;
9365 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9366 param_ptr_offset += UNITS_PER_WORD;
9368 insn = emit_insn (gen_rtx_SET
9369 (stack_pointer_rtx,
9370 plus_constant (Pmode, crtl->drap_reg,
9371 -param_ptr_offset)));
9372 m->fs.cfa_reg = stack_pointer_rtx;
9373 m->fs.cfa_offset = param_ptr_offset;
9374 m->fs.sp_offset = param_ptr_offset;
9375 m->fs.realigned = false;
9377 add_reg_note (insn, REG_CFA_DEF_CFA,
9378 plus_constant (Pmode, stack_pointer_rtx,
9379 param_ptr_offset));
9380 RTX_FRAME_RELATED_P (insn) = 1;
9382 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9383 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9386 /* At this point the stack pointer must be valid, and we must have
9387 restored all of the registers. We may not have deallocated the
9388 entire stack frame. We've delayed this until now because it may
9389 be possible to merge the local stack deallocation with the
9390 deallocation forced by ix86_static_chain_on_stack. */
9391 gcc_assert (m->fs.sp_valid);
9392 gcc_assert (!m->fs.sp_realigned);
9393 gcc_assert (!m->fs.fp_valid);
9394 gcc_assert (!m->fs.realigned);
9395 if (m->fs.sp_offset != UNITS_PER_WORD)
9397 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9398 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9399 style, true);
9401 else
9402 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9404 /* Sibcall epilogues don't want a return instruction. */
9405 if (style == 0)
9407 m->fs = frame_state_save;
9408 return;
9411 if (cfun->machine->func_type != TYPE_NORMAL)
9412 emit_jump_insn (gen_interrupt_return ());
9413 else if (crtl->args.pops_args && crtl->args.size)
9415 rtx popc = GEN_INT (crtl->args.pops_args);
9417 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9418 address, do explicit add, and jump indirectly to the caller. */
9420 if (crtl->args.pops_args >= 65536)
9422 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9423 rtx_insn *insn;
9425 /* There is no "pascal" calling convention in any 64bit ABI. */
9426 gcc_assert (!TARGET_64BIT);
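/* What follows emits, roughly (a 32-bit sketch, with N standing for
   crtl->args.pops_args):
	popl	%ecx		# grab the return address
	addl	$N, %esp	# pop the >64K of incoming arguments
	jmp	*%ecx		# return to the caller
*/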
9428 insn = emit_insn (gen_pop (ecx));
9429 m->fs.cfa_offset -= UNITS_PER_WORD;
9430 m->fs.sp_offset -= UNITS_PER_WORD;
9432 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9433 x = gen_rtx_SET (stack_pointer_rtx, x);
9434 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9435 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9436 RTX_FRAME_RELATED_P (insn) = 1;
9438 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9439 popc, -1, true);
9440 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9442 else
9443 emit_jump_insn (gen_simple_return_pop_internal (popc));
9445 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9447 /* In case of a return from EH, a simple return cannot be used
9448 since the return address will be compared with the shadow stack
9449 return address. Use an indirect jump instead. */
9450 if (style == 2 && flag_cf_protection)
9452 /* Register used in indirect jump must be in word_mode. But
9453 Pmode may not be the same as word_mode for x32. */
9454 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9455 rtx_insn *insn;
9457 insn = emit_insn (gen_pop (ecx));
9458 m->fs.cfa_offset -= UNITS_PER_WORD;
9459 m->fs.sp_offset -= UNITS_PER_WORD;
9461 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9462 x = gen_rtx_SET (stack_pointer_rtx, x);
9463 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9464 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9465 RTX_FRAME_RELATED_P (insn) = 1;
9467 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9469 else
9470 emit_jump_insn (gen_simple_return_internal ());
9473 /* Restore the state back to the state from the prologue,
9474 so that it's correct for the next epilogue. */
9475 m->fs = frame_state_save;
9478 /* Reset from the function's potential modifications. */
9480 static void
9481 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9483 if (pic_offset_table_rtx
9484 && !ix86_use_pseudo_pic_reg ())
9485 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9487 if (TARGET_MACHO)
9489 rtx_insn *insn = get_last_insn ();
9490 rtx_insn *deleted_debug_label = NULL;
9492 /* Mach-O doesn't support labels at the end of objects, so if
9493 it looks like we might want one, take special action.
9494 First, collect any sequence of deleted debug labels. */
9495 while (insn
9496 && NOTE_P (insn)
9497 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9499 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9500 notes only, instead set their CODE_LABEL_NUMBER to -1,
9501 otherwise there would be code generation differences
9502 between -g and -g0. */
9503 if (NOTE_P (insn) && NOTE_KIND (insn)
9504 == NOTE_INSN_DELETED_DEBUG_LABEL)
9505 deleted_debug_label = insn;
9506 insn = PREV_INSN (insn);
9509 /* If we have:
9510 label:
9511 barrier
9512 then this needs to be detected, so skip past the barrier. */
9514 if (insn && BARRIER_P (insn))
9515 insn = PREV_INSN (insn);
9517 /* Up to now we've only seen notes or barriers. */
9518 if (insn)
9520 if (LABEL_P (insn)
9521 || (NOTE_P (insn)
9522 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9523 /* Trailing label. */
9524 fputs ("\tnop\n", file);
9525 else if (cfun && ! cfun->is_thunk)
9527 /* See if we have a completely empty function body, skipping
9528 the special case of the picbase thunk emitted as asm. */
9529 while (insn && ! INSN_P (insn))
9530 insn = PREV_INSN (insn);
9531 /* If we don't find any insns, we've got an empty function body;
9532 i.e. completely empty, without a return or branch. This is
9533 taken as the case where a function body has been removed
9534 because it contains an inline __builtin_unreachable(). GCC
9535 declares that reaching __builtin_unreachable() means UB so
9536 we're not obliged to do anything special; however, we want
9537 non-zero-sized function bodies. To meet this, and help the
9538 user out, let's trap the case. */
9539 if (insn == NULL)
9540 fputs ("\tud2\n", file);
9543 else if (deleted_debug_label)
9544 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9545 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9546 CODE_LABEL_NUMBER (insn) = -1;
9550 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9552 void
9553 ix86_print_patchable_function_entry (FILE *file,
9554 unsigned HOST_WIDE_INT patch_area_size,
9555 bool record_p)
9557 if (cfun->machine->function_label_emitted)
9559 /* NB: When ix86_print_patchable_function_entry is called after
9560 function table has been emitted, we have inserted or queued
9561 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9562 place. There is nothing to do here. */
9563 return;
9566 default_print_patchable_function_entry (file, patch_area_size,
9567 record_p);
9570 /* Output patchable area. NB: default_print_patchable_function_entry
9571 isn't available in i386.md. */
9573 void
9574 ix86_output_patchable_area (unsigned int patch_area_size,
9575 bool record_p)
9577 default_print_patchable_function_entry (asm_out_file,
9578 patch_area_size,
9579 record_p);
9582 /* Return a scratch register to use in the split stack prologue. The
9583 split stack prologue is used for -fsplit-stack. It consists of the
9584 first instructions in the function, even before the regular prologue.
9585 The scratch register can be any caller-saved register which is not
9586 used for parameters or for the static chain. */
9588 static unsigned int
9589 split_stack_prologue_scratch_regno (void)
9591 if (TARGET_64BIT)
9592 return R11_REG;
9593 else
9595 bool is_fastcall, is_thiscall;
9596 int regparm;
9598 is_fastcall = (lookup_attribute ("fastcall",
9599 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9600 != NULL);
9601 is_thiscall = (lookup_attribute ("thiscall",
9602 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9603 != NULL);
9604 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9606 if (is_fastcall)
9608 if (DECL_STATIC_CHAIN (cfun->decl))
9610 sorry ("%<-fsplit-stack%> does not support fastcall with "
9611 "nested function");
9612 return INVALID_REGNUM;
9614 return AX_REG;
9616 else if (is_thiscall)
9618 if (!DECL_STATIC_CHAIN (cfun->decl))
9619 return DX_REG;
9620 return AX_REG;
9622 else if (regparm < 3)
9624 if (!DECL_STATIC_CHAIN (cfun->decl))
9625 return CX_REG;
9626 else
9628 if (regparm >= 2)
9630 sorry ("%<-fsplit-stack%> does not support 2 register "
9631 "parameters for a nested function");
9632 return INVALID_REGNUM;
9634 return DX_REG;
9637 else
9639 /* FIXME: We could make this work by pushing a register
9640 around the addition and comparison. */
9641 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9642 return INVALID_REGNUM;
9647 /* A SYMBOL_REF for the function which allocates new stack space for
9648 -fsplit-stack. */
9650 static GTY(()) rtx split_stack_fn;
9652 /* A SYMBOL_REF for the variant of __morestack used with the large
9653 code model. */
9655 static GTY(()) rtx split_stack_fn_large;
9657 /* Return location of the stack guard value in the TLS block. */
9659 static rtx
9660 ix86_split_stack_guard (void)
9662 int offset;
9663 addr_space_t as = DEFAULT_TLS_SEG_REG;
9664 rtx r;
9666 gcc_assert (flag_split_stack);
9668 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9669 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9670 #else
9671 gcc_unreachable ();
9672 #endif
9674 r = GEN_INT (offset);
9675 r = gen_const_mem (Pmode, r);
9676 set_mem_addr_space (r, as);
9678 return r;
9681 /* Handle -fsplit-stack. These are the first instructions in the
9682 function, even before the regular prologue. */
9684 void
9685 ix86_expand_split_stack_prologue (void)
9687 HOST_WIDE_INT allocate;
9688 unsigned HOST_WIDE_INT args_size;
9689 rtx_code_label *label;
9690 rtx limit, current, allocate_rtx, call_fusage;
9691 rtx_insn *call_insn;
9692 rtx scratch_reg = NULL_RTX;
9693 rtx_code_label *varargs_label = NULL;
9694 rtx fn;
9696 gcc_assert (flag_split_stack && reload_completed);
9698 ix86_finalize_stack_frame_flags ();
9699 struct ix86_frame &frame = cfun->machine->frame;
9700 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9702 /* This is the label we will branch to if we have enough stack
9703 space. We expect the basic block reordering pass to reverse this
9704 branch if optimizing, so that we branch in the unlikely case. */
9705 label = gen_label_rtx ();
9707 /* We need to compare the stack pointer minus the frame size with
9708 the stack boundary in the TCB. The stack boundary always gives
9709 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9710 can compare directly. Otherwise we need to do an addition. */
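/* A rough sketch of what this expands to on 64-bit GNU/Linux (assuming the
   guard lives at %fs:TARGET_THREAD_SPLIT_STACK_OFFSET and %r11 is the
   scratch register):

      lea  -FRAMESIZE(%rsp), %r11   # only for large frames
      cmp  %fs:<split-stack offset>, %r11
      jae  .Lenough_stack
      ... call __morestack ...
   .Lenough_stack:

   Small frames compare %rsp against the guard directly.  */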
9712 limit = ix86_split_stack_guard ();
9714 if (allocate < SPLIT_STACK_AVAILABLE)
9715 current = stack_pointer_rtx;
9716 else
9718 unsigned int scratch_regno;
9719 rtx offset;
9721 /* We need a scratch register to hold the stack pointer minus
9722 the required frame size. Since this is the very start of the
9723 function, the scratch register can be any caller-saved
9724 register which is not used for parameters. */
9725 offset = GEN_INT (- allocate);
9726 scratch_regno = split_stack_prologue_scratch_regno ();
9727 if (scratch_regno == INVALID_REGNUM)
9728 return;
9729 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9730 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9732 /* We don't use gen_add in this case because it will
9733 want to split to lea, but when not optimizing the insn
9734 will not be split after this point. */
9735 emit_insn (gen_rtx_SET (scratch_reg,
9736 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9737 offset)));
9739 else
9741 emit_move_insn (scratch_reg, offset);
9742 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9744 current = scratch_reg;
9747 ix86_expand_branch (GEU, current, limit, label);
9748 rtx_insn *jump_insn = get_last_insn ();
9749 JUMP_LABEL (jump_insn) = label;
9751 /* Mark the jump as very likely to be taken. */
9752 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9754 if (split_stack_fn == NULL_RTX)
9756 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9757 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9759 fn = split_stack_fn;
9761 /* Get more stack space. We pass in the desired stack space and the
9762 size of the arguments to copy to the new stack. In 32-bit mode
9763 we push the parameters; __morestack will return on a new stack
9764 anyhow. In 64-bit mode we pass the parameters in r10 and
9765 r11. */
9766 allocate_rtx = GEN_INT (allocate);
9767 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9768 call_fusage = NULL_RTX;
9769 rtx pop = NULL_RTX;
9770 if (TARGET_64BIT)
9772 rtx reg10, reg11;
9774 reg10 = gen_rtx_REG (Pmode, R10_REG);
9775 reg11 = gen_rtx_REG (Pmode, R11_REG);
9777 /* If this function uses a static chain, it will be in %r10.
9778 Preserve it across the call to __morestack. */
9779 if (DECL_STATIC_CHAIN (cfun->decl))
9781 rtx rax;
9783 rax = gen_rtx_REG (word_mode, AX_REG);
9784 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9785 use_reg (&call_fusage, rax);
9788 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9789 && !TARGET_PECOFF)
9791 HOST_WIDE_INT argval;
9793 gcc_assert (Pmode == DImode);
9794 /* When using the large model we need to load the address
9795 into a register, and we've run out of registers. So we
9796 switch to a different calling convention, and we call a
9797 different function: __morestack_large. We pass the
9798 argument size in the upper 32 bits of r10 and pass the
9799 frame size in the lower 32 bits. */
9800 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9801 gcc_assert ((args_size & 0xffffffff) == args_size);
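/* For example, with hypothetical values args_size == 32 and allocate == 4096,
   argval below becomes (32 << 32) + 4096 == 0x0000002000001000: the argument
   size sits in the upper half of %r10 and the frame size in the lower half.  */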
9803 if (split_stack_fn_large == NULL_RTX)
9805 split_stack_fn_large
9806 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9807 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9809 if (ix86_cmodel == CM_LARGE_PIC)
9811 rtx_code_label *label;
9812 rtx x;
9814 label = gen_label_rtx ();
9815 emit_label (label);
9816 LABEL_PRESERVE_P (label) = 1;
9817 emit_insn (gen_set_rip_rex64 (reg10, label));
9818 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9819 emit_insn (gen_add2_insn (reg10, reg11));
9820 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9821 UNSPEC_GOT);
9822 x = gen_rtx_CONST (Pmode, x);
9823 emit_move_insn (reg11, x);
9824 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9825 x = gen_const_mem (Pmode, x);
9826 emit_move_insn (reg11, x);
9828 else
9829 emit_move_insn (reg11, split_stack_fn_large);
9831 fn = reg11;
9833 argval = ((args_size << 16) << 16) + allocate;
9834 emit_move_insn (reg10, GEN_INT (argval));
9836 else
9838 emit_move_insn (reg10, allocate_rtx);
9839 emit_move_insn (reg11, GEN_INT (args_size));
9840 use_reg (&call_fusage, reg11);
9843 use_reg (&call_fusage, reg10);
9845 else
9847 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9848 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9849 insn = emit_insn (gen_push (allocate_rtx));
9850 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9851 pop = GEN_INT (2 * UNITS_PER_WORD);
9853 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9854 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9855 pop, false);
9856 add_function_usage_to (call_insn, call_fusage);
9857 if (!TARGET_64BIT)
9858 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9859 /* Indicate that this function can't jump to non-local gotos. */
9860 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9862 /* In order to make call/return prediction work right, we now need
9863 to execute a return instruction. See
9864 libgcc/config/i386/morestack.S for the details on how this works.
9866 For flow purposes gcc must not see this as a return
9867 instruction--we need control flow to continue at the subsequent
9868 label. Therefore, we use an unspec. */
9869 gcc_assert (crtl->args.pops_args < 65536);
9870 rtx_insn *ret_insn
9871 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9873 if ((flag_cf_protection & CF_BRANCH))
9875 /* Insert ENDBR since __morestack will jump back here via indirect
9876 call. */
9877 rtx cet_eb = gen_nop_endbr ();
9878 emit_insn_after (cet_eb, ret_insn);
9881 /* If we are in 64-bit mode and this function uses a static chain,
9882 we saved %r10 in %rax before calling __morestack. */
9883 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9884 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9885 gen_rtx_REG (word_mode, AX_REG));
9887 /* If this function calls va_start, we need to store a pointer to
9888 the arguments on the old stack, because they may not have been
9889 all copied to the new stack. At this point the old stack can be
9890 found at the frame pointer value used by __morestack, because
9891 __morestack has set that up before calling back to us. Here we
9892 store that pointer in a scratch register, and in
9893 ix86_expand_prologue we store the scratch register in a stack
9894 slot. */
9895 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9897 unsigned int scratch_regno;
9898 rtx frame_reg;
9899 int words;
9901 scratch_regno = split_stack_prologue_scratch_regno ();
9902 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9903 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9905 /* 64-bit:
9906 fp -> old fp value
9907 return address within this function
9908 return address of caller of this function
9909 stack arguments
9910 So we add three words to get to the stack arguments.
9912 32-bit:
9913 fp -> old fp value
9914 return address within this function
9915 first argument to __morestack
9916 second argument to __morestack
9917 return address of caller of this function
9918 stack arguments
9919 So we add five words to get to the stack arguments.
9921 words = TARGET_64BIT ? 3 : 5;
9922 emit_insn (gen_rtx_SET (scratch_reg,
9923 plus_constant (Pmode, frame_reg,
9924 words * UNITS_PER_WORD)));
9926 varargs_label = gen_label_rtx ();
9927 emit_jump_insn (gen_jump (varargs_label));
9928 JUMP_LABEL (get_last_insn ()) = varargs_label;
9930 emit_barrier ();
9933 emit_label (label);
9934 LABEL_NUSES (label) = 1;
9936 /* If this function calls va_start, we now have to set the scratch
9937 register for the case where we do not call __morestack. In this
9938 case we need to set it based on the stack pointer. */
9939 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9941 emit_insn (gen_rtx_SET (scratch_reg,
9942 plus_constant (Pmode, stack_pointer_rtx,
9943 UNITS_PER_WORD)));
9945 emit_label (varargs_label);
9946 LABEL_NUSES (varargs_label) = 1;
9950 /* We may have to tell the dataflow pass that the split stack prologue
9951 is initializing a scratch register. */
9953 static void
9954 ix86_live_on_entry (bitmap regs)
9956 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9958 gcc_assert (flag_split_stack);
9959 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9963 /* Extract the parts of an RTL expression that is a valid memory address
9964 for an instruction. Return 0 if the structure of the address is
9965 grossly off. Return -1 if the address contains ASHIFT, so it is not
9966 strictly valid, but still used for computing length of lea instruction. */
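/* Recall that a general i386 address is base + index*scale + disp with an
   optional segment override and scale in {1, 2, 4, 8}; e.g.
   "movl 16(%ebx,%esi,4), %eax" decomposes into base %ebx, index %esi,
   scale 4 and disp 16.  */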
9968 int
9969 ix86_decompose_address (rtx addr, struct ix86_address *out)
9971 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9972 rtx base_reg, index_reg;
9973 HOST_WIDE_INT scale = 1;
9974 rtx scale_rtx = NULL_RTX;
9975 rtx tmp;
9976 int retval = 1;
9977 addr_space_t seg = ADDR_SPACE_GENERIC;
9979 /* Allow zero-extended SImode addresses,
9980 they will be emitted with addr32 prefix. */
9981 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9983 if (GET_CODE (addr) == ZERO_EXTEND
9984 && GET_MODE (XEXP (addr, 0)) == SImode)
9986 addr = XEXP (addr, 0);
9987 if (CONST_INT_P (addr))
9988 return 0;
9990 else if (GET_CODE (addr) == AND
9991 && const_32bit_mask (XEXP (addr, 1), DImode))
9993 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9994 if (addr == NULL_RTX)
9995 return 0;
9997 if (CONST_INT_P (addr))
9998 return 0;
10002 /* Allow SImode subregs of DImode addresses,
10003 they will be emitted with addr32 prefix. */
10004 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10006 if (SUBREG_P (addr)
10007 && GET_MODE (SUBREG_REG (addr)) == DImode)
10009 addr = SUBREG_REG (addr);
10010 if (CONST_INT_P (addr))
10011 return 0;
10015 if (REG_P (addr))
10016 base = addr;
10017 else if (SUBREG_P (addr))
10019 if (REG_P (SUBREG_REG (addr)))
10020 base = addr;
10021 else
10022 return 0;
10024 else if (GET_CODE (addr) == PLUS)
10026 rtx addends[4], op;
10027 int n = 0, i;
10029 op = addr;
10032 if (n >= 4)
10033 return 0;
10034 addends[n++] = XEXP (op, 1);
10035 op = XEXP (op, 0);
10037 while (GET_CODE (op) == PLUS);
10038 if (n >= 4)
10039 return 0;
10040 addends[n] = op;
10042 for (i = n; i >= 0; --i)
10044 op = addends[i];
10045 switch (GET_CODE (op))
10047 case MULT:
10048 if (index)
10049 return 0;
10050 index = XEXP (op, 0);
10051 scale_rtx = XEXP (op, 1);
10052 break;
10054 case ASHIFT:
10055 if (index)
10056 return 0;
10057 index = XEXP (op, 0);
10058 tmp = XEXP (op, 1);
10059 if (!CONST_INT_P (tmp))
10060 return 0;
10061 scale = INTVAL (tmp);
10062 if ((unsigned HOST_WIDE_INT) scale > 3)
10063 return 0;
10064 scale = 1 << scale;
10065 break;
10067 case ZERO_EXTEND:
10068 op = XEXP (op, 0);
10069 if (GET_CODE (op) != UNSPEC)
10070 return 0;
10071 /* FALLTHRU */
10073 case UNSPEC:
10074 if (XINT (op, 1) == UNSPEC_TP
10075 && TARGET_TLS_DIRECT_SEG_REFS
10076 && seg == ADDR_SPACE_GENERIC)
10077 seg = DEFAULT_TLS_SEG_REG;
10078 else
10079 return 0;
10080 break;
10082 case SUBREG:
10083 if (!REG_P (SUBREG_REG (op)))
10084 return 0;
10085 /* FALLTHRU */
10087 case REG:
10088 if (!base)
10089 base = op;
10090 else if (!index)
10091 index = op;
10092 else
10093 return 0;
10094 break;
10096 case CONST:
10097 case CONST_INT:
10098 case SYMBOL_REF:
10099 case LABEL_REF:
10100 if (disp)
10101 return 0;
10102 disp = op;
10103 break;
10105 default:
10106 return 0;
10110 else if (GET_CODE (addr) == MULT)
10112 index = XEXP (addr, 0); /* index*scale */
10113 scale_rtx = XEXP (addr, 1);
10115 else if (GET_CODE (addr) == ASHIFT)
10117 /* We're called for lea too, which implements ashift on occasion. */
10118 index = XEXP (addr, 0);
10119 tmp = XEXP (addr, 1);
10120 if (!CONST_INT_P (tmp))
10121 return 0;
10122 scale = INTVAL (tmp);
10123 if ((unsigned HOST_WIDE_INT) scale > 3)
10124 return 0;
10125 scale = 1 << scale;
10126 retval = -1;
10128 else
10129 disp = addr; /* displacement */
10131 if (index)
10133 if (REG_P (index))
10135 else if (SUBREG_P (index)
10136 && REG_P (SUBREG_REG (index)))
10138 else
10139 return 0;
10142 /* Extract the integral value of scale. */
10143 if (scale_rtx)
10145 if (!CONST_INT_P (scale_rtx))
10146 return 0;
10147 scale = INTVAL (scale_rtx);
10150 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10151 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10153 /* Avoid useless 0 displacement. */
10154 if (disp == const0_rtx && (base || index))
10155 disp = NULL_RTX;
10157 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10158 if (base_reg && index_reg && scale == 1
10159 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10160 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10161 || REGNO (index_reg) == SP_REG))
10163 std::swap (base, index);
10164 std::swap (base_reg, index_reg);
10167 /* Special case: %ebp cannot be encoded as a base without a displacement.
10168 Similarly %r13. */
10169 if (!disp && base_reg
10170 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10171 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10172 || REGNO (base_reg) == BP_REG
10173 || REGNO (base_reg) == R13_REG))
10174 disp = const0_rtx;
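/* (This is a consequence of the ModRM encoding: mod == 00 with a base of
   EBP/R13 does not mean "no displacement" but selects disp32 (or RIP-relative
   addressing in 64-bit mode), so these bases always need at least an explicit
   zero disp8.)  */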
10176 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10177 Avoid this by transforming to [%esi+0].
10178 Reload calls address legitimization without cfun defined, so we need
10179 to test cfun for being non-NULL. */
10180 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10181 && base_reg && !index_reg && !disp
10182 && REGNO (base_reg) == SI_REG)
10183 disp = const0_rtx;
10185 /* Special case: encode reg+reg instead of reg*2. */
10186 if (!base && index && scale == 2)
10187 base = index, base_reg = index_reg, scale = 1;
10189 /* Special case: scaling cannot be encoded without base or displacement. */
10190 if (!base && !disp && index && scale != 1)
10191 disp = const0_rtx;
10193 out->base = base;
10194 out->index = index;
10195 out->disp = disp;
10196 out->scale = scale;
10197 out->seg = seg;
10199 return retval;
10202 /* Return cost of the memory address x.
10203 For i386, it is better to use a complex address than to let gcc copy
10204 the address into a reg and make a new pseudo. But not if the address
10205 requires two regs - that would mean more pseudos with longer
10206 lifetimes. */
10207 static int
10208 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10210 struct ix86_address parts;
10211 int cost = 1;
10212 int ok = ix86_decompose_address (x, &parts);
10214 gcc_assert (ok);
10216 if (parts.base && SUBREG_P (parts.base))
10217 parts.base = SUBREG_REG (parts.base);
10218 if (parts.index && SUBREG_P (parts.index))
10219 parts.index = SUBREG_REG (parts.index);
10221 /* Attempt to minimize number of registers in the address by increasing
10222 address cost for each used register. We don't increase address cost
10223 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10224 is not invariant itself it most likely means that base or index is not
10225 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10226 which is not profitable for x86. */
10227 if (parts.base
10228 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10229 && (current_pass->type == GIMPLE_PASS
10230 || !pic_offset_table_rtx
10231 || !REG_P (parts.base)
10232 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10233 cost++;
10235 if (parts.index
10236 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10237 && (current_pass->type == GIMPLE_PASS
10238 || !pic_offset_table_rtx
10239 || !REG_P (parts.index)
10240 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10241 cost++;
10243 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10244 since its predecode logic can't detect the length of instructions
10245 and decoding degenerates to vector decoding. Increase the cost of such
10246 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10247 to split such addresses or even refuse such addresses at all.
10249 Following addressing modes are affected:
10250 [base+scale*index]
10251 [scale*index+disp]
10252 [base+index]
10254 The first and last case may be avoidable by explicitly coding the zero in
10255 the memory address, but I don't have an AMD-K6 machine handy to check this
10256 theory. */
10258 if (TARGET_K6
10259 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10260 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10261 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10262 cost += 10;
10264 return cost;
10267 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10268 this is used to form addresses to local data when -fPIC is in
10269 use. */
10271 static bool
10272 darwin_local_data_pic (rtx disp)
10274 return (GET_CODE (disp) == UNSPEC
10275 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10278 /* True if operand X should be loaded from GOT. */
10280 bool
10281 ix86_force_load_from_GOT_p (rtx x)
10283 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
10284 && !TARGET_PECOFF && !TARGET_MACHO
10285 && !flag_pic
10286 && ix86_cmodel != CM_LARGE
10287 && GET_CODE (x) == SYMBOL_REF
10288 && SYMBOL_REF_FUNCTION_P (x)
10289 && (!flag_plt
10290 || (SYMBOL_REF_DECL (x)
10291 && lookup_attribute ("noplt",
10292 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
10293 && !SYMBOL_REF_LOCAL_P (x));
10296 /* Determine if a given RTX is a valid constant. We already know this
10297 satisfies CONSTANT_P. */
10299 static bool
10300 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10302 switch (GET_CODE (x))
10304 case CONST:
10305 x = XEXP (x, 0);
10307 if (GET_CODE (x) == PLUS)
10309 if (!CONST_INT_P (XEXP (x, 1)))
10310 return false;
10311 x = XEXP (x, 0);
10314 if (TARGET_MACHO && darwin_local_data_pic (x))
10315 return true;
10317 /* Only some unspecs are valid as "constants". */
10318 if (GET_CODE (x) == UNSPEC)
10319 switch (XINT (x, 1))
10321 case UNSPEC_GOT:
10322 case UNSPEC_GOTOFF:
10323 case UNSPEC_PLTOFF:
10324 return TARGET_64BIT;
10325 case UNSPEC_TPOFF:
10326 case UNSPEC_NTPOFF:
10327 x = XVECEXP (x, 0, 0);
10328 return (GET_CODE (x) == SYMBOL_REF
10329 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10330 case UNSPEC_DTPOFF:
10331 x = XVECEXP (x, 0, 0);
10332 return (GET_CODE (x) == SYMBOL_REF
10333 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10334 default:
10335 return false;
10338 /* We must have drilled down to a symbol. */
10339 if (GET_CODE (x) == LABEL_REF)
10340 return true;
10341 if (GET_CODE (x) != SYMBOL_REF)
10342 return false;
10343 /* FALLTHRU */
10345 case SYMBOL_REF:
10346 /* TLS symbols are never valid. */
10347 if (SYMBOL_REF_TLS_MODEL (x))
10348 return false;
10350 /* DLLIMPORT symbols are never valid. */
10351 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10352 && SYMBOL_REF_DLLIMPORT_P (x))
10353 return false;
10355 #if TARGET_MACHO
10356 /* mdynamic-no-pic */
10357 if (MACHO_DYNAMIC_NO_PIC_P)
10358 return machopic_symbol_defined_p (x);
10359 #endif
10361 /* External function address should be loaded
10362 via the GOT slot to avoid PLT. */
10363 if (ix86_force_load_from_GOT_p (x))
10364 return false;
10366 break;
10368 CASE_CONST_SCALAR_INT:
10369 if (ix86_endbr_immediate_operand (x, VOIDmode))
10370 return false;
10372 switch (mode)
10374 case E_TImode:
10375 if (TARGET_64BIT)
10376 return true;
10377 /* FALLTHRU */
10378 case E_OImode:
10379 case E_XImode:
10380 if (!standard_sse_constant_p (x, mode))
10381 return false;
10382 default:
10383 break;
10385 break;
10387 case CONST_VECTOR:
10388 if (!standard_sse_constant_p (x, mode))
10389 return false;
10391 default:
10392 break;
10395 /* Otherwise we handle everything else in the move patterns. */
10396 return true;
10399 /* Determine if it's legal to put X into the constant pool. This
10400 is not possible for the address of thread-local symbols, which
10401 is checked above. */
10403 static bool
10404 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10406 /* We can put any immediate constant in memory. */
10407 switch (GET_CODE (x))
10409 CASE_CONST_ANY:
10410 return false;
10412 default:
10413 break;
10416 return !ix86_legitimate_constant_p (mode, x);
10419 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
10420 otherwise zero. */
10422 static bool
10423 is_imported_p (rtx x)
10425 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10426 || GET_CODE (x) != SYMBOL_REF)
10427 return false;
10429 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10433 /* Nonzero if the constant value X is a legitimate general operand
10434 when generating PIC code. It is given that flag_pic is on and
10435 that X satisfies CONSTANT_P. */
10437 bool
10438 legitimate_pic_operand_p (rtx x)
10440 rtx inner;
10442 switch (GET_CODE (x))
10444 case CONST:
10445 inner = XEXP (x, 0);
10446 if (GET_CODE (inner) == PLUS
10447 && CONST_INT_P (XEXP (inner, 1)))
10448 inner = XEXP (inner, 0);
10450 /* Only some unspecs are valid as "constants". */
10451 if (GET_CODE (inner) == UNSPEC)
10452 switch (XINT (inner, 1))
10454 case UNSPEC_GOT:
10455 case UNSPEC_GOTOFF:
10456 case UNSPEC_PLTOFF:
10457 return TARGET_64BIT;
10458 case UNSPEC_TPOFF:
10459 x = XVECEXP (inner, 0, 0);
10460 return (GET_CODE (x) == SYMBOL_REF
10461 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10462 case UNSPEC_MACHOPIC_OFFSET:
10463 return legitimate_pic_address_disp_p (x);
10464 default:
10465 return false;
10467 /* FALLTHRU */
10469 case SYMBOL_REF:
10470 case LABEL_REF:
10471 return legitimate_pic_address_disp_p (x);
10473 default:
10474 return true;
10478 /* Determine if a given CONST RTX is a valid memory displacement
10479 in PIC mode. */
10481 bool
10482 legitimate_pic_address_disp_p (rtx disp)
10484 bool saw_plus;
10486 /* In 64bit mode we can allow direct addresses of symbols and labels
10487 when they are not dynamic symbols. */
10488 if (TARGET_64BIT)
10490 rtx op0 = disp, op1;
10492 switch (GET_CODE (disp))
10494 case LABEL_REF:
10495 return true;
10497 case CONST:
10498 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10499 break;
10500 op0 = XEXP (XEXP (disp, 0), 0);
10501 op1 = XEXP (XEXP (disp, 0), 1);
10502 if (!CONST_INT_P (op1))
10503 break;
10504 if (GET_CODE (op0) == UNSPEC
10505 && (XINT (op0, 1) == UNSPEC_DTPOFF
10506 || XINT (op0, 1) == UNSPEC_NTPOFF)
10507 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10508 return true;
10509 if (INTVAL (op1) >= 16*1024*1024
10510 || INTVAL (op1) < -16*1024*1024)
10511 break;
10512 if (GET_CODE (op0) == LABEL_REF)
10513 return true;
10514 if (GET_CODE (op0) == CONST
10515 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10516 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10517 return true;
10518 if (GET_CODE (op0) == UNSPEC
10519 && XINT (op0, 1) == UNSPEC_PCREL)
10520 return true;
10521 if (GET_CODE (op0) != SYMBOL_REF)
10522 break;
10523 /* FALLTHRU */
10525 case SYMBOL_REF:
10526 /* TLS references should always be enclosed in UNSPEC.
10527 A dllimported symbol always needs to be resolved. */
10528 if (SYMBOL_REF_TLS_MODEL (op0)
10529 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10530 return false;
10532 if (TARGET_PECOFF)
10534 if (is_imported_p (op0))
10535 return true;
10537 if (SYMBOL_REF_FAR_ADDR_P (op0)
10538 || !SYMBOL_REF_LOCAL_P (op0))
10539 break;
10541 /* Function symbols need to be resolved only for the
10542 large model.
10543 For the small model we don't need to resolve anything
10544 here. */
10545 if ((ix86_cmodel != CM_LARGE_PIC
10546 && SYMBOL_REF_FUNCTION_P (op0))
10547 || ix86_cmodel == CM_SMALL_PIC)
10548 return true;
10549 /* Non-external symbols don't need to be resolved for the
10550 large and medium models. */
10551 if ((ix86_cmodel == CM_LARGE_PIC
10552 || ix86_cmodel == CM_MEDIUM_PIC)
10553 && !SYMBOL_REF_EXTERNAL_P (op0))
10554 return true;
10556 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10557 && (SYMBOL_REF_LOCAL_P (op0)
10558 || (HAVE_LD_PIE_COPYRELOC
10559 && flag_pie
10560 && !SYMBOL_REF_WEAK (op0)
10561 && !SYMBOL_REF_FUNCTION_P (op0)))
10562 && ix86_cmodel != CM_LARGE_PIC)
10563 return true;
10564 break;
10566 default:
10567 break;
10570 if (GET_CODE (disp) != CONST)
10571 return false;
10572 disp = XEXP (disp, 0);
10574 if (TARGET_64BIT)
10576 /* It is not safe to allow PLUS expressions. This limits the allowed
10577 distance of GOT table references. We should not need these anyway. */
10578 if (GET_CODE (disp) != UNSPEC
10579 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10580 && XINT (disp, 1) != UNSPEC_GOTOFF
10581 && XINT (disp, 1) != UNSPEC_PCREL
10582 && XINT (disp, 1) != UNSPEC_PLTOFF))
10583 return false;
10585 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10586 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10587 return false;
10588 return true;
10591 saw_plus = false;
10592 if (GET_CODE (disp) == PLUS)
10594 if (!CONST_INT_P (XEXP (disp, 1)))
10595 return false;
10596 disp = XEXP (disp, 0);
10597 saw_plus = true;
10600 if (TARGET_MACHO && darwin_local_data_pic (disp))
10601 return true;
10603 if (GET_CODE (disp) != UNSPEC)
10604 return false;
10606 switch (XINT (disp, 1))
10608 case UNSPEC_GOT:
10609 if (saw_plus)
10610 return false;
10611 /* We need to check for both symbols and labels because VxWorks loads
10612 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10613 details. */
10614 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10615 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10616 case UNSPEC_GOTOFF:
10617 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10618 While the ABI also specifies a 32bit relocation, we don't produce it
10619 in the small PIC model at all. */
10620 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10621 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10622 && !TARGET_64BIT)
10623 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10624 return false;
10625 case UNSPEC_GOTTPOFF:
10626 case UNSPEC_GOTNTPOFF:
10627 case UNSPEC_INDNTPOFF:
10628 if (saw_plus)
10629 return false;
10630 disp = XVECEXP (disp, 0, 0);
10631 return (GET_CODE (disp) == SYMBOL_REF
10632 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10633 case UNSPEC_NTPOFF:
10634 disp = XVECEXP (disp, 0, 0);
10635 return (GET_CODE (disp) == SYMBOL_REF
10636 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10637 case UNSPEC_DTPOFF:
10638 disp = XVECEXP (disp, 0, 0);
10639 return (GET_CODE (disp) == SYMBOL_REF
10640 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10643 return false;
10646 /* Determine if op is a suitable RTX for an address register.
10647 Return naked register if a register or a register subreg is
10648 found, otherwise return NULL_RTX. */
10650 static rtx
10651 ix86_validate_address_register (rtx op)
10653 machine_mode mode = GET_MODE (op);
10655 /* Only SImode or DImode registers can form the address. */
10656 if (mode != SImode && mode != DImode)
10657 return NULL_RTX;
10659 if (REG_P (op))
10660 return op;
10661 else if (SUBREG_P (op))
10663 rtx reg = SUBREG_REG (op);
10665 if (!REG_P (reg))
10666 return NULL_RTX;
10668 mode = GET_MODE (reg);
10670 /* Don't allow SUBREGs that span more than a word. It can
10671 lead to spill failures when the register is one word out
10672 of a two word structure. */
10673 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10674 return NULL_RTX;
10676 /* Allow only SUBREGs of non-eliminable hard registers. */
10677 if (register_no_elim_operand (reg, mode))
10678 return reg;
10681 /* Op is not a register. */
10682 return NULL_RTX;
10685 /* Recognizes RTL expressions that are valid memory addresses for an
10686 instruction. The MODE argument is the machine mode for the MEM
10687 expression that wants to use this address.
10689 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10690 convert common non-canonical forms to canonical form so that they will
10691 be recognized. */
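/* Canonically, such an address is represented roughly as
      (plus (plus (mult index scale) base) disp)
   with any of the three parts possibly absent; ix86_decompose_address
   above does the actual splitting.  */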
10693 static bool
10694 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10696 struct ix86_address parts;
10697 rtx base, index, disp;
10698 HOST_WIDE_INT scale;
10699 addr_space_t seg;
10701 if (ix86_decompose_address (addr, &parts) <= 0)
10702 /* Decomposition failed. */
10703 return false;
10705 base = parts.base;
10706 index = parts.index;
10707 disp = parts.disp;
10708 scale = parts.scale;
10709 seg = parts.seg;
10711 /* Validate base register. */
10712 if (base)
10714 rtx reg = ix86_validate_address_register (base);
10716 if (reg == NULL_RTX)
10717 return false;
10719 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10720 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10721 /* Base is not valid. */
10722 return false;
10725 /* Validate index register. */
10726 if (index)
10728 rtx reg = ix86_validate_address_register (index);
10730 if (reg == NULL_RTX)
10731 return false;
10733 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10734 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10735 /* Index is not valid. */
10736 return false;
10739 /* Index and base should have the same mode. */
10740 if (base && index
10741 && GET_MODE (base) != GET_MODE (index))
10742 return false;
10744 /* Address override works only on the (%reg) part of %fs:(%reg). */
10745 if (seg != ADDR_SPACE_GENERIC
10746 && ((base && GET_MODE (base) != word_mode)
10747 || (index && GET_MODE (index) != word_mode)))
10748 return false;
10750 /* Validate scale factor. */
10751 if (scale != 1)
10753 if (!index)
10754 /* Scale without index. */
10755 return false;
10757 if (scale != 2 && scale != 4 && scale != 8)
10758 /* Scale is not a valid multiplier. */
10759 return false;
10762 /* Validate displacement. */
10763 if (disp)
10765 if (ix86_endbr_immediate_operand (disp, VOIDmode))
10766 return false;
10768 if (GET_CODE (disp) == CONST
10769 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10770 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10771 switch (XINT (XEXP (disp, 0), 1))
10773 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10774 when used. While the ABI also specifies 32bit relocations, we
10775 don't produce them at all and use IP-relative addressing instead.
10776 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10777 should be loaded via the GOT. */
10778 case UNSPEC_GOT:
10779 if (!TARGET_64BIT
10780 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10781 goto is_legitimate_pic;
10782 /* FALLTHRU */
10783 case UNSPEC_GOTOFF:
10784 gcc_assert (flag_pic);
10785 if (!TARGET_64BIT)
10786 goto is_legitimate_pic;
10788 /* 64bit address unspec. */
10789 return false;
10791 case UNSPEC_GOTPCREL:
10792 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10793 goto is_legitimate_pic;
10794 /* FALLTHRU */
10795 case UNSPEC_PCREL:
10796 gcc_assert (flag_pic);
10797 goto is_legitimate_pic;
10799 case UNSPEC_GOTTPOFF:
10800 case UNSPEC_GOTNTPOFF:
10801 case UNSPEC_INDNTPOFF:
10802 case UNSPEC_NTPOFF:
10803 case UNSPEC_DTPOFF:
10804 break;
10806 default:
10807 /* Invalid address unspec. */
10808 return false;
10811 else if (SYMBOLIC_CONST (disp)
10812 && (flag_pic
10813 || (TARGET_MACHO
10814 #if TARGET_MACHO
10815 && MACHOPIC_INDIRECT
10816 && !machopic_operand_p (disp)
10817 #endif
10821 is_legitimate_pic:
10822 if (TARGET_64BIT && (index || base))
10824 /* foo@dtpoff(%rX) is ok. */
10825 if (GET_CODE (disp) != CONST
10826 || GET_CODE (XEXP (disp, 0)) != PLUS
10827 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10828 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10829 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10830 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10831 /* Non-constant pic memory reference. */
10832 return false;
10834 else if ((!TARGET_MACHO || flag_pic)
10835 && ! legitimate_pic_address_disp_p (disp))
10836 /* Displacement is an invalid pic construct. */
10837 return false;
10838 #if TARGET_MACHO
10839 else if (MACHO_DYNAMIC_NO_PIC_P
10840 && !ix86_legitimate_constant_p (Pmode, disp))
10841 /* displacement must be referenced via non_lazy_pointer */
10842 return false;
10843 #endif
10845 /* This code used to verify that a symbolic pic displacement
10846 includes the pic_offset_table_rtx register.
10848 While this is a good idea, unfortunately these constructs may
10849 be created by the "adds using lea" optimization for incorrect
10850 code like:
10852 int a;
10853 int foo(int i)
10855 return *(&a+i);
10858 This code is nonsensical, but results in addressing the
10859 GOT table with a pic_offset_table_rtx base. We can't
10860 just refuse it easily, since it gets matched by the
10861 "addsi3" pattern, which later gets split to lea when the
10862 output register differs from the input. While this
10863 could be handled by a separate addsi pattern for this case
10864 that never results in lea, disabling this test seems to be
10865 the easier and correct fix for the crash. */
10867 else if (GET_CODE (disp) != LABEL_REF
10868 && !CONST_INT_P (disp)
10869 && (GET_CODE (disp) != CONST
10870 || !ix86_legitimate_constant_p (Pmode, disp))
10871 && (GET_CODE (disp) != SYMBOL_REF
10872 || !ix86_legitimate_constant_p (Pmode, disp)))
10873 /* Displacement is not constant. */
10874 return false;
10875 else if (TARGET_64BIT
10876 && !x86_64_immediate_operand (disp, VOIDmode))
10877 /* Displacement is out of range. */
10878 return false;
10879 /* In x32 mode, constant addresses are sign extended to 64bit, so
10880 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10881 else if (TARGET_X32 && !(index || base)
10882 && CONST_INT_P (disp)
10883 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10884 return false;
10887 /* Everything looks valid. */
10888 return true;
10891 /* Determine if a given RTX is a valid constant address. */
10893 bool
10894 constant_address_p (rtx x)
10896 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10899 /* Return a unique alias set for the GOT. */
10901 alias_set_type
10902 ix86_GOT_alias_set (void)
10904 static alias_set_type set = -1;
10905 if (set == -1)
10906 set = new_alias_set ();
10907 return set;
10910 /* Return a legitimate reference for ORIG (an address) using the
10911 register REG. If REG is 0, a new pseudo is generated.
10913 There are two types of references that must be handled:
10915 1. Global data references must load the address from the GOT, via
10916 the PIC reg. An insn is emitted to do this load, and the reg is
10917 returned.
10919 2. Static data references, constant pool addresses, and code labels
10920 compute the address as an offset from the GOT, whose base is in
10921 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10922 differentiate them from global data objects. The returned
10923 address is the PIC reg + an unspec constant.
10925 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10926 reg also appears in the address. */
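/* As a sketch for 32-bit ELF (exact sequences depend on the target and code
   model), the two cases typically materialize as:

      movl  foo@GOT(%ebx), %eax      # case 1: address loaded from the GOT
      leal  bar@GOTOFF(%ebx), %eax   # case 2: offset from the PIC base

   whereas 64-bit small-model PIC uses foo@GOTPCREL(%rip) loads and plain
   %rip-relative addressing for local data.  */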
10928 rtx
10929 legitimize_pic_address (rtx orig, rtx reg)
10931 rtx addr = orig;
10932 rtx new_rtx = orig;
10934 #if TARGET_MACHO
10935 if (TARGET_MACHO && !TARGET_64BIT)
10937 if (reg == 0)
10938 reg = gen_reg_rtx (Pmode);
10939 /* Use the generic Mach-O PIC machinery. */
10940 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10942 #endif
10944 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10946 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10947 if (tmp)
10948 return tmp;
10951 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10952 new_rtx = addr;
10953 else if ((!TARGET_64BIT
10954 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10955 && !TARGET_PECOFF
10956 && gotoff_operand (addr, Pmode))
10958 /* This symbol may be referenced via a displacement
10959 from the PIC base address (@GOTOFF). */
10960 if (GET_CODE (addr) == CONST)
10961 addr = XEXP (addr, 0);
10963 if (GET_CODE (addr) == PLUS)
10965 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10966 UNSPEC_GOTOFF);
10967 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10969 else
10970 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10972 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10974 if (TARGET_64BIT)
10975 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10977 if (reg != 0)
10979 gcc_assert (REG_P (reg));
10980 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10981 new_rtx, reg, 1, OPTAB_DIRECT);
10983 else
10984 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10986 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10987 /* We can't use @GOTOFF for text labels
10988 on VxWorks, see gotoff_operand. */
10989 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10991 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10992 if (tmp)
10993 return tmp;
10995 /* For x64 PE-COFF there is no GOT table,
10996 so we use address directly. */
10997 if (TARGET_64BIT && TARGET_PECOFF)
10999 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11000 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11002 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11004 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11005 UNSPEC_GOTPCREL);
11006 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11007 new_rtx = gen_const_mem (Pmode, new_rtx);
11008 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11010 else
11012 /* This symbol must be referenced via a load
11013 from the Global Offset Table (@GOT). */
11014 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11015 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11016 if (TARGET_64BIT)
11017 new_rtx = force_reg (Pmode, new_rtx);
11018 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11019 new_rtx = gen_const_mem (Pmode, new_rtx);
11020 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11023 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11025 else
11027 if (CONST_INT_P (addr)
11028 && !x86_64_immediate_operand (addr, VOIDmode))
11029 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11030 else if (GET_CODE (addr) == CONST)
11032 addr = XEXP (addr, 0);
11034 /* We must match stuff we generate before. Assume the only
11035 unspecs that can get here are ours. Not that we could do
11036 anything with them anyway.... */
11037 if (GET_CODE (addr) == UNSPEC
11038 || (GET_CODE (addr) == PLUS
11039 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11040 return orig;
11041 gcc_assert (GET_CODE (addr) == PLUS);
11044 if (GET_CODE (addr) == PLUS)
11046 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11048 /* Check first to see if this is a constant
11049 offset from a @GOTOFF symbol reference. */
11050 if (!TARGET_PECOFF
11051 && gotoff_operand (op0, Pmode)
11052 && CONST_INT_P (op1))
11054 if (!TARGET_64BIT)
11056 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11057 UNSPEC_GOTOFF);
11058 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11059 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11061 if (reg != 0)
11063 gcc_assert (REG_P (reg));
11064 new_rtx = expand_simple_binop (Pmode, PLUS,
11065 pic_offset_table_rtx,
11066 new_rtx, reg, 1,
11067 OPTAB_DIRECT);
11069 else
11070 new_rtx
11071 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11073 else
11075 if (INTVAL (op1) < -16*1024*1024
11076 || INTVAL (op1) >= 16*1024*1024)
11078 if (!x86_64_immediate_operand (op1, Pmode))
11079 op1 = force_reg (Pmode, op1);
11081 new_rtx
11082 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11086 else
11088 rtx base = legitimize_pic_address (op0, reg);
11089 machine_mode mode = GET_MODE (base);
11090 new_rtx
11091 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11093 if (CONST_INT_P (new_rtx))
11095 if (INTVAL (new_rtx) < -16*1024*1024
11096 || INTVAL (new_rtx) >= 16*1024*1024)
11098 if (!x86_64_immediate_operand (new_rtx, mode))
11099 new_rtx = force_reg (mode, new_rtx);
11101 new_rtx
11102 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11104 else
11105 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11107 else
11109 /* For %rip addressing, we have to use
11110 just disp32, not base nor index. */
11111 if (TARGET_64BIT
11112 && (GET_CODE (base) == SYMBOL_REF
11113 || GET_CODE (base) == LABEL_REF))
11114 base = force_reg (mode, base);
11115 if (GET_CODE (new_rtx) == PLUS
11116 && CONSTANT_P (XEXP (new_rtx, 1)))
11118 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11119 new_rtx = XEXP (new_rtx, 1);
11121 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11126 return new_rtx;
11129 /* Load the thread pointer. If TO_REG is true, force it into a register. */
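/* (On GNU/Linux the thread pointer is the %fs segment base in 64-bit mode
   and the %gs base in 32-bit mode; the UNSPEC_TP generated here is later
   rewritten into such a segment-relative access.)  */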
11131 static rtx
11132 get_thread_pointer (machine_mode tp_mode, bool to_reg)
11134 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11136 if (GET_MODE (tp) != tp_mode)
11138 gcc_assert (GET_MODE (tp) == SImode);
11139 gcc_assert (tp_mode == DImode);
11141 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11144 if (to_reg)
11145 tp = copy_to_mode_reg (tp_mode, tp);
11147 return tp;
11150 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11152 static GTY(()) rtx ix86_tls_symbol;
11154 static rtx
11155 ix86_tls_get_addr (void)
11157 if (!ix86_tls_symbol)
11159 const char *sym
11160 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11161 ? "___tls_get_addr" : "__tls_get_addr");
11163 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11166 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11168 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11169 UNSPEC_PLTOFF);
11170 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11171 gen_rtx_CONST (Pmode, unspec));
11174 return ix86_tls_symbol;
11177 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11179 static GTY(()) rtx ix86_tls_module_base_symbol;
11181 static rtx
11182 ix86_tls_module_base (void)
11184 if (!ix86_tls_module_base_symbol)
11186 ix86_tls_module_base_symbol
11187 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11189 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11190 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11193 return ix86_tls_module_base_symbol;
11196 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11197 false if we expect this to be used for a memory address and true if
11198 we expect to load the address into a register. */
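/* Very roughly, for GNU/Linux x86-64 (the exact sequences depend on the
   model, TLS dialect and ABI):

      global/local dynamic:  obtain the address via a call to __tls_get_addr
      initial exec:          movq  x@GOTTPOFF(%rip), %rax
                             movq  %fs:(%rax), ...
      local exec:            movq  %fs:x@TPOFF, ...

   32-bit code uses %gs instead of %fs and may first need to set up the
   PIC register.  */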
11200 rtx
11201 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
11203 rtx dest, base, off;
11204 rtx pic = NULL_RTX, tp = NULL_RTX;
11205 machine_mode tp_mode = Pmode;
11206 int type;
11208 /* Fall back to the global dynamic model if the toolchain cannot support
11209 local dynamic. */
11210 if (TARGET_SUN_TLS && !TARGET_64BIT
11211 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11212 && model == TLS_MODEL_LOCAL_DYNAMIC)
11213 model = TLS_MODEL_GLOBAL_DYNAMIC;
11215 switch (model)
11217 case TLS_MODEL_GLOBAL_DYNAMIC:
11218 if (!TARGET_64BIT)
11220 if (flag_pic && !TARGET_PECOFF)
11221 pic = pic_offset_table_rtx;
11222 else
11224 pic = gen_reg_rtx (Pmode);
11225 emit_insn (gen_set_got (pic));
11229 if (TARGET_GNU2_TLS)
11231 dest = gen_reg_rtx (ptr_mode);
11232 if (TARGET_64BIT)
11233 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11234 else
11235 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11237 tp = get_thread_pointer (ptr_mode, true);
11238 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11239 if (GET_MODE (dest) != Pmode)
11240 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11241 dest = force_reg (Pmode, dest);
11243 if (GET_MODE (x) != Pmode)
11244 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11246 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11248 else
11250 rtx caddr = ix86_tls_get_addr ();
11252 dest = gen_reg_rtx (Pmode);
11253 if (TARGET_64BIT)
11255 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11256 rtx_insn *insns;
11258 start_sequence ();
11259 emit_call_insn
11260 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11261 insns = get_insns ();
11262 end_sequence ();
11264 if (GET_MODE (x) != Pmode)
11265 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11267 RTL_CONST_CALL_P (insns) = 1;
11268 emit_libcall_block (insns, dest, rax, x);
11270 else
11271 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11273 break;
11275 case TLS_MODEL_LOCAL_DYNAMIC:
11276 if (!TARGET_64BIT)
11278 if (flag_pic)
11279 pic = pic_offset_table_rtx;
11280 else
11282 pic = gen_reg_rtx (Pmode);
11283 emit_insn (gen_set_got (pic));
11287 if (TARGET_GNU2_TLS)
11289 rtx tmp = ix86_tls_module_base ();
11291 base = gen_reg_rtx (ptr_mode);
11292 if (TARGET_64BIT)
11293 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11294 else
11295 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11297 tp = get_thread_pointer (ptr_mode, true);
11298 if (GET_MODE (base) != Pmode)
11299 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11300 base = force_reg (Pmode, base);
11302 else
11304 rtx caddr = ix86_tls_get_addr ();
11306 base = gen_reg_rtx (Pmode);
11307 if (TARGET_64BIT)
11309 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11310 rtx_insn *insns;
11311 rtx eqv;
11313 start_sequence ();
11314 emit_call_insn
11315 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11316 insns = get_insns ();
11317 end_sequence ();
11319 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11320 share the LD_BASE result with other LD model accesses. */
11321 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11322 UNSPEC_TLS_LD_BASE);
11324 RTL_CONST_CALL_P (insns) = 1;
11325 emit_libcall_block (insns, base, rax, eqv);
11327 else
11328 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11331 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11332 off = gen_rtx_CONST (Pmode, off);
11334 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11336 if (TARGET_GNU2_TLS)
11338 if (GET_MODE (tp) != Pmode)
11340 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11341 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11342 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11344 else
11345 dest = gen_rtx_PLUS (Pmode, tp, dest);
11346 dest = force_reg (Pmode, dest);
11348 if (GET_MODE (x) != Pmode)
11349 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11351 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11353 break;
11355 case TLS_MODEL_INITIAL_EXEC:
11356 if (TARGET_64BIT)
11358 if (TARGET_SUN_TLS && !TARGET_X32)
11360 /* The Sun linker took the AMD64 TLS spec literally
11361 and can only handle %rax as destination of the
11362 initial executable code sequence. */
11364 dest = gen_reg_rtx (DImode);
11365 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11366 return dest;
11369 /* Generate DImode references to avoid %fs:(%reg32)
11370 problems and linker IE->LE relaxation bug. */
11371 tp_mode = DImode;
11372 pic = NULL;
11373 type = UNSPEC_GOTNTPOFF;
11375 else if (flag_pic)
11377 pic = pic_offset_table_rtx;
11378 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11380 else if (!TARGET_ANY_GNU_TLS)
11382 pic = gen_reg_rtx (Pmode);
11383 emit_insn (gen_set_got (pic));
11384 type = UNSPEC_GOTTPOFF;
11386 else
11388 pic = NULL;
11389 type = UNSPEC_INDNTPOFF;
11392 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11393 off = gen_rtx_CONST (tp_mode, off);
11394 if (pic)
11395 off = gen_rtx_PLUS (tp_mode, pic, off);
11396 off = gen_const_mem (tp_mode, off);
11397 set_mem_alias_set (off, ix86_GOT_alias_set ());
11399 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11401 base = get_thread_pointer (tp_mode,
11402 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11403 off = force_reg (tp_mode, off);
11404 dest = gen_rtx_PLUS (tp_mode, base, off);
11405 if (tp_mode != Pmode)
11406 dest = convert_to_mode (Pmode, dest, 1);
11408 else
11410 base = get_thread_pointer (Pmode, true);
11411 dest = gen_reg_rtx (Pmode);
11412 emit_insn (gen_sub3_insn (dest, base, off));
11414 break;
11416 case TLS_MODEL_LOCAL_EXEC:
11417 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11418 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11419 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11420 off = gen_rtx_CONST (Pmode, off);
11422 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11424 base = get_thread_pointer (Pmode,
11425 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11426 return gen_rtx_PLUS (Pmode, base, off);
11428 else
11430 base = get_thread_pointer (Pmode, true);
11431 dest = gen_reg_rtx (Pmode);
11432 emit_insn (gen_sub3_insn (dest, base, off));
11434 break;
11436 default:
11437 gcc_unreachable ();
11440 return dest;
11443 /* Return true if OP refers to a TLS address. */
11444 bool
11445 ix86_tls_address_pattern_p (rtx op)
11447 subrtx_var_iterator::array_type array;
11448 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11450 rtx op = *iter;
11451 if (MEM_P (op))
11453 rtx *x = &XEXP (op, 0);
11454 while (GET_CODE (*x) == PLUS)
11456 int i;
11457 for (i = 0; i < 2; i++)
11459 rtx u = XEXP (*x, i);
11460 if (GET_CODE (u) == ZERO_EXTEND)
11461 u = XEXP (u, 0);
11462 if (GET_CODE (u) == UNSPEC
11463 && XINT (u, 1) == UNSPEC_TP)
11464 return true;
11466 x = &XEXP (*x, 0);
11469 iter.skip_subrtxes ();
11473 return false;
11476 /* Rewrite *LOC so that it refers to a default TLS address space. */
11477 void
11478 ix86_rewrite_tls_address_1 (rtx *loc)
11480 subrtx_ptr_iterator::array_type array;
11481 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11483 rtx *loc = *iter;
11484 if (MEM_P (*loc))
11486 rtx addr = XEXP (*loc, 0);
11487 rtx *x = &addr;
11488 while (GET_CODE (*x) == PLUS)
11490 int i;
11491 for (i = 0; i < 2; i++)
11493 rtx u = XEXP (*x, i);
11494 if (GET_CODE (u) == ZERO_EXTEND)
11495 u = XEXP (u, 0);
11496 if (GET_CODE (u) == UNSPEC
11497 && XINT (u, 1) == UNSPEC_TP)
11499 addr_space_t as = DEFAULT_TLS_SEG_REG;
11501 *x = XEXP (*x, 1 - i);
11503 *loc = replace_equiv_address_nv (*loc, addr, true);
11504 set_mem_addr_space (*loc, as);
11505 return;
11508 x = &XEXP (*x, 0);
11511 iter.skip_subrtxes ();
11516 /* Rewrite an instruction pattern involving a TLS address
11517 so that it refers to the default TLS address space. */
11518 rtx
11519 ix86_rewrite_tls_address (rtx pattern)
11521 pattern = copy_insn (pattern);
11522 ix86_rewrite_tls_address_1 (&pattern);
11523 return pattern;
11526 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11527 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11528 unique refptr-DECL symbol corresponding to symbol DECL. */
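/* For example, a reference to a dllimported symbol "foo" is redirected
   through the import pointer "__imp_foo" (or "__imp__foo" when a user
   label prefix is in use), while the refptr case goes through a locally
   emitted ".refptr.foo" indirection cell.  */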
11530 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11532 static inline hashval_t hash (tree_map *m) { return m->hash; }
11533 static inline bool
11534 equal (tree_map *a, tree_map *b)
11536 return a->base.from == b->base.from;
11539 static int
11540 keep_cache_entry (tree_map *&m)
11542 return ggc_marked_p (m->base.from);
11546 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11548 static tree
11549 get_dllimport_decl (tree decl, bool beimport)
11551 struct tree_map *h, in;
11552 const char *name;
11553 const char *prefix;
11554 size_t namelen, prefixlen;
11555 char *imp_name;
11556 tree to;
11557 rtx rtl;
11559 if (!dllimport_map)
11560 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11562 in.hash = htab_hash_pointer (decl);
11563 in.base.from = decl;
11564 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11565 h = *loc;
11566 if (h)
11567 return h->to;
11569 *loc = h = ggc_alloc<tree_map> ();
11570 h->hash = in.hash;
11571 h->base.from = decl;
11572 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11573 VAR_DECL, NULL, ptr_type_node);
11574 DECL_ARTIFICIAL (to) = 1;
11575 DECL_IGNORED_P (to) = 1;
11576 DECL_EXTERNAL (to) = 1;
11577 TREE_READONLY (to) = 1;
11579 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11580 name = targetm.strip_name_encoding (name);
11581 if (beimport)
11582 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11583 ? "*__imp_" : "*__imp__";
11584 else
11585 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11586 namelen = strlen (name);
11587 prefixlen = strlen (prefix);
11588 imp_name = (char *) alloca (namelen + prefixlen + 1);
11589 memcpy (imp_name, prefix, prefixlen);
11590 memcpy (imp_name + prefixlen, name, namelen + 1);
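/* For example (the exact spelling depends on user_label_prefix): a
   dllimport'ed function foo is referenced through the import-table
   pointer __imp_foo, or __imp__foo on targets that prepend an
   underscore to user symbols, while the !BEIMPORT case builds a
   .refptr.foo-style stub name instead.  */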
11592 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11593 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11594 SET_SYMBOL_REF_DECL (rtl, to);
11595 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11596 if (!beimport)
11598 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11599 #ifdef SUB_TARGET_RECORD_STUB
11600 SUB_TARGET_RECORD_STUB (name);
11601 #endif
11604 rtl = gen_const_mem (Pmode, rtl);
11605 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11607 SET_DECL_RTL (to, rtl);
11608 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11610 return to;
11613 /* Expand SYMBOL into its corresponding far-address symbol.
11614 WANT_REG is true if we require the result be a register. */
11616 static rtx
11617 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11619 tree imp_decl;
11620 rtx x;
11622 gcc_assert (SYMBOL_REF_DECL (symbol));
11623 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11625 x = DECL_RTL (imp_decl);
11626 if (want_reg)
11627 x = force_reg (Pmode, x);
11628 return x;
11631 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11632 true if we require the result be a register. */
11634 static rtx
11635 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11637 tree imp_decl;
11638 rtx x;
11640 gcc_assert (SYMBOL_REF_DECL (symbol));
11641 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11643 x = DECL_RTL (imp_decl);
11644 if (want_reg)
11645 x = force_reg (Pmode, x);
11646 return x;
11649 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11650 is true if we require the result be a register. */
11653 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11655 if (!TARGET_PECOFF)
11656 return NULL_RTX;
11658 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11660 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11661 return legitimize_dllimport_symbol (addr, inreg);
11662 if (GET_CODE (addr) == CONST
11663 && GET_CODE (XEXP (addr, 0)) == PLUS
11664 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11665 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11667 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11668 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11672 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11673 return NULL_RTX;
11674 if (GET_CODE (addr) == SYMBOL_REF
11675 && !is_imported_p (addr)
11676 && SYMBOL_REF_EXTERNAL_P (addr)
11677 && SYMBOL_REF_DECL (addr))
11678 return legitimize_pe_coff_extern_decl (addr, inreg);
11680 if (GET_CODE (addr) == CONST
11681 && GET_CODE (XEXP (addr, 0)) == PLUS
11682 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11683 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11684 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11685 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11687 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11688 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11690 return NULL_RTX;
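/* For instance, a use of a dllimport'ed symbol foo is replaced above by
   a load from the __imp_foo pointer created by get_dllimport_decl, so
   the access goes through the import table; a plain external decl under
   -mcmodel=medium/large PIC is routed through a refptr stub instead.  */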
11693 /* Try machine-dependent ways of modifying an illegitimate address
11694 to be legitimate. If we find one, return the new, valid address.
11695 This macro is used in only one place: `memory_address' in explow.c.
11697 OLDX is the address as it was before break_out_memory_refs was called.
11698 In some cases it is useful to look at this to decide what needs to be done.
11700 It is always safe for this macro to do nothing. It exists to recognize
11701 opportunities to optimize the output.
11703 For the 80386, we handle X+REG by loading X into a register R and
11704 using R+REG. R will go in a general reg and indexing will be used.
11705 However, if REG is a broken-out memory address or multiplication,
11706 nothing needs to be done because REG can certainly go in a general reg.
11708 When -fpic is used, special handling is needed for symbolic references.
11709 See comments by legitimize_pic_address in i386.c for details. */
11711 static rtx
11712 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11714 bool changed = false;
11715 unsigned log;
11717 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11718 if (log)
11719 return legitimize_tls_address (x, (enum tls_model) log, false);
11720 if (GET_CODE (x) == CONST
11721 && GET_CODE (XEXP (x, 0)) == PLUS
11722 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11723 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11725 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11726 (enum tls_model) log, false);
11727 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11730 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11732 rtx tmp = legitimize_pe_coff_symbol (x, true);
11733 if (tmp)
11734 return tmp;
11737 if (flag_pic && SYMBOLIC_CONST (x))
11738 return legitimize_pic_address (x, 0);
11740 #if TARGET_MACHO
11741 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11742 return machopic_indirect_data_reference (x, 0);
11743 #endif
11745 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11746 if (GET_CODE (x) == ASHIFT
11747 && CONST_INT_P (XEXP (x, 1))
11748 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11750 changed = true;
11751 log = INTVAL (XEXP (x, 1));
11752 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11753 GEN_INT (1 << log));
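/* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   which ix86_decompose_address later accepts as an index scaled by 8 in
   a SIB address.  */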
11756 if (GET_CODE (x) == PLUS)
11758 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11760 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11761 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11762 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11764 changed = true;
11765 log = INTVAL (XEXP (XEXP (x, 0), 1));
11766 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11767 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11768 GEN_INT (1 << log));
11771 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11772 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11773 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11775 changed = true;
11776 log = INTVAL (XEXP (XEXP (x, 1), 1));
11777 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11778 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11779 GEN_INT (1 << log));
11782 /* Put multiply first if it isn't already. */
11783 if (GET_CODE (XEXP (x, 1)) == MULT)
11785 std::swap (XEXP (x, 0), XEXP (x, 1));
11786 changed = true;
11789 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11790 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11791 created by virtual register instantiation, register elimination, and
11792 similar optimizations. */
11793 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11795 changed = true;
11796 x = gen_rtx_PLUS (Pmode,
11797 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11798 XEXP (XEXP (x, 1), 0)),
11799 XEXP (XEXP (x, 1), 1));
11802 /* Canonicalize
11803 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11804 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11805 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11806 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11807 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11808 && CONSTANT_P (XEXP (x, 1)))
11810 rtx constant;
11811 rtx other = NULL_RTX;
11813 if (CONST_INT_P (XEXP (x, 1)))
11815 constant = XEXP (x, 1);
11816 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11818 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11820 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11821 other = XEXP (x, 1);
11823 else
11824 constant = 0;
11826 if (constant)
11828 changed = true;
11829 x = gen_rtx_PLUS (Pmode,
11830 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11831 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11832 plus_constant (Pmode, other,
11833 INTVAL (constant)));
11837 if (changed && ix86_legitimate_address_p (mode, x, false))
11838 return x;
11840 if (GET_CODE (XEXP (x, 0)) == MULT)
11842 changed = true;
11843 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11846 if (GET_CODE (XEXP (x, 1)) == MULT)
11848 changed = true;
11849 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11852 if (changed
11853 && REG_P (XEXP (x, 1))
11854 && REG_P (XEXP (x, 0)))
11855 return x;
11857 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11859 changed = true;
11860 x = legitimize_pic_address (x, 0);
11863 if (changed && ix86_legitimate_address_p (mode, x, false))
11864 return x;
11866 if (REG_P (XEXP (x, 0)))
11868 rtx temp = gen_reg_rtx (Pmode);
11869 rtx val = force_operand (XEXP (x, 1), temp);
11870 if (val != temp)
11872 val = convert_to_mode (Pmode, val, 1);
11873 emit_move_insn (temp, val);
11876 XEXP (x, 1) = temp;
11877 return x;
11880 else if (REG_P (XEXP (x, 1)))
11882 rtx temp = gen_reg_rtx (Pmode);
11883 rtx val = force_operand (XEXP (x, 0), temp);
11884 if (val != temp)
11886 val = convert_to_mode (Pmode, val, 1);
11887 emit_move_insn (temp, val);
11890 XEXP (x, 0) = temp;
11891 return x;
11895 return x;
11898 /* Print an integer constant expression in assembler syntax. Addition
11899 and subtraction are the only arithmetic that may appear in these
11900 expressions. FILE is the stdio stream to write to, X is the rtx, and
11901 CODE is the operand print code from the output string. */
11903 static void
11904 output_pic_addr_const (FILE *file, rtx x, int code)
11906 char buf[256];
11908 switch (GET_CODE (x))
11910 case PC:
11911 gcc_assert (flag_pic);
11912 putc ('.', file);
11913 break;
11915 case SYMBOL_REF:
11916 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11917 output_addr_const (file, x);
11918 else
11920 const char *name = XSTR (x, 0);
11922 /* Mark the decl as referenced so that cgraph will
11923 output the function. */
11924 if (SYMBOL_REF_DECL (x))
11925 mark_decl_referenced (SYMBOL_REF_DECL (x));
11927 #if TARGET_MACHO
11928 if (MACHOPIC_INDIRECT
11929 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11930 name = machopic_indirection_name (x, /*stub_p=*/true);
11931 #endif
11932 assemble_name (file, name);
11934 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11935 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11936 fputs ("@PLT", file);
11937 break;
11939 case LABEL_REF:
11940 x = XEXP (x, 0);
11941 /* FALLTHRU */
11942 case CODE_LABEL:
11943 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11944 assemble_name (asm_out_file, buf);
11945 break;
11947 case CONST_INT:
11948 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11949 break;
11951 case CONST:
11952 /* This used to output parentheses around the expression,
11953 but that does not work on the 386 (either ATT or BSD assembler). */
11954 output_pic_addr_const (file, XEXP (x, 0), code);
11955 break;
11957 case CONST_DOUBLE:
11958 /* We can't handle floating point constants;
11959 TARGET_PRINT_OPERAND must handle them. */
11960 output_operand_lossage ("floating constant misused");
11961 break;
11963 case PLUS:
11964 /* Some assemblers need integer constants to appear first. */
11965 if (CONST_INT_P (XEXP (x, 0)))
11967 output_pic_addr_const (file, XEXP (x, 0), code);
11968 putc ('+', file);
11969 output_pic_addr_const (file, XEXP (x, 1), code);
11971 else
11973 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11974 output_pic_addr_const (file, XEXP (x, 1), code);
11975 putc ('+', file);
11976 output_pic_addr_const (file, XEXP (x, 0), code);
11978 break;
11980 case MINUS:
11981 if (!TARGET_MACHO)
11982 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11983 output_pic_addr_const (file, XEXP (x, 0), code);
11984 putc ('-', file);
11985 output_pic_addr_const (file, XEXP (x, 1), code);
11986 if (!TARGET_MACHO)
11987 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11988 break;
11990 case UNSPEC:
11991 gcc_assert (XVECLEN (x, 0) == 1);
11992 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11993 switch (XINT (x, 1))
11995 case UNSPEC_GOT:
11996 fputs ("@GOT", file);
11997 break;
11998 case UNSPEC_GOTOFF:
11999 fputs ("@GOTOFF", file);
12000 break;
12001 case UNSPEC_PLTOFF:
12002 fputs ("@PLTOFF", file);
12003 break;
12004 case UNSPEC_PCREL:
12005 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12006 "(%rip)" : "[rip]", file);
12007 break;
12008 case UNSPEC_GOTPCREL:
12009 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12010 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12011 break;
12012 case UNSPEC_GOTTPOFF:
12013 /* FIXME: This might be @TPOFF in Sun ld too. */
12014 fputs ("@gottpoff", file);
12015 break;
12016 case UNSPEC_TPOFF:
12017 fputs ("@tpoff", file);
12018 break;
12019 case UNSPEC_NTPOFF:
12020 if (TARGET_64BIT)
12021 fputs ("@tpoff", file);
12022 else
12023 fputs ("@ntpoff", file);
12024 break;
12025 case UNSPEC_DTPOFF:
12026 fputs ("@dtpoff", file);
12027 break;
12028 case UNSPEC_GOTNTPOFF:
12029 if (TARGET_64BIT)
12030 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12031 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12032 else
12033 fputs ("@gotntpoff", file);
12034 break;
12035 case UNSPEC_INDNTPOFF:
12036 fputs ("@indntpoff", file);
12037 break;
12038 #if TARGET_MACHO
12039 case UNSPEC_MACHOPIC_OFFSET:
12040 putc ('-', file);
12041 machopic_output_function_base_name (file);
12042 break;
12043 #endif
12044 default:
12045 output_operand_lossage ("invalid UNSPEC as operand");
12046 break;
12048 break;
12050 default:
12051 output_operand_lossage ("invalid expression as operand");
12055 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12056 We need to emit DTP-relative relocations. */
12058 static void ATTRIBUTE_UNUSED
12059 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12061 fputs (ASM_LONG, file);
12062 output_addr_const (file, x);
12063 fputs ("@dtpoff", file);
12064 switch (size)
12066 case 4:
12067 break;
12068 case 8:
12069 fputs (", 0", file);
12070 break;
12071 default:
12072 gcc_unreachable ();
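/* So for SIZE == 4 the routine above emits something like
   ".long x@dtpoff", while for SIZE == 8 a zero upper half is appended:
   ".long x@dtpoff, 0" (the exact directive comes from ASM_LONG).  */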
12076 /* Return true if X is a representation of the PIC register. This copes
12077 with calls from ix86_find_base_term, where the register might have
12078 been replaced by a cselib value. */
12080 static bool
12081 ix86_pic_register_p (rtx x)
12083 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12084 return (pic_offset_table_rtx
12085 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12086 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12087 return true;
12088 else if (!REG_P (x))
12089 return false;
12090 else if (pic_offset_table_rtx)
12092 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12093 return true;
12094 if (HARD_REGISTER_P (x)
12095 && !HARD_REGISTER_P (pic_offset_table_rtx)
12096 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12097 return true;
12098 return false;
12100 else
12101 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12104 /* Helper function for ix86_delegitimize_address.
12105 Attempt to delegitimize TLS local-exec accesses. */
12107 static rtx
12108 ix86_delegitimize_tls_address (rtx orig_x)
12110 rtx x = orig_x, unspec;
12111 struct ix86_address addr;
12113 if (!TARGET_TLS_DIRECT_SEG_REFS)
12114 return orig_x;
12115 if (MEM_P (x))
12116 x = XEXP (x, 0);
12117 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12118 return orig_x;
12119 if (ix86_decompose_address (x, &addr) == 0
12120 || addr.seg != DEFAULT_TLS_SEG_REG
12121 || addr.disp == NULL_RTX
12122 || GET_CODE (addr.disp) != CONST)
12123 return orig_x;
12124 unspec = XEXP (addr.disp, 0);
12125 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12126 unspec = XEXP (unspec, 0);
12127 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12128 return orig_x;
12129 x = XVECEXP (unspec, 0, 0);
12130 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12131 if (unspec != XEXP (addr.disp, 0))
12132 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12133 if (addr.index)
12135 rtx idx = addr.index;
12136 if (addr.scale != 1)
12137 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12138 x = gen_rtx_PLUS (Pmode, idx, x);
12140 if (addr.base)
12141 x = gen_rtx_PLUS (Pmode, addr.base, x);
12142 if (MEM_P (orig_x))
12143 x = replace_equiv_address_nv (orig_x, x);
12144 return x;
12147 /* In the name of slightly smaller debug output, and to cater to
12148 general assembler lossage, recognize PIC+GOTOFF and turn it back
12149 into a direct symbol reference.
12151 On Darwin, this is necessary to avoid a crash, because Darwin
12152 has a different PIC label for each routine but the DWARF debugging
12153 information is not associated with any particular routine, so it's
12154 necessary to remove references to the PIC label from RTL stored by
12155 the DWARF output code.
12157 This helper is used in the normal ix86_delegitimize_address
12158 entrypoint (e.g. used in the target delegitimization hook) and
12159 in ix86_find_base_term. As a compile-time memory optimization, we
12160 avoid allocating rtxes that will not change the outcome
12161 for the callers (find_base_value and find_base_term). */
12163 static inline rtx
12164 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12166 rtx orig_x = delegitimize_mem_from_attrs (x);
12167 /* addend is NULL or some rtx if x is something+GOTOFF where
12168 something doesn't include the PIC register. */
12169 rtx addend = NULL_RTX;
12170 /* reg_addend is NULL or a multiple of some register. */
12171 rtx reg_addend = NULL_RTX;
12172 /* const_addend is NULL or a const_int. */
12173 rtx const_addend = NULL_RTX;
12174 /* This is the result, or NULL. */
12175 rtx result = NULL_RTX;
12177 x = orig_x;
12179 if (MEM_P (x))
12180 x = XEXP (x, 0);
12182 if (TARGET_64BIT)
12184 if (GET_CODE (x) == CONST
12185 && GET_CODE (XEXP (x, 0)) == PLUS
12186 && GET_MODE (XEXP (x, 0)) == Pmode
12187 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12188 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
12189 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
12191 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12192 base. A CONST can't be arg_pointer_rtx based. */
12193 if (base_term_p && MEM_P (orig_x))
12194 return orig_x;
12195 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
12196 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
12197 if (MEM_P (orig_x))
12198 x = replace_equiv_address_nv (orig_x, x);
12199 return x;
12202 if (GET_CODE (x) == CONST
12203 && GET_CODE (XEXP (x, 0)) == UNSPEC
12204 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
12205 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12206 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12208 x = XVECEXP (XEXP (x, 0), 0, 0);
12209 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12211 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12212 if (x == NULL_RTX)
12213 return orig_x;
12215 return x;
12218 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12219 return ix86_delegitimize_tls_address (orig_x);
12221 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12222 and -mcmodel=medium -fpic. */
12225 if (GET_CODE (x) != PLUS
12226 || GET_CODE (XEXP (x, 1)) != CONST)
12227 return ix86_delegitimize_tls_address (orig_x);
12229 if (ix86_pic_register_p (XEXP (x, 0)))
12230 /* %ebx + GOT/GOTOFF */
12232 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12234 /* %ebx + %reg * scale + GOT/GOTOFF */
12235 reg_addend = XEXP (x, 0);
12236 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12237 reg_addend = XEXP (reg_addend, 1);
12238 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12239 reg_addend = XEXP (reg_addend, 0);
12240 else
12242 reg_addend = NULL_RTX;
12243 addend = XEXP (x, 0);
12246 else
12247 addend = XEXP (x, 0);
12249 x = XEXP (XEXP (x, 1), 0);
12250 if (GET_CODE (x) == PLUS
12251 && CONST_INT_P (XEXP (x, 1)))
12253 const_addend = XEXP (x, 1);
12254 x = XEXP (x, 0);
12257 if (GET_CODE (x) == UNSPEC
12258 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12259 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12260 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12261 && !MEM_P (orig_x) && !addend)))
12262 result = XVECEXP (x, 0, 0);
12264 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12265 && !MEM_P (orig_x))
12266 result = XVECEXP (x, 0, 0);
12268 if (! result)
12269 return ix86_delegitimize_tls_address (orig_x);
12271 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12272 recurse on the first operand. */
12273 if (const_addend && !base_term_p)
12274 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12275 if (reg_addend)
12276 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12277 if (addend)
12279 /* If the rest of original X doesn't involve the PIC register, add
12280 addend and subtract pic_offset_table_rtx. This can happen e.g.
12281 for code like:
12282 leal (%ebx, %ecx, 4), %ecx
12284 movl foo@GOTOFF(%ecx), %edx
12285 in which case we return (%ecx - %ebx) + foo
12286 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12287 and reload has completed. Don't do the latter for debug,
12288 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12289 if (pic_offset_table_rtx
12290 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12291 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12292 pic_offset_table_rtx),
12293 result);
12294 else if (base_term_p
12295 && pic_offset_table_rtx
12296 && !TARGET_MACHO
12297 && !TARGET_VXWORKS_RTP)
12299 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12300 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12301 result = gen_rtx_PLUS (Pmode, tmp, result);
12303 else
12304 return orig_x;
12306 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12308 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12309 if (result == NULL_RTX)
12310 return orig_x;
12312 return result;
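/* For example, the 32-bit PIC address
     (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is delegitimized back to plain (symbol_ref "foo") by the code above,
   keeping debug info and base-term analysis independent of the PIC
   register.  */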
12315 /* The normal instantiation of the above template. */
12317 static rtx
12318 ix86_delegitimize_address (rtx x)
12320 return ix86_delegitimize_address_1 (x, false);
12323 /* If X is a machine specific address (i.e. a symbol or label being
12324 referenced as a displacement from the GOT implemented using an
12325 UNSPEC), then return the base term. Otherwise return X. */
12328 ix86_find_base_term (rtx x)
12330 rtx term;
12332 if (TARGET_64BIT)
12334 if (GET_CODE (x) != CONST)
12335 return x;
12336 term = XEXP (x, 0);
12337 if (GET_CODE (term) == PLUS
12338 && CONST_INT_P (XEXP (term, 1)))
12339 term = XEXP (term, 0);
12340 if (GET_CODE (term) != UNSPEC
12341 || (XINT (term, 1) != UNSPEC_GOTPCREL
12342 && XINT (term, 1) != UNSPEC_PCREL))
12343 return x;
12345 return XVECEXP (term, 0, 0);
12348 return ix86_delegitimize_address_1 (x, true);
12351 /* Return true if X shouldn't be emitted into the debug info.
12352 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
12353 symbol easily into the .debug_info section, so we do not
12354 delegitimize it, but instead assemble it as @gotoff.
12355 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12356 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
12358 static bool
12359 ix86_const_not_ok_for_debug_p (rtx x)
12361 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12362 return true;
12364 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12365 return true;
12367 return false;
12370 static void
12371 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12372 bool fp, FILE *file)
12374 const char *suffix;
12376 if (mode == CCFPmode)
12378 code = ix86_fp_compare_code_to_integer (code);
12379 mode = CCmode;
12381 if (reverse)
12382 code = reverse_condition (code);
12384 switch (code)
12386 case EQ:
12387 gcc_assert (mode != CCGZmode);
12388 switch (mode)
12390 case E_CCAmode:
12391 suffix = "a";
12392 break;
12393 case E_CCCmode:
12394 suffix = "c";
12395 break;
12396 case E_CCOmode:
12397 suffix = "o";
12398 break;
12399 case E_CCPmode:
12400 suffix = "p";
12401 break;
12402 case E_CCSmode:
12403 suffix = "s";
12404 break;
12405 default:
12406 suffix = "e";
12407 break;
12409 break;
12410 case NE:
12411 gcc_assert (mode != CCGZmode);
12412 switch (mode)
12414 case E_CCAmode:
12415 suffix = "na";
12416 break;
12417 case E_CCCmode:
12418 suffix = "nc";
12419 break;
12420 case E_CCOmode:
12421 suffix = "no";
12422 break;
12423 case E_CCPmode:
12424 suffix = "np";
12425 break;
12426 case E_CCSmode:
12427 suffix = "ns";
12428 break;
12429 default:
12430 suffix = "ne";
12431 break;
12433 break;
12434 case GT:
12435 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12436 suffix = "g";
12437 break;
12438 case GTU:
12439 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12440 Those same assemblers have the same but opposite lossage on cmov. */
12441 if (mode == CCmode)
12442 suffix = fp ? "nbe" : "a";
12443 else
12444 gcc_unreachable ();
12445 break;
12446 case LT:
12447 switch (mode)
12449 case E_CCNOmode:
12450 case E_CCGOCmode:
12451 suffix = "s";
12452 break;
12454 case E_CCmode:
12455 case E_CCGCmode:
12456 case E_CCGZmode:
12457 suffix = "l";
12458 break;
12460 default:
12461 gcc_unreachable ();
12463 break;
12464 case LTU:
12465 if (mode == CCmode || mode == CCGZmode)
12466 suffix = "b";
12467 else if (mode == CCCmode)
12468 suffix = fp ? "b" : "c";
12469 else
12470 gcc_unreachable ();
12471 break;
12472 case GE:
12473 switch (mode)
12475 case E_CCNOmode:
12476 case E_CCGOCmode:
12477 suffix = "ns";
12478 break;
12480 case E_CCmode:
12481 case E_CCGCmode:
12482 case E_CCGZmode:
12483 suffix = "ge";
12484 break;
12486 default:
12487 gcc_unreachable ();
12489 break;
12490 case GEU:
12491 if (mode == CCmode || mode == CCGZmode)
12492 suffix = "nb";
12493 else if (mode == CCCmode)
12494 suffix = fp ? "nb" : "nc";
12495 else
12496 gcc_unreachable ();
12497 break;
12498 case LE:
12499 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12500 suffix = "le";
12501 break;
12502 case LEU:
12503 if (mode == CCmode)
12504 suffix = "be";
12505 else
12506 gcc_unreachable ();
12507 break;
12508 case UNORDERED:
12509 suffix = fp ? "u" : "p";
12510 break;
12511 case ORDERED:
12512 suffix = fp ? "nu" : "np";
12513 break;
12514 default:
12515 gcc_unreachable ();
12517 fputs (suffix, file);
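/* E.g. (eq ...) in CCZmode prints "e" and, with REVERSE, "ne", so an
   insn template such as "set%C0" expands to "sete" or "setne".  */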
12520 /* Print the name of register X to FILE based on its machine mode and number.
12521 If CODE is 'w', pretend the mode is HImode.
12522 If CODE is 'b', pretend the mode is QImode.
12523 If CODE is 'k', pretend the mode is SImode.
12524 If CODE is 'q', pretend the mode is DImode.
12525 If CODE is 'x', pretend the mode is V4SFmode.
12526 If CODE is 't', pretend the mode is V8SFmode.
12527 If CODE is 'g', pretend the mode is V16SFmode.
12528 If CODE is 'h', pretend the reg is the 'high' byte register.
12529 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12530 If CODE is 'd', duplicate the operand for AVX instruction.
12531 If CODE is 'V', print naked full integer register name without %.
12534 void
12535 print_reg (rtx x, int code, FILE *file)
12537 const char *reg;
12538 int msize;
12539 unsigned int regno;
12540 bool duplicated;
12542 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12543 putc ('%', file);
12545 if (x == pc_rtx)
12547 gcc_assert (TARGET_64BIT);
12548 fputs ("rip", file);
12549 return;
12552 if (code == 'y' && STACK_TOP_P (x))
12554 fputs ("st(0)", file);
12555 return;
12558 if (code == 'w')
12559 msize = 2;
12560 else if (code == 'b')
12561 msize = 1;
12562 else if (code == 'k')
12563 msize = 4;
12564 else if (code == 'q')
12565 msize = 8;
12566 else if (code == 'h')
12567 msize = 0;
12568 else if (code == 'x')
12569 msize = 16;
12570 else if (code == 't')
12571 msize = 32;
12572 else if (code == 'g')
12573 msize = 64;
12574 else
12575 msize = GET_MODE_SIZE (GET_MODE (x));
12577 regno = REGNO (x);
12579 if (regno == ARG_POINTER_REGNUM
12580 || regno == FRAME_POINTER_REGNUM
12581 || regno == FPSR_REG)
12583 output_operand_lossage
12584 ("invalid use of register '%s'", reg_names[regno]);
12585 return;
12587 else if (regno == FLAGS_REG)
12589 output_operand_lossage ("invalid use of asm flag output");
12590 return;
12593 if (code == 'V')
12595 if (GENERAL_REGNO_P (regno))
12596 msize = GET_MODE_SIZE (word_mode);
12597 else
12598 error ("%<V%> modifier on non-integer register");
12601 duplicated = code == 'd' && TARGET_AVX;
12603 switch (msize)
12605 case 16:
12606 case 12:
12607 case 8:
12608 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12609 warning (0, "unsupported size for integer register");
12610 /* FALLTHRU */
12611 case 4:
12612 if (LEGACY_INT_REGNO_P (regno))
12613 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12614 /* FALLTHRU */
12615 case 2:
12616 normal:
12617 reg = hi_reg_name[regno];
12618 break;
12619 case 1:
12620 if (regno >= ARRAY_SIZE (qi_reg_name))
12621 goto normal;
12622 if (!ANY_QI_REGNO_P (regno))
12623 error ("unsupported size for integer register");
12624 reg = qi_reg_name[regno];
12625 break;
12626 case 0:
12627 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12628 goto normal;
12629 reg = qi_high_reg_name[regno];
12630 break;
12631 case 32:
12632 case 64:
12633 if (SSE_REGNO_P (regno))
12635 gcc_assert (!duplicated);
12636 putc (msize == 32 ? 'y' : 'z', file);
12637 reg = hi_reg_name[regno] + 1;
12638 break;
12640 goto normal;
12641 default:
12642 gcc_unreachable ();
12645 fputs (reg, file);
12647 /* Irritatingly, AMD extended registers use
12648 a different naming convention: "r%d[bwd]". */
12649 if (REX_INT_REGNO_P (regno))
12651 gcc_assert (TARGET_64BIT);
12652 switch (msize)
12654 case 0:
12655 error ("extended registers have no high halves");
12656 break;
12657 case 1:
12658 putc ('b', file);
12659 break;
12660 case 2:
12661 putc ('w', file);
12662 break;
12663 case 4:
12664 putc ('d', file);
12665 break;
12666 case 8:
12667 /* no suffix */
12668 break;
12669 default:
12670 error ("unsupported operand size for extended register");
12671 break;
12673 return;
12676 if (duplicated)
12678 if (ASSEMBLER_DIALECT == ASM_ATT)
12679 fprintf (file, ", %%%s", reg);
12680 else
12681 fprintf (file, ", %s", reg);
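/* E.g. with TARGET_AVX, "%d1" applied to %xmm2 prints "%xmm2, %xmm2" in
   AT&T syntax, letting one template serve both the two-operand SSE and
   the three-operand AVX encodings.  */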
12685 /* Meaning of CODE:
12686 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12687 C -- print opcode suffix for set/cmov insn.
12688 c -- like C, but print reversed condition
12689 F,f -- likewise, but for floating-point.
12690 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12691 otherwise nothing
12692 R -- print embedded rounding and sae.
12693 r -- print only sae.
12694 z -- print the opcode suffix for the size of the current operand.
12695 Z -- likewise, with special suffixes for x87 instructions.
12696 * -- print a star (in certain assembler syntax)
12697 A -- print an absolute memory reference.
12698 E -- print address with DImode register names if TARGET_64BIT.
12699 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12700 s -- print a shift double count, followed by the assembler's argument
12701 delimiter.
12702 b -- print the QImode name of the register for the indicated operand.
12703 %b0 would print %al if operands[0] is reg 0.
12704 w -- likewise, print the HImode name of the register.
12705 k -- likewise, print the SImode name of the register.
12706 q -- likewise, print the DImode name of the register.
12707 x -- likewise, print the V4SFmode name of the register.
12708 t -- likewise, print the V8SFmode name of the register.
12709 g -- likewise, print the V16SFmode name of the register.
12710 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12711 y -- print "st(0)" instead of "st" as a register.
12712 d -- print duplicated register operand for AVX instruction.
12713 D -- print condition for SSE cmp instruction.
12714 P -- if PIC, print an @PLT suffix.
12715 p -- print raw symbol name.
12716 X -- don't print any sort of PIC '@' suffix for a symbol.
12717 & -- print some in-use local-dynamic symbol name.
12718 H -- print a memory address offset by 8; used for sse high-parts
12719 Y -- print condition for XOP pcom* instruction.
12720 V -- print naked full integer register name without %.
12721 + -- print a branch hint as 'cs' or 'ds' prefix
12722 ; -- print a semicolon (after prefixes due to bug in older gas).
12723 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12724 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12725 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12726 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12727 N -- print maskz if it's constant 0 operand.
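   For instance, with an SImode register operand a template like
   "mov%z0\t{%1, %0|%0, %1}" prints "movl" in AT&T syntax, and %b0, %w0,
   %k0 and %q0 print the 8-, 16-, 32- and 64-bit names of that register.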
12730 void
12731 ix86_print_operand (FILE *file, rtx x, int code)
12733 if (code)
12735 switch (code)
12737 case 'A':
12738 switch (ASSEMBLER_DIALECT)
12740 case ASM_ATT:
12741 putc ('*', file);
12742 break;
12744 case ASM_INTEL:
12745 /* Intel syntax. For absolute addresses, registers should not
12746 be surrounded by braces. */
12747 if (!REG_P (x))
12749 putc ('[', file);
12750 ix86_print_operand (file, x, 0);
12751 putc (']', file);
12752 return;
12754 break;
12756 default:
12757 gcc_unreachable ();
12760 ix86_print_operand (file, x, 0);
12761 return;
12763 case 'E':
12764 /* Wrap address in an UNSPEC to declare special handling. */
12765 if (TARGET_64BIT)
12766 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12768 output_address (VOIDmode, x);
12769 return;
12771 case 'L':
12772 if (ASSEMBLER_DIALECT == ASM_ATT)
12773 putc ('l', file);
12774 return;
12776 case 'W':
12777 if (ASSEMBLER_DIALECT == ASM_ATT)
12778 putc ('w', file);
12779 return;
12781 case 'B':
12782 if (ASSEMBLER_DIALECT == ASM_ATT)
12783 putc ('b', file);
12784 return;
12786 case 'Q':
12787 if (ASSEMBLER_DIALECT == ASM_ATT)
12788 putc ('l', file);
12789 return;
12791 case 'S':
12792 if (ASSEMBLER_DIALECT == ASM_ATT)
12793 putc ('s', file);
12794 return;
12796 case 'T':
12797 if (ASSEMBLER_DIALECT == ASM_ATT)
12798 putc ('t', file);
12799 return;
12801 case 'O':
12802 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12803 if (ASSEMBLER_DIALECT != ASM_ATT)
12804 return;
12806 switch (GET_MODE_SIZE (GET_MODE (x)))
12808 case 2:
12809 putc ('w', file);
12810 break;
12812 case 4:
12813 putc ('l', file);
12814 break;
12816 case 8:
12817 putc ('q', file);
12818 break;
12820 default:
12821 output_operand_lossage ("invalid operand size for operand "
12822 "code 'O'");
12823 return;
12826 putc ('.', file);
12827 #endif
12828 return;
12830 case 'z':
12831 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12833 /* Opcodes don't get size suffixes if using Intel opcodes. */
12834 if (ASSEMBLER_DIALECT == ASM_INTEL)
12835 return;
12837 switch (GET_MODE_SIZE (GET_MODE (x)))
12839 case 1:
12840 putc ('b', file);
12841 return;
12843 case 2:
12844 putc ('w', file);
12845 return;
12847 case 4:
12848 putc ('l', file);
12849 return;
12851 case 8:
12852 putc ('q', file);
12853 return;
12855 default:
12856 output_operand_lossage ("invalid operand size for operand "
12857 "code 'z'");
12858 return;
12862 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12863 warning (0, "non-integer operand used with operand code %<z%>");
12864 /* FALLTHRU */
12866 case 'Z':
12867 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12868 if (ASSEMBLER_DIALECT == ASM_INTEL)
12869 return;
12871 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12873 switch (GET_MODE_SIZE (GET_MODE (x)))
12875 case 2:
12876 #ifdef HAVE_AS_IX86_FILDS
12877 putc ('s', file);
12878 #endif
12879 return;
12881 case 4:
12882 putc ('l', file);
12883 return;
12885 case 8:
12886 #ifdef HAVE_AS_IX86_FILDQ
12887 putc ('q', file);
12888 #else
12889 fputs ("ll", file);
12890 #endif
12891 return;
12893 default:
12894 break;
12897 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12899 /* 387 opcodes don't get size suffixes
12900 if the operands are registers. */
12901 if (STACK_REG_P (x))
12902 return;
12904 switch (GET_MODE_SIZE (GET_MODE (x)))
12906 case 4:
12907 putc ('s', file);
12908 return;
12910 case 8:
12911 putc ('l', file);
12912 return;
12914 case 12:
12915 case 16:
12916 putc ('t', file);
12917 return;
12919 default:
12920 break;
12923 else
12925 output_operand_lossage ("invalid operand type used with "
12926 "operand code 'Z'");
12927 return;
12930 output_operand_lossage ("invalid operand size for operand code 'Z'");
12931 return;
12933 case 'd':
12934 case 'b':
12935 case 'w':
12936 case 'k':
12937 case 'q':
12938 case 'h':
12939 case 't':
12940 case 'g':
12941 case 'y':
12942 case 'x':
12943 case 'X':
12944 case 'P':
12945 case 'p':
12946 case 'V':
12947 break;
12949 case 's':
12950 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12952 ix86_print_operand (file, x, 0);
12953 fputs (", ", file);
12955 return;
12957 case 'Y':
12958 switch (GET_CODE (x))
12960 case NE:
12961 fputs ("neq", file);
12962 break;
12963 case EQ:
12964 fputs ("eq", file);
12965 break;
12966 case GE:
12967 case GEU:
12968 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12969 break;
12970 case GT:
12971 case GTU:
12972 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12973 break;
12974 case LE:
12975 case LEU:
12976 fputs ("le", file);
12977 break;
12978 case LT:
12979 case LTU:
12980 fputs ("lt", file);
12981 break;
12982 case UNORDERED:
12983 fputs ("unord", file);
12984 break;
12985 case ORDERED:
12986 fputs ("ord", file);
12987 break;
12988 case UNEQ:
12989 fputs ("ueq", file);
12990 break;
12991 case UNGE:
12992 fputs ("nlt", file);
12993 break;
12994 case UNGT:
12995 fputs ("nle", file);
12996 break;
12997 case UNLE:
12998 fputs ("ule", file);
12999 break;
13000 case UNLT:
13001 fputs ("ult", file);
13002 break;
13003 case LTGT:
13004 fputs ("une", file);
13005 break;
13006 default:
13007 output_operand_lossage ("operand is not a condition code, "
13008 "invalid operand code 'Y'");
13009 return;
13011 return;
13013 case 'D':
13014 /* A little braindamage here. The SSE compare instructions
13015 use completely different names for the comparisons than the
13016 fp conditional moves do. */
13017 switch (GET_CODE (x))
13019 case UNEQ:
13020 if (TARGET_AVX)
13022 fputs ("eq_us", file);
13023 break;
13025 /* FALLTHRU */
13026 case EQ:
13027 fputs ("eq", file);
13028 break;
13029 case UNLT:
13030 if (TARGET_AVX)
13032 fputs ("nge", file);
13033 break;
13035 /* FALLTHRU */
13036 case LT:
13037 fputs ("lt", file);
13038 break;
13039 case UNLE:
13040 if (TARGET_AVX)
13042 fputs ("ngt", file);
13043 break;
13045 /* FALLTHRU */
13046 case LE:
13047 fputs ("le", file);
13048 break;
13049 case UNORDERED:
13050 fputs ("unord", file);
13051 break;
13052 case LTGT:
13053 if (TARGET_AVX)
13055 fputs ("neq_oq", file);
13056 break;
13058 /* FALLTHRU */
13059 case NE:
13060 fputs ("neq", file);
13061 break;
13062 case GE:
13063 if (TARGET_AVX)
13065 fputs ("ge", file);
13066 break;
13068 /* FALLTHRU */
13069 case UNGE:
13070 fputs ("nlt", file);
13071 break;
13072 case GT:
13073 if (TARGET_AVX)
13075 fputs ("gt", file);
13076 break;
13078 /* FALLTHRU */
13079 case UNGT:
13080 fputs ("nle", file);
13081 break;
13082 case ORDERED:
13083 fputs ("ord", file);
13084 break;
13085 default:
13086 output_operand_lossage ("operand is not a condition code, "
13087 "invalid operand code 'D'");
13088 return;
13090 return;
13092 case 'F':
13093 case 'f':
13094 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13095 if (ASSEMBLER_DIALECT == ASM_ATT)
13096 putc ('.', file);
13097 gcc_fallthrough ();
13098 #endif
13100 case 'C':
13101 case 'c':
13102 if (!COMPARISON_P (x))
13104 output_operand_lossage ("operand is not a condition code, "
13105 "invalid operand code '%c'", code);
13106 return;
13108 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13109 code == 'c' || code == 'f',
13110 code == 'F' || code == 'f',
13111 file);
13112 return;
13114 case 'H':
13115 if (!offsettable_memref_p (x))
13117 output_operand_lossage ("operand is not an offsettable memory "
13118 "reference, invalid operand code 'H'");
13119 return;
13121 /* It doesn't actually matter what mode we use here, as we're
13122 only going to use this for printing. */
13123 x = adjust_address_nv (x, DImode, 8);
13124 /* Output 'qword ptr' for intel assembler dialect. */
13125 if (ASSEMBLER_DIALECT == ASM_INTEL)
13126 code = 'q';
13127 break;
13129 case 'K':
13130 if (!CONST_INT_P (x))
13132 output_operand_lossage ("operand is not an integer, invalid "
13133 "operand code 'K'");
13134 return;
13137 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13138 #ifdef HAVE_AS_IX86_HLE
13139 fputs ("xacquire ", file);
13140 #else
13141 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13142 #endif
13143 else if (INTVAL (x) & IX86_HLE_RELEASE)
13144 #ifdef HAVE_AS_IX86_HLE
13145 fputs ("xrelease ", file);
13146 #else
13147 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13148 #endif
13149 /* We do not want to print the value of the operand. */
13150 return;
13152 case 'N':
13153 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13154 fputs ("{z}", file);
13155 return;
13157 case 'r':
13158 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13160 output_operand_lossage ("operand is not a specific integer, "
13161 "invalid operand code 'r'");
13162 return;
13165 if (ASSEMBLER_DIALECT == ASM_INTEL)
13166 fputs (", ", file);
13168 fputs ("{sae}", file);
13170 if (ASSEMBLER_DIALECT == ASM_ATT)
13171 fputs (", ", file);
13173 return;
13175 case 'R':
13176 if (!CONST_INT_P (x))
13178 output_operand_lossage ("operand is not an integer, invalid "
13179 "operand code 'R'");
13180 return;
13183 if (ASSEMBLER_DIALECT == ASM_INTEL)
13184 fputs (", ", file);
13186 switch (INTVAL (x))
13188 case ROUND_NEAREST_INT | ROUND_SAE:
13189 fputs ("{rn-sae}", file);
13190 break;
13191 case ROUND_NEG_INF | ROUND_SAE:
13192 fputs ("{rd-sae}", file);
13193 break;
13194 case ROUND_POS_INF | ROUND_SAE:
13195 fputs ("{ru-sae}", file);
13196 break;
13197 case ROUND_ZERO | ROUND_SAE:
13198 fputs ("{rz-sae}", file);
13199 break;
13200 default:
13201 output_operand_lossage ("operand is not a specific integer, "
13202 "invalid operand code 'R'");
13205 if (ASSEMBLER_DIALECT == ASM_ATT)
13206 fputs (", ", file);
13208 return;
13210 case '*':
13211 if (ASSEMBLER_DIALECT == ASM_ATT)
13212 putc ('*', file);
13213 return;
13215 case '&':
13217 const char *name = get_some_local_dynamic_name ();
13218 if (name == NULL)
13219 output_operand_lossage ("'%%&' used without any "
13220 "local dynamic TLS references");
13221 else
13222 assemble_name (file, name);
13223 return;
13226 case '+':
13228 rtx x;
13230 if (!optimize
13231 || optimize_function_for_size_p (cfun)
13232 || !TARGET_BRANCH_PREDICTION_HINTS)
13233 return;
13235 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13236 if (x)
13238 int pred_val = profile_probability::from_reg_br_prob_note
13239 (XINT (x, 0)).to_reg_br_prob_base ();
13241 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13242 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13244 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13245 bool cputaken
13246 = final_forward_branch_p (current_output_insn) == 0;
13248 /* Emit hints only when the default branch prediction
13249 heuristics would fail. */
13250 if (taken != cputaken)
13252 /* We use 3e (DS) prefix for taken branches and
13253 2e (CS) prefix for not taken branches. */
13254 if (taken)
13255 fputs ("ds ; ", file);
13256 else
13257 fputs ("cs ; ", file);
13261 return;
13264 case ';':
13265 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13266 putc (';', file);
13267 #endif
13268 return;
13270 case '~':
13271 putc (TARGET_AVX2 ? 'i' : 'f', file);
13272 return;
13274 case 'M':
13275 if (TARGET_X32)
13277 /* NB: 32-bit indices in VSIB address are sign-extended
13278 to 64 bits. In x32, the 32-bit address 0xf7fa3010 would be
13279 sign-extended to 0xfffffffff7fa3010, which is an invalid
13280 address. Add the addr32 prefix if there is neither a base
13281 register nor a symbol.
13282 bool ok;
13283 struct ix86_address parts;
13284 ok = ix86_decompose_address (x, &parts);
13285 gcc_assert (ok && parts.index == NULL_RTX);
13286 if (parts.base == NULL_RTX
13287 && (parts.disp == NULL_RTX
13288 || !symbolic_operand (parts.disp,
13289 GET_MODE (parts.disp))))
13290 fputs ("addr32 ", file);
13292 return;
13294 case '^':
13295 if (TARGET_64BIT && Pmode != word_mode)
13296 fputs ("addr32 ", file);
13297 return;
13299 case '!':
13300 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13301 fputs ("notrack ", file);
13302 return;
13304 default:
13305 output_operand_lossage ("invalid operand code '%c'", code);
13309 if (REG_P (x))
13310 print_reg (x, code, file);
13312 else if (MEM_P (x))
13314 rtx addr = XEXP (x, 0);
13316 /* No `byte ptr' prefix for call instructions ... */
13317 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13319 machine_mode mode = GET_MODE (x);
13320 const char *size;
13322 /* Check for explicit size override codes. */
13323 if (code == 'b')
13324 size = "BYTE";
13325 else if (code == 'w')
13326 size = "WORD";
13327 else if (code == 'k')
13328 size = "DWORD";
13329 else if (code == 'q')
13330 size = "QWORD";
13331 else if (code == 'x')
13332 size = "XMMWORD";
13333 else if (code == 't')
13334 size = "YMMWORD";
13335 else if (code == 'g')
13336 size = "ZMMWORD";
13337 else if (mode == BLKmode)
13338 /* ... or BLKmode operands, when not overridden. */
13339 size = NULL;
13340 else
13341 switch (GET_MODE_SIZE (mode))
13343 case 1: size = "BYTE"; break;
13344 case 2: size = "WORD"; break;
13345 case 4: size = "DWORD"; break;
13346 case 8: size = "QWORD"; break;
13347 case 12: size = "TBYTE"; break;
13348 case 16:
13349 if (mode == XFmode)
13350 size = "TBYTE";
13351 else
13352 size = "XMMWORD";
13353 break;
13354 case 32: size = "YMMWORD"; break;
13355 case 64: size = "ZMMWORD"; break;
13356 default:
13357 gcc_unreachable ();
13359 if (size)
13361 fputs (size, file);
13362 fputs (" PTR ", file);
13366 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13367 output_operand_lossage ("invalid constraints for operand");
13368 else
13369 ix86_print_operand_address_as
13370 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13373 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13375 long l;
13377 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13379 if (ASSEMBLER_DIALECT == ASM_ATT)
13380 putc ('$', file);
13381 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13382 if (code == 'q')
13383 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13384 (unsigned long long) (int) l);
13385 else
13386 fprintf (file, "0x%08x", (unsigned int) l);
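/* E.g. the SFmode constant 1.0 is emitted as $0x3f800000 here; with the
   'q' code a negative bit pattern such as -2.0 (0xc0000000) is
   sign-extended and printed as $0xffffffffc0000000.  */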
13389 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13391 long l[2];
13393 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13395 if (ASSEMBLER_DIALECT == ASM_ATT)
13396 putc ('$', file);
13397 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13400 /* These float cases don't actually occur as immediate operands. */
13401 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13403 char dstr[30];
13405 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13406 fputs (dstr, file);
13409 /* Print bcst_mem_operand. */
13410 else if (GET_CODE (x) == VEC_DUPLICATE)
13412 machine_mode vmode = GET_MODE (x);
13413 /* Must be a bcst_mem_operand. */
13414 gcc_assert (bcst_mem_operand (x, vmode));
13416 rtx mem = XEXP (x, 0);
13417 ix86_print_operand (file, mem, 0);
13419 switch (vmode)
13421 case E_V2DImode:
13422 case E_V2DFmode:
13423 fputs ("{1to2}", file);
13424 break;
13425 case E_V4SImode:
13426 case E_V4SFmode:
13427 case E_V4DImode:
13428 case E_V4DFmode:
13429 fputs ("{1to4}", file);
13430 break;
13431 case E_V8SImode:
13432 case E_V8SFmode:
13433 case E_V8DFmode:
13434 case E_V8DImode:
13435 fputs ("{1to8}", file);
13436 break;
13437 case E_V16SFmode:
13438 case E_V16SImode:
13439 fputs ("{1to16}", file);
13440 break;
13441 default:
13442 gcc_unreachable ();
13446 else
13448 /* We have patterns that allow zero sets of memory, for instance.
13449 In 64-bit mode, we should probably support all 8-byte vectors,
13450 since we can in fact encode that into an immediate. */
13451 if (GET_CODE (x) == CONST_VECTOR)
13453 if (x != CONST0_RTX (GET_MODE (x)))
13454 output_operand_lossage ("invalid vector immediate");
13455 x = const0_rtx;
13458 if (code != 'P' && code != 'p')
13460 if (CONST_INT_P (x))
13462 if (ASSEMBLER_DIALECT == ASM_ATT)
13463 putc ('$', file);
13465 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13466 || GET_CODE (x) == LABEL_REF)
13468 if (ASSEMBLER_DIALECT == ASM_ATT)
13469 putc ('$', file);
13470 else
13471 fputs ("OFFSET FLAT:", file);
13474 if (CONST_INT_P (x))
13475 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13476 else if (flag_pic || MACHOPIC_INDIRECT)
13477 output_pic_addr_const (file, x, code);
13478 else
13479 output_addr_const (file, x);
13483 static bool
13484 ix86_print_operand_punct_valid_p (unsigned char code)
13486 return (code == '*' || code == '+' || code == '&' || code == ';'
13487 || code == '~' || code == '^' || code == '!');
13490 /* Print a memory operand whose address is ADDR. */
13492 static void
13493 ix86_print_operand_address_as (FILE *file, rtx addr,
13494 addr_space_t as, bool no_rip)
13496 struct ix86_address parts;
13497 rtx base, index, disp;
13498 int scale;
13499 int ok;
13500 bool vsib = false;
13501 int code = 0;
13503 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13505 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13506 gcc_assert (parts.index == NULL_RTX);
13507 parts.index = XVECEXP (addr, 0, 1);
13508 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13509 addr = XVECEXP (addr, 0, 0);
13510 vsib = true;
13512 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13514 gcc_assert (TARGET_64BIT);
13515 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13516 code = 'q';
13518 else
13519 ok = ix86_decompose_address (addr, &parts);
13521 gcc_assert (ok);
13523 base = parts.base;
13524 index = parts.index;
13525 disp = parts.disp;
13526 scale = parts.scale;
13528 if (ADDR_SPACE_GENERIC_P (as))
13529 as = parts.seg;
13530 else
13531 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13533 if (!ADDR_SPACE_GENERIC_P (as))
13535 if (ASSEMBLER_DIALECT == ASM_ATT)
13536 putc ('%', file);
13538 switch (as)
13540 case ADDR_SPACE_SEG_FS:
13541 fputs ("fs:", file);
13542 break;
13543 case ADDR_SPACE_SEG_GS:
13544 fputs ("gs:", file);
13545 break;
13546 default:
13547 gcc_unreachable ();
13551 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13552 if (TARGET_64BIT && !base && !index && !no_rip)
13554 rtx symbol = disp;
13556 if (GET_CODE (disp) == CONST
13557 && GET_CODE (XEXP (disp, 0)) == PLUS
13558 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13559 symbol = XEXP (XEXP (disp, 0), 0);
13561 if (GET_CODE (symbol) == LABEL_REF
13562 || (GET_CODE (symbol) == SYMBOL_REF
13563 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13564 base = pc_rtx;
13567 if (!base && !index)
13569 /* Displacement only requires special attention. */
13570 if (CONST_INT_P (disp))
13572 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13573 fputs ("ds:", file);
13574 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13576 /* Load the external function address via the GOT slot to avoid PLT. */
13577 else if (GET_CODE (disp) == CONST
13578 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13579 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13580 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13581 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13582 output_pic_addr_const (file, disp, 0);
13583 else if (flag_pic)
13584 output_pic_addr_const (file, disp, 0);
13585 else
13586 output_addr_const (file, disp);
13588 else
13590 /* Print SImode register names to force addr32 prefix. */
13591 if (SImode_address_operand (addr, VOIDmode))
13593 if (flag_checking)
13595 gcc_assert (TARGET_64BIT);
13596 switch (GET_CODE (addr))
13598 case SUBREG:
13599 gcc_assert (GET_MODE (addr) == SImode);
13600 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13601 break;
13602 case ZERO_EXTEND:
13603 case AND:
13604 gcc_assert (GET_MODE (addr) == DImode);
13605 break;
13606 default:
13607 gcc_unreachable ();
13610 gcc_assert (!code);
13611 code = 'k';
13613 else if (code == 0
13614 && TARGET_X32
13615 && disp
13616 && CONST_INT_P (disp)
13617 && INTVAL (disp) < -16*1024*1024)
13619 /* X32 runs in 64-bit mode, where displacement, DISP, in
13620 address DISP(%r64), is encoded as 32-bit immediate sign-
13621 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13622 address is %r64 + 0xffffffffbffffd00. When %r64 <
13623 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13624 which is invalid for x32. The correct address is %r64
13625 - 0x40000300 == 0xf7ffdd64. To properly encode
13626 -0x40000300(%r64) for x32, we zero-extend negative
13627 displacement by forcing addr32 prefix which truncates
13628 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13629 zero-extend all negative displacements, including -1(%rsp).
13630 However, for small negative displacements, sign-extension
13631 won't cause overflow. We only zero-extend negative
13632 displacements if they are < -16*1024*1024, which is also used
13633 to check legitimate address displacements for PIC. */
13634 code = 'k';
13637 /* Since the upper 32 bits of RSP are always zero for x32,
13638 we can encode %esp as %rsp to avoid the 0x67 prefix if
13639 there is no index register. */
13640 if (TARGET_X32 && Pmode == SImode
13641 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13642 code = 'q';
13644 if (ASSEMBLER_DIALECT == ASM_ATT)
13646 if (disp)
13648 if (flag_pic)
13649 output_pic_addr_const (file, disp, 0);
13650 else if (GET_CODE (disp) == LABEL_REF)
13651 output_asm_label (disp);
13652 else
13653 output_addr_const (file, disp);
13656 putc ('(', file);
13657 if (base)
13658 print_reg (base, code, file);
13659 if (index)
13661 putc (',', file);
13662 print_reg (index, vsib ? 0 : code, file);
13663 if (scale != 1 || vsib)
13664 fprintf (file, ",%d", scale);
13666 putc (')', file);
13668 else
13670 rtx offset = NULL_RTX;
13672 if (disp)
13674 /* Pull out the offset of a symbol; print any symbol itself. */
13675 if (GET_CODE (disp) == CONST
13676 && GET_CODE (XEXP (disp, 0)) == PLUS
13677 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13679 offset = XEXP (XEXP (disp, 0), 1);
13680 disp = gen_rtx_CONST (VOIDmode,
13681 XEXP (XEXP (disp, 0), 0));
13684 if (flag_pic)
13685 output_pic_addr_const (file, disp, 0);
13686 else if (GET_CODE (disp) == LABEL_REF)
13687 output_asm_label (disp);
13688 else if (CONST_INT_P (disp))
13689 offset = disp;
13690 else
13691 output_addr_const (file, disp);
13694 putc ('[', file);
13695 if (base)
13697 print_reg (base, code, file);
13698 if (offset)
13700 if (INTVAL (offset) >= 0)
13701 putc ('+', file);
13702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13705 else if (offset)
13706 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13707 else
13708 putc ('0', file);
13710 if (index)
13712 putc ('+', file);
13713 print_reg (index, vsib ? 0 : code, file);
13714 if (scale != 1 || vsib)
13715 fprintf (file, "*%d", scale);
13717 putc (']', file);
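/* Thus the AT&T form "8(%ebx,%ecx,4)" printed above corresponds to the
   Intel form "[ebx+8+ecx*4]".  */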
13722 static void
13723 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13725 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13728 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13730 static bool
13731 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13733 rtx op;
13735 if (GET_CODE (x) != UNSPEC)
13736 return false;
13738 op = XVECEXP (x, 0, 0);
13739 switch (XINT (x, 1))
13741 case UNSPEC_GOTOFF:
13742 output_addr_const (file, op);
13743 fputs ("@gotoff", file);
13744 break;
13745 case UNSPEC_GOTTPOFF:
13746 output_addr_const (file, op);
13747 /* FIXME: This might be @TPOFF in Sun ld. */
13748 fputs ("@gottpoff", file);
13749 break;
13750 case UNSPEC_TPOFF:
13751 output_addr_const (file, op);
13752 fputs ("@tpoff", file);
13753 break;
13754 case UNSPEC_NTPOFF:
13755 output_addr_const (file, op);
13756 if (TARGET_64BIT)
13757 fputs ("@tpoff", file);
13758 else
13759 fputs ("@ntpoff", file);
13760 break;
13761 case UNSPEC_DTPOFF:
13762 output_addr_const (file, op);
13763 fputs ("@dtpoff", file);
13764 break;
13765 case UNSPEC_GOTNTPOFF:
13766 output_addr_const (file, op);
13767 if (TARGET_64BIT)
13768 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13769 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13770 else
13771 fputs ("@gotntpoff", file);
13772 break;
13773 case UNSPEC_INDNTPOFF:
13774 output_addr_const (file, op);
13775 fputs ("@indntpoff", file);
13776 break;
13777 #if TARGET_MACHO
13778 case UNSPEC_MACHOPIC_OFFSET:
13779 output_addr_const (file, op);
13780 putc ('-', file);
13781 machopic_output_function_base_name (file);
13782 break;
13783 #endif
13785 default:
13786 return false;
13789 return true;
13793 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13794 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13795 is the expression of the binary operation. The output may either be
13796 emitted here, or returned to the caller, like all output_* functions.
13798 There is no guarantee that the operands are the same mode, as they
13799 might be within FLOAT or FLOAT_EXTEND expressions. */
13801 #ifndef SYSV386_COMPAT
13802 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13803 wants to fix the assemblers because that causes incompatibility
13804 with gcc. No-one wants to fix gcc because that causes
13805 incompatibility with assemblers... You can use the option of
13806 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13807 #define SYSV386_COMPAT 1
13808 #endif
13810 const char *
13811 output_387_binary_op (rtx_insn *insn, rtx *operands)
13813 static char buf[40];
13814 const char *p;
13815 bool is_sse
13816 = (SSE_REG_P (operands[0])
13817 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13819 if (is_sse)
13820 p = "%v";
13821 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13823 p = "fi";
13824 else
13825 p = "f";
13827 strcpy (buf, p);
13829 switch (GET_CODE (operands[3]))
13831 case PLUS:
13832 p = "add"; break;
13833 case MINUS:
13834 p = "sub"; break;
13835 case MULT:
13836 p = "mul"; break;
13837 case DIV:
13838 p = "div"; break;
13839 default:
13840 gcc_unreachable ();
13843 strcat (buf, p);
13845 if (is_sse)
13847 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13848 strcat (buf, p);
13850 if (TARGET_AVX)
13851 p = "\t{%2, %1, %0|%0, %1, %2}";
13852 else
13853 p = "\t{%2, %0|%0, %2}";
13855 strcat (buf, p);
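/* Illustrative example (added, not from the original source): for an AVX
   SFmode addition this path builds "%vaddss\t{%2, %1, %0|%0, %1, %2}",
   where the "%v" operand modifier emits the "v" mnemonic prefix when the
   AVX encoding is used.  */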
13856 return buf;
13859 /* Even if we do not want to check the inputs, this documents the input
13860 constraints, which helps in understanding the following code. */
13861 if (flag_checking)
13863 if (STACK_REG_P (operands[0])
13864 && ((REG_P (operands[1])
13865 && REGNO (operands[0]) == REGNO (operands[1])
13866 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13867 || (REG_P (operands[2])
13868 && REGNO (operands[0]) == REGNO (operands[2])
13869 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13870 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13871 ; /* ok */
13872 else
13873 gcc_unreachable ();
13876 switch (GET_CODE (operands[3]))
13878 case MULT:
13879 case PLUS:
13880 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13881 std::swap (operands[1], operands[2]);
13883 /* Now we know operands[0] == operands[1]. */
13885 if (MEM_P (operands[2]))
13887 p = "%Z2\t%2";
13888 break;
13891 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13893 if (STACK_TOP_P (operands[0]))
13894 /* How is it that we are storing to a dead operand[2]?
13895 Well, presumably operands[1] is dead too. We can't
13896 store the result to st(0) as st(0) gets popped on this
13897 instruction. Instead store to operands[2] (which I
13898 think has to be st(1)). st(1) will be popped later.
13899 gcc <= 2.8.1 didn't have this check and generated
13900 assembly code that the Unixware assembler rejected. */
13901 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13902 else
13903 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13904 break;
13907 if (STACK_TOP_P (operands[0]))
13908 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13909 else
13910 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13911 break;
13913 case MINUS:
13914 case DIV:
13915 if (MEM_P (operands[1]))
13917 p = "r%Z1\t%1";
13918 break;
13921 if (MEM_P (operands[2]))
13923 p = "%Z2\t%2";
13924 break;
13927 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13929 #if SYSV386_COMPAT
13930 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13931 derived assemblers, confusingly reverse the direction of
13932 the operation for fsub{r} and fdiv{r} when the
13933 destination register is not st(0). The Intel assembler
13934 doesn't have this brain damage. Read !SYSV386_COMPAT to
13935 figure out what the hardware really does. */
13936 if (STACK_TOP_P (operands[0]))
13937 p = "{p\t%0, %2|rp\t%2, %0}";
13938 else
13939 p = "{rp\t%2, %0|p\t%0, %2}";
13940 #else
13941 if (STACK_TOP_P (operands[0]))
13942 /* As above for fmul/fadd, we can't store to st(0). */
13943 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13944 else
13945 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13946 #endif
13947 break;
13950 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13952 #if SYSV386_COMPAT
13953 if (STACK_TOP_P (operands[0]))
13954 p = "{rp\t%0, %1|p\t%1, %0}";
13955 else
13956 p = "{p\t%1, %0|rp\t%0, %1}";
13957 #else
13958 if (STACK_TOP_P (operands[0]))
13959 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13960 else
13961 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13962 #endif
13963 break;
13966 if (STACK_TOP_P (operands[0]))
13968 if (STACK_TOP_P (operands[1]))
13969 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13970 else
13971 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13972 break;
13974 else if (STACK_TOP_P (operands[1]))
13976 #if SYSV386_COMPAT
13977 p = "{\t%1, %0|r\t%0, %1}";
13978 #else
13979 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13980 #endif
13982 else
13984 #if SYSV386_COMPAT
13985 p = "{r\t%2, %0|\t%0, %2}";
13986 #else
13987 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13988 #endif
13990 break;
13992 default:
13993 gcc_unreachable ();
13996 strcat (buf, p);
13997 return buf;
14000 /* Return needed mode for entity in optimize_mode_switching pass. */
14002 static int
14003 ix86_dirflag_mode_needed (rtx_insn *insn)
14005 if (CALL_P (insn))
14007 if (cfun->machine->func_type == TYPE_NORMAL)
14008 return X86_DIRFLAG_ANY;
14009 else
14010 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14011 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14014 if (recog_memoized (insn) < 0)
14015 return X86_DIRFLAG_ANY;
14017 if (get_attr_type (insn) == TYPE_STR)
14019 /* Emit cld instruction if stringops are used in the function. */
14020 if (cfun->machine->func_type == TYPE_NORMAL)
14021 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14022 else
14023 return X86_DIRFLAG_RESET;
14026 return X86_DIRFLAG_ANY;
14029 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
14031 static bool
14032 ix86_check_avx_upper_register (const_rtx exp)
14034 return (SSE_REG_P (exp)
14035 && !EXT_REX_SSE_REG_P (exp)
14036 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
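/* Note (added): this matches e.g. a V8SFmode or V4DFmode value in
   %ymm0..%ymm15, while the EVEX-only registers (%[xyz]mm16 and above,
   EXT_REX_SSE_REG_P) are excluded because vzeroupper does not touch
   their upper bits.  */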
14039 /* Return needed mode for entity in optimize_mode_switching pass. */
14041 static int
14042 ix86_avx_u128_mode_needed (rtx_insn *insn)
14044 if (CALL_P (insn))
14046 rtx link;
14048 /* Needed mode is set to AVX_U128_CLEAN if there are
14049 no 256bit or 512bit modes used in function arguments. */
14050 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14051 link;
14052 link = XEXP (link, 1))
14054 if (GET_CODE (XEXP (link, 0)) == USE)
14056 rtx arg = XEXP (XEXP (link, 0), 0);
14058 if (ix86_check_avx_upper_register (arg))
14059 return AVX_U128_DIRTY;
14063 /* If the function is known to preserve some SSE registers,
14064 RA and previous passes can legitimately rely on that for
14065 modes wider than 256 bits. It's only safe to issue a
14066 vzeroupper if all SSE registers are clobbered. */
14067 const function_abi &abi = insn_callee_abi (insn);
14068 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14069 abi.mode_clobbers (V4DImode)))
14070 return AVX_U128_ANY;
14072 return AVX_U128_CLEAN;
14075 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14076 Hardware changes state only when a 256bit register is written to,
14077 but we need to prevent the compiler from moving the optimal insertion
14078 point above an eventual read from a 256bit or 512bit register. */
14079 subrtx_iterator::array_type array;
14080 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14081 if (ix86_check_avx_upper_register (*iter))
14082 return AVX_U128_DIRTY;
14084 return AVX_U128_ANY;
14087 /* Return mode that i387 must be switched into
14088 prior to the execution of insn. */
14090 static int
14091 ix86_i387_mode_needed (int entity, rtx_insn *insn)
14093 enum attr_i387_cw mode;
14095 /* The mode UNINITIALIZED is used to store the control word after a
14096 function call or ASM pattern. The mode ANY specifies that the function
14097 has no requirements on the control word and makes no changes in the
14098 bits we are interested in. */
14100 if (CALL_P (insn)
14101 || (NONJUMP_INSN_P (insn)
14102 && (asm_noperands (PATTERN (insn)) >= 0
14103 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14104 return I387_CW_UNINITIALIZED;
14106 if (recog_memoized (insn) < 0)
14107 return I387_CW_ANY;
14109 mode = get_attr_i387_cw (insn);
14111 switch (entity)
14113 case I387_ROUNDEVEN:
14114 if (mode == I387_CW_ROUNDEVEN)
14115 return mode;
14116 break;
14118 case I387_TRUNC:
14119 if (mode == I387_CW_TRUNC)
14120 return mode;
14121 break;
14123 case I387_FLOOR:
14124 if (mode == I387_CW_FLOOR)
14125 return mode;
14126 break;
14128 case I387_CEIL:
14129 if (mode == I387_CW_CEIL)
14130 return mode;
14131 break;
14133 default:
14134 gcc_unreachable ();
14137 return I387_CW_ANY;
14140 /* Return mode that entity must be switched into
14141 prior to the execution of insn. */
14143 static int
14144 ix86_mode_needed (int entity, rtx_insn *insn)
14146 switch (entity)
14148 case X86_DIRFLAG:
14149 return ix86_dirflag_mode_needed (insn);
14150 case AVX_U128:
14151 return ix86_avx_u128_mode_needed (insn);
14152 case I387_ROUNDEVEN:
14153 case I387_TRUNC:
14154 case I387_FLOOR:
14155 case I387_CEIL:
14156 return ix86_i387_mode_needed (entity, insn);
14157 default:
14158 gcc_unreachable ();
14160 return 0;
14163 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14165 static void
14166 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14168 if (ix86_check_avx_upper_register (dest))
14170 bool *used = (bool *) data;
14171 *used = true;
14175 /* Calculate mode of upper 128bit AVX registers after the insn. */
14177 static int
14178 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
14180 rtx pat = PATTERN (insn);
14182 if (vzeroupper_pattern (pat, VOIDmode)
14183 || vzeroall_pattern (pat, VOIDmode))
14184 return AVX_U128_CLEAN;
14186 /* We know that the state is clean after a CALL insn if no 256bit
14187 or 512bit modes are used in the function return register. */
14188 if (CALL_P (insn))
14190 bool avx_upper_reg_found = false;
14191 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
14193 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
14196 /* Otherwise, return current mode. Remember that if insn
14197 references AVX 256bit or 512bit registers, the mode was already
14198 changed to DIRTY from MODE_NEEDED. */
14199 return mode;
14202 /* Return the mode that an insn results in. */
14204 static int
14205 ix86_mode_after (int entity, int mode, rtx_insn *insn)
14207 switch (entity)
14209 case X86_DIRFLAG:
14210 return mode;
14211 case AVX_U128:
14212 return ix86_avx_u128_mode_after (mode, insn);
14213 case I387_ROUNDEVEN:
14214 case I387_TRUNC:
14215 case I387_FLOOR:
14216 case I387_CEIL:
14217 return mode;
14218 default:
14219 gcc_unreachable ();
14223 static int
14224 ix86_dirflag_mode_entry (void)
14226 /* For TARGET_CLD or in the interrupt handler we can't assume
14227 direction flag state at function entry. */
14228 if (TARGET_CLD
14229 || cfun->machine->func_type != TYPE_NORMAL)
14230 return X86_DIRFLAG_ANY;
14232 return X86_DIRFLAG_RESET;
14235 static int
14236 ix86_avx_u128_mode_entry (void)
14238 tree arg;
14240 /* Entry mode is set to AVX_U128_DIRTY if there are
14241 256bit or 512bit modes used in function arguments. */
14242 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14243 arg = TREE_CHAIN (arg))
14245 rtx incoming = DECL_INCOMING_RTL (arg);
14247 if (incoming && ix86_check_avx_upper_register (incoming))
14248 return AVX_U128_DIRTY;
14251 return AVX_U128_CLEAN;
14254 /* Return a mode that ENTITY is assumed to be
14255 switched to at function entry. */
14257 static int
14258 ix86_mode_entry (int entity)
14260 switch (entity)
14262 case X86_DIRFLAG:
14263 return ix86_dirflag_mode_entry ();
14264 case AVX_U128:
14265 return ix86_avx_u128_mode_entry ();
14266 case I387_ROUNDEVEN:
14267 case I387_TRUNC:
14268 case I387_FLOOR:
14269 case I387_CEIL:
14270 return I387_CW_ANY;
14271 default:
14272 gcc_unreachable ();
14276 static int
14277 ix86_avx_u128_mode_exit (void)
14279 rtx reg = crtl->return_rtx;
14281 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14282 or 512bit modes used in the function return register. */
14283 if (reg && ix86_check_avx_upper_register (reg))
14284 return AVX_U128_DIRTY;
14286 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14287 modes used in function arguments, otherwise return AVX_U128_CLEAN. */
14289 return ix86_avx_u128_mode_entry ();
14292 /* Return a mode that ENTITY is assumed to be
14293 switched to at function exit. */
14295 static int
14296 ix86_mode_exit (int entity)
14298 switch (entity)
14300 case X86_DIRFLAG:
14301 return X86_DIRFLAG_ANY;
14302 case AVX_U128:
14303 return ix86_avx_u128_mode_exit ();
14304 case I387_ROUNDEVEN:
14305 case I387_TRUNC:
14306 case I387_FLOOR:
14307 case I387_CEIL:
14308 return I387_CW_ANY;
14309 default:
14310 gcc_unreachable ();
14314 static int
14315 ix86_mode_priority (int, int n)
14317 return n;
14320 /* Output code to initialize control word copies used by trunc?f?i and
14321 rounding patterns. CURRENT_MODE is set to the current control word,
14322 while NEW_MODE is set to the new control word. */
14324 static void
14325 emit_i387_cw_initialization (int mode)
14327 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14328 rtx new_mode;
14330 enum ix86_stack_slot slot;
14332 rtx reg = gen_reg_rtx (HImode);
14334 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14335 emit_move_insn (reg, copy_rtx (stored_mode));
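/* Bits 10-11 of the x87 control word form the rounding-control (RC)
   field: 00 = round to nearest even, 01 = round down, 10 = round up,
   11 = truncate toward zero.  The 0x0c00/0x0400/0x0800 masks below
   select these encodings.  */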
14337 switch (mode)
14339 case I387_CW_ROUNDEVEN:
14340 /* round to nearest */
14341 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14342 slot = SLOT_CW_ROUNDEVEN;
14343 break;
14345 case I387_CW_TRUNC:
14346 /* round toward zero (truncate) */
14347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14348 slot = SLOT_CW_TRUNC;
14349 break;
14351 case I387_CW_FLOOR:
14352 /* round down toward -oo */
14353 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14355 slot = SLOT_CW_FLOOR;
14356 break;
14358 case I387_CW_CEIL:
14359 /* round up toward +oo */
14360 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14362 slot = SLOT_CW_CEIL;
14363 break;
14365 default:
14366 gcc_unreachable ();
14369 gcc_assert (slot < MAX_386_STACK_LOCALS);
14371 new_mode = assign_386_stack_local (HImode, slot);
14372 emit_move_insn (new_mode, reg);
14375 /* Generate one or more insns to set ENTITY to MODE. */
14377 static void
14378 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14379 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14381 switch (entity)
14383 case X86_DIRFLAG:
14384 if (mode == X86_DIRFLAG_RESET)
14385 emit_insn (gen_cld ());
14386 break;
14387 case AVX_U128:
14388 if (mode == AVX_U128_CLEAN)
14389 emit_insn (gen_avx_vzeroupper ());
14390 break;
14391 case I387_ROUNDEVEN:
14392 case I387_TRUNC:
14393 case I387_FLOOR:
14394 case I387_CEIL:
14395 if (mode != I387_CW_ANY
14396 && mode != I387_CW_UNINITIALIZED)
14397 emit_i387_cw_initialization (mode);
14398 break;
14399 default:
14400 gcc_unreachable ();
14404 /* Output code for INSN to convert a float to a signed int. OPERANDS
14405 are the insn operands. The output may be [HSD]Imode and the input
14406 operand may be [SDX]Fmode. */
14408 const char *
14409 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14411 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14412 bool dimode_p = GET_MODE (operands[0]) == DImode;
14413 int round_mode = get_attr_i387_cw (insn);
14415 static char buf[40];
14416 const char *p;
14418 /* Jump through a hoop or two for DImode, since the hardware has no
14419 non-popping instruction. We used to do this a different way, but
14420 that was somewhat fragile and broke with post-reload splitters. */
14421 if ((dimode_p || fisttp) && !stack_top_dies)
14422 output_asm_insn ("fld\t%y1", operands);
14424 gcc_assert (STACK_TOP_P (operands[1]));
14425 gcc_assert (MEM_P (operands[0]));
14426 gcc_assert (GET_MODE (operands[1]) != TFmode);
14428 if (fisttp)
14429 return "fisttp%Z0\t%0";
14431 strcpy (buf, "fist");
14433 if (round_mode != I387_CW_ANY)
14434 output_asm_insn ("fldcw\t%3", operands);
14436 p = "p%Z0\t%0";
14437 strcat (buf, p + !(stack_top_dies || dimode_p));
14439 output_asm_insn (buf, operands);
14441 if (round_mode != I387_CW_ANY)
14442 output_asm_insn ("fldcw\t%2", operands);
14444 return "";
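/* Sketch of the emitted sequence (added for clarity): when the value on
   top of the stack must survive a DImode or fisttp conversion, "fld %y1"
   duplicates it first; then a popping "fistp" (or a non-popping "fist"
   when the top must stay) with the proper mode suffix is emitted,
   bracketed by "fldcw %3" / "fldcw %2" when a non-default rounding mode
   is required.  */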
14447 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14448 have the values zero or one, indicates the ffreep insn's operand
14449 from the OPERANDS array. */
14451 static const char *
14452 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14454 if (TARGET_USE_FFREEP)
14455 #ifdef HAVE_AS_IX86_FFREEP
14456 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14457 #else
14459 static char retval[32];
14460 int regno = REGNO (operands[opno]);
14462 gcc_assert (STACK_REGNO_P (regno));
14464 regno -= FIRST_STACK_REG;
14466 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14467 return retval;
14469 #endif
14471 return opno ? "fstp\t%y1" : "fstp\t%y0";
14475 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14476 should be used. UNORDERED_P is true when fucom should be used. */
14478 const char *
14479 output_fp_compare (rtx_insn *insn, rtx *operands,
14480 bool eflags_p, bool unordered_p)
14482 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14483 bool stack_top_dies;
14485 static char buf[40];
14486 const char *p;
14488 gcc_assert (STACK_TOP_P (xops[0]));
14490 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14492 if (eflags_p)
14494 p = unordered_p ? "fucomi" : "fcomi";
14495 strcpy (buf, p);
14497 p = "p\t{%y1, %0|%0, %y1}";
14498 strcat (buf, p + !stack_top_dies);
14500 return buf;
14503 if (STACK_REG_P (xops[1])
14504 && stack_top_dies
14505 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14507 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14509 /* If the top of the 387 stack dies, and the other operand
14510 is also a stack register that dies, then this must be a
14511 `fcompp' float compare. */
14512 p = unordered_p ? "fucompp" : "fcompp";
14513 strcpy (buf, p);
14515 else if (const0_operand (xops[1], VOIDmode))
14517 gcc_assert (!unordered_p);
14518 strcpy (buf, "ftst");
14520 else
14522 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14524 gcc_assert (!unordered_p);
14525 p = "ficom";
14527 else
14528 p = unordered_p ? "fucom" : "fcom";
14530 strcpy (buf, p);
14532 p = "p%Z2\t%y2";
14533 strcat (buf, p + !stack_top_dies);
14536 output_asm_insn (buf, operands);
14537 return "fnstsw\t%0";
14540 void
14541 ix86_output_addr_vec_elt (FILE *file, int value)
14543 const char *directive = ASM_LONG;
14545 #ifdef ASM_QUAD
14546 if (TARGET_LP64)
14547 directive = ASM_QUAD;
14548 #else
14549 gcc_assert (!TARGET_64BIT);
14550 #endif
14552 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
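/* On a typical ELF target this emits something like "\t.long\t.L42"
   (or ".quad" for TARGET_LP64); the exact directive and label prefix
   come from ASM_LONG/ASM_QUAD and LPREFIX.  */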
14555 void
14556 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14558 const char *directive = ASM_LONG;
14560 #ifdef ASM_QUAD
14561 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14562 directive = ASM_QUAD;
14563 #else
14564 gcc_assert (!TARGET_64BIT);
14565 #endif
14566 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14567 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14568 fprintf (file, "%s%s%d-%s%d\n",
14569 directive, LPREFIX, value, LPREFIX, rel);
14570 #if TARGET_MACHO
14571 else if (TARGET_MACHO)
14573 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14574 machopic_output_function_base_name (file);
14575 putc ('\n', file);
14577 #endif
14578 else if (HAVE_AS_GOTOFF_IN_DATA)
14579 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14580 else
14581 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14582 GOT_SYMBOL_NAME, LPREFIX, value);
14585 #define LEA_MAX_STALL (3)
14586 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14588 /* Increase the given DISTANCE in half-cycles according to
14589 dependencies between the PREV and NEXT instructions.
14590 Add 1 half-cycle if there is no dependency and
14591 go to the next cycle if there is some dependency. */
14593 static unsigned int
14594 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14596 df_ref def, use;
14598 if (!prev || !next)
14599 return distance + (distance & 1) + 2;
14601 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14602 return distance + 1;
14604 FOR_EACH_INSN_USE (use, next)
14605 FOR_EACH_INSN_DEF (def, prev)
14606 if (!DF_REF_IS_ARTIFICIAL (def)
14607 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14608 return distance + (distance & 1) + 2;
14610 return distance + 1;
14613 /* Return true if instruction INSN defines register number
14614 REGNO1 or REGNO2. */
14616 bool
14617 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14618 rtx_insn *insn)
14620 df_ref def;
14622 FOR_EACH_INSN_DEF (def, insn)
14623 if (DF_REF_REG_DEF_P (def)
14624 && !DF_REF_IS_ARTIFICIAL (def)
14625 && (regno1 == DF_REF_REGNO (def)
14626 || regno2 == DF_REF_REGNO (def)))
14627 return true;
14629 return false;
14632 /* Return true if instruction INSN uses register number
14633 REGNO as part of an address expression. */
14635 static bool
14636 insn_uses_reg_mem (unsigned int regno, rtx insn)
14638 df_ref use;
14640 FOR_EACH_INSN_USE (use, insn)
14641 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14642 return true;
14644 return false;
14647 /* Search backward for a non-AGU definition of register number REGNO1
14648 or REGNO2 in the basic block, starting from instruction
14649 START up to the head of the basic block or instruction INSN.
14651 Set *FOUND to true if a definition was found
14652 and to false otherwise.
14654 The distance in half-cycles between START and the found instruction
14655 (or the head of the BB) is added to DISTANCE and returned. */
14657 static int
14658 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14659 rtx_insn *insn, int distance,
14660 rtx_insn *start, bool *found)
14662 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14663 rtx_insn *prev = start;
14664 rtx_insn *next = NULL;
14666 *found = false;
14668 while (prev
14669 && prev != insn
14670 && distance < LEA_SEARCH_THRESHOLD)
14672 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14674 distance = increase_distance (prev, next, distance);
14675 if (insn_defines_reg (regno1, regno2, prev))
14677 if (recog_memoized (prev) < 0
14678 || get_attr_type (prev) != TYPE_LEA)
14680 *found = true;
14681 return distance;
14685 next = prev;
14687 if (prev == BB_HEAD (bb))
14688 break;
14690 prev = PREV_INSN (prev);
14693 return distance;
14696 /* Search backward for a non-AGU definition of register number REGNO1
14697 or REGNO2 in INSN's basic block until we either
14698 1. pass LEA_SEARCH_THRESHOLD instructions, or
14699 2. reach a neighboring BB's boundary, or
14700 3. reach an AGU definition.
14701 Return the distance between the non-AGU definition point and INSN.
14702 If there is no definition point, return -1. */
14704 static int
14705 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14706 rtx_insn *insn)
14708 basic_block bb = BLOCK_FOR_INSN (insn);
14709 int distance = 0;
14710 bool found = false;
14712 if (insn != BB_HEAD (bb))
14713 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14714 distance, PREV_INSN (insn),
14715 &found);
14717 if (!found && distance < LEA_SEARCH_THRESHOLD)
14719 edge e;
14720 edge_iterator ei;
14721 bool simple_loop = false;
14723 FOR_EACH_EDGE (e, ei, bb->preds)
14724 if (e->src == bb)
14726 simple_loop = true;
14727 break;
14730 if (simple_loop)
14731 distance = distance_non_agu_define_in_bb (regno1, regno2,
14732 insn, distance,
14733 BB_END (bb), &found);
14734 else
14736 int shortest_dist = -1;
14737 bool found_in_bb = false;
14739 FOR_EACH_EDGE (e, ei, bb->preds)
14741 int bb_dist
14742 = distance_non_agu_define_in_bb (regno1, regno2,
14743 insn, distance,
14744 BB_END (e->src),
14745 &found_in_bb);
14746 if (found_in_bb)
14748 if (shortest_dist < 0)
14749 shortest_dist = bb_dist;
14750 else if (bb_dist > 0)
14751 shortest_dist = MIN (bb_dist, shortest_dist);
14753 found = true;
14757 distance = shortest_dist;
14761 /* get_attr_type may modify recog data. We want to make sure
14762 that recog data is valid for instruction INSN, on which
14763 distance_non_agu_define is called. INSN is unchanged here. */
14764 extract_insn_cached (insn);
14766 if (!found)
14767 return -1;
14769 return distance >> 1;
14772 /* Return the distance in half-cycles between INSN and the next
14773 insn that uses register number REGNO in a memory address, added
14774 to DISTANCE. Return -1 if REGNO is set.
14776 Set *FOUND to true if a register usage was found and
14777 to false otherwise.
14778 Set *REDEFINED to true if a register redefinition was
14779 found and to false otherwise. */
14781 static int
14782 distance_agu_use_in_bb (unsigned int regno,
14783 rtx_insn *insn, int distance, rtx_insn *start,
14784 bool *found, bool *redefined)
14786 basic_block bb = NULL;
14787 rtx_insn *next = start;
14788 rtx_insn *prev = NULL;
14790 *found = false;
14791 *redefined = false;
14793 if (start != NULL_RTX)
14795 bb = BLOCK_FOR_INSN (start);
14796 if (start != BB_HEAD (bb))
14797 /* If insn and start belong to the same bb, set prev to insn,
14798 so the call to increase_distance will increase the distance
14799 between insns by 1. */
14800 prev = insn;
14803 while (next
14804 && next != insn
14805 && distance < LEA_SEARCH_THRESHOLD)
14807 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14809 distance = increase_distance(prev, next, distance);
14810 if (insn_uses_reg_mem (regno, next))
14812 /* Return DISTANCE if OP0 is used in memory
14813 address in NEXT. */
14814 *found = true;
14815 return distance;
14818 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14820 /* Return -1 if OP0 is set in NEXT. */
14821 *redefined = true;
14822 return -1;
14825 prev = next;
14828 if (next == BB_END (bb))
14829 break;
14831 next = NEXT_INSN (next);
14834 return distance;
14837 /* Return the distance between INSN and the next insn that uses
14838 register number REGNO0 in a memory address. Return -1 if no such
14839 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14841 static int
14842 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14844 basic_block bb = BLOCK_FOR_INSN (insn);
14845 int distance = 0;
14846 bool found = false;
14847 bool redefined = false;
14849 if (insn != BB_END (bb))
14850 distance = distance_agu_use_in_bb (regno0, insn, distance,
14851 NEXT_INSN (insn),
14852 &found, &redefined);
14854 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14856 edge e;
14857 edge_iterator ei;
14858 bool simple_loop = false;
14860 FOR_EACH_EDGE (e, ei, bb->succs)
14861 if (e->dest == bb)
14863 simple_loop = true;
14864 break;
14867 if (simple_loop)
14868 distance = distance_agu_use_in_bb (regno0, insn,
14869 distance, BB_HEAD (bb),
14870 &found, &redefined);
14871 else
14873 int shortest_dist = -1;
14874 bool found_in_bb = false;
14875 bool redefined_in_bb = false;
14877 FOR_EACH_EDGE (e, ei, bb->succs)
14879 int bb_dist
14880 = distance_agu_use_in_bb (regno0, insn,
14881 distance, BB_HEAD (e->dest),
14882 &found_in_bb, &redefined_in_bb);
14883 if (found_in_bb)
14885 if (shortest_dist < 0)
14886 shortest_dist = bb_dist;
14887 else if (bb_dist > 0)
14888 shortest_dist = MIN (bb_dist, shortest_dist);
14890 found = true;
14894 distance = shortest_dist;
14898 if (!found || redefined)
14899 return -1;
14901 return distance >> 1;
14904 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14905 there is a dilemma of choosing LEA or ADD.
14906 Negative value: ADD is preferred over LEA.
14907 Zero: Neutral.
14908 Positive value: LEA is preferred over ADD. */
14909 #define IX86_LEA_PRIORITY 0
14911 /* Return true if using lea INSN has a performance advantage
14912 over a sequence of instructions. The instruction sequence has
14913 SPLIT_COST cycles higher latency than the lea latency. */
14915 static bool
14916 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14917 unsigned int regno2, int split_cost, bool has_scale)
14919 int dist_define, dist_use;
14921 /* For Atom processors newer than Bonnell, if a 2-source or
14922 3-source LEA is used for a non-destructive destination, or to
14923 gain the ability to use SCALE, the use of LEA is justified. */
14924 if (!TARGET_BONNELL)
14926 if (has_scale)
14927 return true;
14928 if (split_cost < 1)
14929 return false;
14930 if (regno0 == regno1 || regno0 == regno2)
14931 return false;
14932 return true;
14935 rtx_insn *rinsn = recog_data.insn;
14937 dist_define = distance_non_agu_define (regno1, regno2, insn);
14938 dist_use = distance_agu_use (regno0, insn);
14940 /* distance_non_agu_define can call extract_insn_cached. If this function
14941 is called from define_split conditions, that can break insn splitting,
14942 because split_insns works by clearing recog_data.insn and then modifying
14943 the recog_data.operand array to match the various split conditions. */
14944 if (recog_data.insn != rinsn)
14945 recog_data.insn = NULL;
14947 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14949 /* If there is no non-AGU operand definition, no AGU
14950 operand usage and the split cost is 0, then both the lea
14951 and non-lea variants have the same priority. Currently
14952 we prefer lea for 64-bit code and non-lea for 32-bit
14953 code. */
14954 if (dist_use < 0 && split_cost == 0)
14955 return TARGET_64BIT || IX86_LEA_PRIORITY;
14956 else
14957 return true;
14960 /* With a longer definition distance, lea is preferable.
14961 Here we adjust it to take into account the splitting cost and
14962 lea priority. */
14963 dist_define += split_cost + IX86_LEA_PRIORITY;
14965 /* If there is no use in a memory address then we just check
14966 that the split cost exceeds the AGU stall. */
14967 if (dist_use < 0)
14968 return dist_define > LEA_MAX_STALL;
14970 /* If this insn has both a backward non-AGU dependence and a forward
14971 AGU dependence, the one with the shorter distance takes effect. */
14972 return dist_define >= dist_use;
14975 /* Return true if it is legal to clobber flags by INSN and
14976 false otherwise. */
14978 static bool
14979 ix86_ok_to_clobber_flags (rtx_insn *insn)
14981 basic_block bb = BLOCK_FOR_INSN (insn);
14982 df_ref use;
14983 bitmap live;
14985 while (insn)
14987 if (NONDEBUG_INSN_P (insn))
14989 FOR_EACH_INSN_USE (use, insn)
14990 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14991 return false;
14993 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14994 return true;
14997 if (insn == BB_END (bb))
14998 break;
15000 insn = NEXT_INSN (insn);
15003 live = df_get_live_out(bb);
15004 return !REGNO_REG_SET_P (live, FLAGS_REG);
15007 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15008 move and add to avoid AGU stalls. */
15010 bool
15011 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15013 unsigned int regno0, regno1, regno2;
15015 /* Check if we need to optimize. */
15016 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15017 return false;
15019 /* Check it is correct to split here. */
15020 if (!ix86_ok_to_clobber_flags(insn))
15021 return false;
15023 regno0 = true_regnum (operands[0]);
15024 regno1 = true_regnum (operands[1]);
15025 regno2 = true_regnum (operands[2]);
15027 /* We need to split only adds with a non-destructive
15028 destination operand. */
15029 if (regno0 == regno1 || regno0 == regno2)
15030 return false;
15031 else
15032 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15035 /* Return true if we should emit an lea instruction instead of a mov
15036 instruction. */
15038 bool
15039 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15041 unsigned int regno0, regno1;
15043 /* Check if we need to optimize. */
15044 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15045 return false;
15047 /* Use lea for reg to reg moves only. */
15048 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15049 return false;
15051 regno0 = true_regnum (operands[0]);
15052 regno1 = true_regnum (operands[1]);
15054 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15057 /* Return true if we need to split lea into a sequence of
15058 instructions to avoid AGU stalls. */
15060 bool
15061 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15063 unsigned int regno0, regno1, regno2;
15064 int split_cost;
15065 struct ix86_address parts;
15066 int ok;
15068 /* The "at least two components" test below might not catch simple
15069 move or zero extension insns if parts.base is non-NULL and parts.disp
15070 is const0_rtx as the only components in the address, e.g. if the
15071 register is %rbp or %r13. As this test is much cheaper and moves or
15072 zero extensions are the common case, do this check first. */
15073 if (REG_P (operands[1])
15074 || (SImode_address_operand (operands[1], VOIDmode)
15075 && REG_P (XEXP (operands[1], 0))))
15076 return false;
15078 /* Check if it is OK to split here. */
15079 if (!ix86_ok_to_clobber_flags (insn))
15080 return false;
15082 ok = ix86_decompose_address (operands[1], &parts);
15083 gcc_assert (ok);
15085 /* There should be at least two components in the address. */
15086 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15087 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15088 return false;
15090 /* We should not split into add if a non-legitimate PIC
15091 operand is used as the displacement. */
15092 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15093 return false;
15095 regno0 = true_regnum (operands[0]);
15096 regno1 = INVALID_REGNUM;
15097 regno2 = INVALID_REGNUM;
15099 if (parts.base)
15100 regno1 = true_regnum (parts.base);
15101 if (parts.index)
15102 regno2 = true_regnum (parts.index);
15104 /* Use add for a = a + b and a = b + a since it is faster and shorter
15105 than lea for most processors. For processors like BONNELL, if
15106 the destination register of LEA holds an actual address which will
15107 be used soon, LEA is better, otherwise ADD is better. */
15108 if (!TARGET_BONNELL
15109 && parts.scale == 1
15110 && (!parts.disp || parts.disp == const0_rtx)
15111 && (regno0 == regno1 || regno0 == regno2))
15112 return true;
15114 /* Check if we need to optimize. */
15115 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15116 return false;
15118 split_cost = 0;
15120 /* Compute how many cycles we will add to the execution time
15121 if we split the lea into a sequence of instructions. */
15122 if (parts.base || parts.index)
15124 /* Have to use a mov instruction if the non-destructive
15125 destination form is used. */
15126 if (regno1 != regno0 && regno2 != regno0)
15127 split_cost += 1;
15129 /* Have to add index to base if both exist. */
15130 if (parts.base && parts.index)
15131 split_cost += 1;
15133 /* Have to use shift and adds if scale is 2 or greater. */
15134 if (parts.scale > 1)
15136 if (regno0 != regno1)
15137 split_cost += 1;
15138 else if (regno2 == regno0)
15139 split_cost += 4;
15140 else
15141 split_cost += parts.scale;
15144 /* Have to use an add instruction with an immediate if
15145 disp is nonzero. */
15146 if (parts.disp && parts.disp != const0_rtx)
15147 split_cost += 1;
15149 /* Subtract the price of lea. */
15150 split_cost -= 1;
15153 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
15154 parts.scale > 1);
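/* Worked example (added): for "lea 4(%rbx,%rcx,2), %rax" the cost above
   is 1 (mov to the new destination) + 1 (add of the index) + 1 (scale
   handled by a shift) + 1 (displacement add) - 1 (the lea itself) = 3,
   which is then weighed against the AGU stall by ix86_lea_outperforms.  */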
15157 /* Return true if it is ok to optimize an ADD operation to a LEA
15158 operation to avoid flag register consumption. For most processors,
15159 ADD is faster than LEA. For processors like BONNELL, if the
15160 destination register of LEA holds an actual address which will be
15161 used soon, LEA is better, otherwise ADD is better. */
15163 bool
15164 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
15166 unsigned int regno0 = true_regnum (operands[0]);
15167 unsigned int regno1 = true_regnum (operands[1]);
15168 unsigned int regno2 = true_regnum (operands[2]);
15170 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15171 if (regno0 != regno1 && regno0 != regno2)
15172 return true;
15174 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15175 return false;
15177 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
15180 /* Return true if destination reg of SET_BODY is shift count of
15181 USE_BODY. */
15183 static bool
15184 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15186 rtx set_dest;
15187 rtx shift_rtx;
15188 int i;
15190 /* Retrieve destination of SET_BODY. */
15191 switch (GET_CODE (set_body))
15193 case SET:
15194 set_dest = SET_DEST (set_body);
15195 if (!set_dest || !REG_P (set_dest))
15196 return false;
15197 break;
15198 case PARALLEL:
15199 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15200 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15201 use_body))
15202 return true;
15203 /* FALLTHROUGH */
15204 default:
15205 return false;
15208 /* Retrieve shift count of USE_BODY. */
15209 switch (GET_CODE (use_body))
15211 case SET:
15212 shift_rtx = XEXP (use_body, 1);
15213 break;
15214 case PARALLEL:
15215 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15216 if (ix86_dep_by_shift_count_body (set_body,
15217 XVECEXP (use_body, 0, i)))
15218 return true;
15219 /* FALLTHROUGH */
15220 default:
15221 return false;
15224 if (shift_rtx
15225 && (GET_CODE (shift_rtx) == ASHIFT
15226 || GET_CODE (shift_rtx) == LSHIFTRT
15227 || GET_CODE (shift_rtx) == ASHIFTRT
15228 || GET_CODE (shift_rtx) == ROTATE
15229 || GET_CODE (shift_rtx) == ROTATERT))
15231 rtx shift_count = XEXP (shift_rtx, 1);
15233 /* Return true if shift count is dest of SET_BODY. */
15234 if (REG_P (shift_count))
15236 /* Add this check since it can be invoked before register
15237 allocation in the pre-reload scheduler. */
15238 if (reload_completed
15239 && true_regnum (set_dest) == true_regnum (shift_count))
15240 return true;
15241 else if (REGNO(set_dest) == REGNO(shift_count))
15242 return true;
15246 return false;
15249 /* Return true if destination reg of SET_INSN is shift count of
15250 USE_INSN. */
15252 bool
15253 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15255 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15256 PATTERN (use_insn));
15259 /* Return TRUE or FALSE depending on whether the unary operator meets the
15260 appropriate constraints. */
15262 bool
15263 ix86_unary_operator_ok (enum rtx_code,
15264 machine_mode,
15265 rtx operands[2])
15267 /* If one of operands is memory, source and destination must match. */
15268 if ((MEM_P (operands[0])
15269 || MEM_P (operands[1]))
15270 && ! rtx_equal_p (operands[0], operands[1]))
15271 return false;
15272 return true;
15275 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15276 are ok, keeping in mind the possible movddup alternative. */
15278 bool
15279 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15281 if (MEM_P (operands[0]))
15282 return rtx_equal_p (operands[0], operands[1 + high]);
15283 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15284 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15285 return true;
15288 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15289 then replicate the value for all elements of the vector
15290 register. */
15293 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15295 int i, n_elt;
15296 rtvec v;
15297 machine_mode scalar_mode;
15299 switch (mode)
15301 case E_V64QImode:
15302 case E_V32QImode:
15303 case E_V16QImode:
15304 case E_V32HImode:
15305 case E_V16HImode:
15306 case E_V8HImode:
15307 case E_V16SImode:
15308 case E_V8SImode:
15309 case E_V4SImode:
15310 case E_V8DImode:
15311 case E_V4DImode:
15312 case E_V2DImode:
15313 gcc_assert (vect);
15314 /* FALLTHRU */
15315 case E_V16SFmode:
15316 case E_V8SFmode:
15317 case E_V4SFmode:
15318 case E_V2SFmode:
15319 case E_V8DFmode:
15320 case E_V4DFmode:
15321 case E_V2DFmode:
15322 n_elt = GET_MODE_NUNITS (mode);
15323 v = rtvec_alloc (n_elt);
15324 scalar_mode = GET_MODE_INNER (mode);
15326 RTVEC_ELT (v, 0) = value;
15328 for (i = 1; i < n_elt; ++i)
15329 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15331 return gen_rtx_CONST_VECTOR (mode, v);
15333 default:
15334 gcc_unreachable ();
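/* Usage note (added): ix86_build_const_vector (V4SFmode, false, x)
   yields the vector {x, 0, 0, 0}, while passing vect = true yields
   {x, x, x, x}; ix86_build_signbit_mask below relies on this.  */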
15338 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15339 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15340 for an SSE register. If VECT is true, then replicate the mask for
15341 all elements of the vector register. If INVERT is true, then create
15342 a mask excluding the sign bit. */
15345 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15347 machine_mode vec_mode, imode;
15348 wide_int w;
15349 rtx mask, v;
15351 switch (mode)
15353 case E_V16SImode:
15354 case E_V16SFmode:
15355 case E_V8SImode:
15356 case E_V4SImode:
15357 case E_V8SFmode:
15358 case E_V4SFmode:
15359 case E_V2SFmode:
15360 vec_mode = mode;
15361 imode = SImode;
15362 break;
15364 case E_V8DImode:
15365 case E_V4DImode:
15366 case E_V2DImode:
15367 case E_V8DFmode:
15368 case E_V4DFmode:
15369 case E_V2DFmode:
15370 vec_mode = mode;
15371 imode = DImode;
15372 break;
15374 case E_TImode:
15375 case E_TFmode:
15376 vec_mode = VOIDmode;
15377 imode = TImode;
15378 break;
15380 default:
15381 gcc_unreachable ();
15384 machine_mode inner_mode = GET_MODE_INNER (mode);
15385 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15386 GET_MODE_BITSIZE (inner_mode));
15387 if (invert)
15388 w = wi::bit_not (w);
15390 /* Force this value into the low part of a fp vector constant. */
15391 mask = immed_wide_int_const (w, imode);
15392 mask = gen_lowpart (inner_mode, mask);
15394 if (vec_mode == VOIDmode)
15395 return force_reg (inner_mode, mask);
15397 v = ix86_build_const_vector (vec_mode, vect, mask);
15398 return force_reg (vec_mode, v);
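/* For example (added), for V2DFmode and INVERT == false this produces a
   register holding {-0.0, -0.0} (only bit 63 of each element set); with
   INVERT == true every bit except the sign bit is set, giving the mask
   used to compute fabs.  */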
15401 /* Return TRUE or FALSE depending on whether the first SET in INSN
15402 has a source and destination with matching CC modes, and whether the
15403 CC mode is at least as constrained as REQ_MODE. */
15405 bool
15406 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15408 rtx set;
15409 machine_mode set_mode;
15411 set = PATTERN (insn);
15412 if (GET_CODE (set) == PARALLEL)
15413 set = XVECEXP (set, 0, 0);
15414 gcc_assert (GET_CODE (set) == SET);
15415 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15417 set_mode = GET_MODE (SET_DEST (set));
15418 switch (set_mode)
15420 case E_CCNOmode:
15421 if (req_mode != CCNOmode
15422 && (req_mode != CCmode
15423 || XEXP (SET_SRC (set), 1) != const0_rtx))
15424 return false;
15425 break;
15426 case E_CCmode:
15427 if (req_mode == CCGCmode)
15428 return false;
15429 /* FALLTHRU */
15430 case E_CCGCmode:
15431 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15432 return false;
15433 /* FALLTHRU */
15434 case E_CCGOCmode:
15435 if (req_mode == CCZmode)
15436 return false;
15437 /* FALLTHRU */
15438 case E_CCZmode:
15439 break;
15441 case E_CCGZmode:
15443 case E_CCAmode:
15444 case E_CCCmode:
15445 case E_CCOmode:
15446 case E_CCPmode:
15447 case E_CCSmode:
15448 if (set_mode != req_mode)
15449 return false;
15450 break;
15452 default:
15453 gcc_unreachable ();
15456 return GET_MODE (SET_SRC (set)) == set_mode;
15459 machine_mode
15460 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15462 machine_mode mode = GET_MODE (op0);
15464 if (SCALAR_FLOAT_MODE_P (mode))
15466 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15467 return CCFPmode;
15470 switch (code)
15472 /* Only zero flag is needed. */
15473 case EQ: /* ZF=0 */
15474 case NE: /* ZF!=0 */
15475 return CCZmode;
15476 /* Codes needing carry flag. */
15477 case GEU: /* CF=0 */
15478 case LTU: /* CF=1 */
15479 rtx geu;
15480 /* Detect overflow checks. They need just the carry flag. */
15481 if (GET_CODE (op0) == PLUS
15482 && (rtx_equal_p (op1, XEXP (op0, 0))
15483 || rtx_equal_p (op1, XEXP (op0, 1))))
15484 return CCCmode;
15485 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15486 Match LTU of op0
15487 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15488 and op1
15489 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15490 where CC_CCC is either CC or CCC. */
15491 else if (code == LTU
15492 && GET_CODE (op0) == NEG
15493 && GET_CODE (geu = XEXP (op0, 0)) == GEU
15494 && REG_P (XEXP (geu, 0))
15495 && (GET_MODE (XEXP (geu, 0)) == CCCmode
15496 || GET_MODE (XEXP (geu, 0)) == CCmode)
15497 && REGNO (XEXP (geu, 0)) == FLAGS_REG
15498 && XEXP (geu, 1) == const0_rtx
15499 && GET_CODE (op1) == LTU
15500 && REG_P (XEXP (op1, 0))
15501 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
15502 && REGNO (XEXP (op1, 0)) == FLAGS_REG
15503 && XEXP (op1, 1) == const0_rtx)
15504 return CCCmode;
15505 else
15506 return CCmode;
15507 case GTU: /* CF=0 & ZF=0 */
15508 case LEU: /* CF=1 | ZF=1 */
15509 return CCmode;
15510 /* Codes possibly doable only with sign flag when
15511 comparing against zero. */
15512 case GE: /* SF=OF or SF=0 */
15513 case LT: /* SF<>OF or SF=1 */
15514 if (op1 == const0_rtx)
15515 return CCGOCmode;
15516 else
15517 /* For other cases Carry flag is not required. */
15518 return CCGCmode;
15519 /* Codes doable only with the sign flag when comparing
15520 against zero, but we lack a jump instruction for it,
15521 so we need to use relational tests against overflow,
15522 which thus needs to be zero. */
15523 case GT: /* ZF=0 & SF=OF */
15524 case LE: /* ZF=1 | SF<>OF */
15525 if (op1 == const0_rtx)
15526 return CCNOmode;
15527 else
15528 return CCGCmode;
15529 /* The strcmp pattern does (use flags) and combine may ask us for the proper
15530 mode. */
15531 case USE:
15532 return CCmode;
15533 default:
15534 gcc_unreachable ();
15538 /* Return the fixed registers used for condition codes. */
15540 static bool
15541 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15543 *p1 = FLAGS_REG;
15544 *p2 = INVALID_REGNUM;
15545 return true;
15548 /* If two condition code modes are compatible, return a condition code
15549 mode which is compatible with both. Otherwise, return
15550 VOIDmode. */
15552 static machine_mode
15553 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15555 if (m1 == m2)
15556 return m1;
15558 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15559 return VOIDmode;
15561 if ((m1 == CCGCmode && m2 == CCGOCmode)
15562 || (m1 == CCGOCmode && m2 == CCGCmode))
15563 return CCGCmode;
15565 if ((m1 == CCNOmode && m2 == CCGOCmode)
15566 || (m1 == CCGOCmode && m2 == CCNOmode))
15567 return CCNOmode;
15569 if (m1 == CCZmode
15570 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15571 return m2;
15572 else if (m2 == CCZmode
15573 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15574 return m1;
15576 switch (m1)
15578 default:
15579 gcc_unreachable ();
15581 case E_CCmode:
15582 case E_CCGCmode:
15583 case E_CCGOCmode:
15584 case E_CCNOmode:
15585 case E_CCAmode:
15586 case E_CCCmode:
15587 case E_CCOmode:
15588 case E_CCPmode:
15589 case E_CCSmode:
15590 case E_CCZmode:
15591 switch (m2)
15593 default:
15594 return VOIDmode;
15596 case E_CCmode:
15597 case E_CCGCmode:
15598 case E_CCGOCmode:
15599 case E_CCNOmode:
15600 case E_CCAmode:
15601 case E_CCCmode:
15602 case E_CCOmode:
15603 case E_CCPmode:
15604 case E_CCSmode:
15605 case E_CCZmode:
15606 return CCmode;
15609 case E_CCFPmode:
15610 /* These are only compatible with themselves, which we already
15611 checked above. */
15612 return VOIDmode;
15616 /* Return the strategy to use for floating-point. We assume that fcomi is always
15617 preferable where available, since that is also true when looking at size
15618 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15620 enum ix86_fpcmp_strategy
15621 ix86_fp_comparison_strategy (enum rtx_code)
15623 /* Do fcomi/sahf based test when profitable. */
15625 if (TARGET_CMOVE)
15626 return IX86_FPCMP_COMI;
15628 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15629 return IX86_FPCMP_SAHF;
15631 return IX86_FPCMP_ARITH;
15634 /* Convert the comparison codes we use to represent FP comparisons to the integer
15635 code that will result in a proper branch. Return UNKNOWN if no such code
15636 is available. */
15638 enum rtx_code
15639 ix86_fp_compare_code_to_integer (enum rtx_code code)
15641 switch (code)
15643 case GT:
15644 return GTU;
15645 case GE:
15646 return GEU;
15647 case ORDERED:
15648 case UNORDERED:
15649 return code;
15650 case UNEQ:
15651 return EQ;
15652 case UNLT:
15653 return LTU;
15654 case UNLE:
15655 return LEU;
15656 case LTGT:
15657 return NE;
15658 default:
15659 return UNKNOWN;
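/* Rationale (added): fcomi/ucomi set ZF/PF/CF as for an unsigned
   comparison, so the FP codes above map onto the unsigned integer
   condition codes (GT -> GTU, GE -> GEU, etc.).  */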
15663 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
15665 ix86_zero_extend_to_Pmode (rtx exp)
15667 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15670 /* Return true if the function being called was marked with attribute
15671 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15672 to handle the non-PIC case in the backend because there is no easy
15673 interface for the front-end to force non-PLT calls to use the GOT.
15674 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15675 to call the function marked "noplt" indirectly. */
15677 static bool
15678 ix86_nopic_noplt_attribute_p (rtx call_op)
15680 if (flag_pic || ix86_cmodel == CM_LARGE
15681 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15682 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15683 || SYMBOL_REF_LOCAL_P (call_op))
15684 return false;
15686 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15688 if (!flag_plt
15689 || (symbol_decl != NULL_TREE
15690 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15691 return true;
15693 return false;
15696 /* Helper to output the jmp/call. */
15697 static void
15698 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15700 if (thunk_name != NULL)
15702 fprintf (asm_out_file, "\tjmp\t");
15703 assemble_name (asm_out_file, thunk_name);
15704 putc ('\n', asm_out_file);
15706 else
15707 output_indirect_thunk (regno);
15710 /* Output indirect branch via a call and return thunk. CALL_OP is a
15711 register which contains the branch target. XASM is the assembly
15712 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15713 A normal call is converted to:
15715 call __x86_indirect_thunk_reg
15717 and a tail call is converted to:
15719 jmp __x86_indirect_thunk_reg
15722 static void
15723 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15725 char thunk_name_buf[32];
15726 char *thunk_name;
15727 enum indirect_thunk_prefix need_prefix
15728 = indirect_thunk_need_prefix (current_output_insn);
15729 int regno = REGNO (call_op);
15731 if (cfun->machine->indirect_branch_type
15732 != indirect_branch_thunk_inline)
15734 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15736 int i = regno;
15737 if (i >= FIRST_REX_INT_REG)
15738 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15739 indirect_thunks_used |= 1 << i;
15741 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15742 thunk_name = thunk_name_buf;
15744 else
15745 thunk_name = NULL;
15747 if (sibcall_p)
15748 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15749 else
15751 if (thunk_name != NULL)
15753 fprintf (asm_out_file, "\tcall\t");
15754 assemble_name (asm_out_file, thunk_name);
15755 putc ('\n', asm_out_file);
15756 return;
15759 char indirectlabel1[32];
15760 char indirectlabel2[32];
15762 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15763 INDIRECT_LABEL,
15764 indirectlabelno++);
15765 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15766 INDIRECT_LABEL,
15767 indirectlabelno++);
15769 /* Jump. */
15770 fputs ("\tjmp\t", asm_out_file);
15771 assemble_name_raw (asm_out_file, indirectlabel2);
15772 fputc ('\n', asm_out_file);
15774 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15776 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15778 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15780 /* Call. */
15781 fputs ("\tcall\t", asm_out_file);
15782 assemble_name_raw (asm_out_file, indirectlabel1);
15783 fputc ('\n', asm_out_file);
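/* Sketch (added): for the non-sibcall, inline-thunk case above the
   emitted assembly is roughly

	jmp	.LIND2
   .LIND1:
	<inline thunk body for the target register>
   .LIND2:
	call	.LIND1

   so the call pushes a return address while the real transfer goes
   through the thunk.  */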
15787 /* Output indirect branch via a call and return thunk. CALL_OP is
15788 the branch target. XASM is the assembly template for CALL_OP.
15789 Branch is a tail call if SIBCALL_P is true. A normal call is
15790 converted to:
15792 jmp L2
15793 L1:
15794 push CALL_OP
15795 jmp __x86_indirect_thunk
15796 L2:
15797 call L1
15799 and a tail call is converted to:
15801 push CALL_OP
15802 jmp __x86_indirect_thunk
15805 static void
15806 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15807 bool sibcall_p)
15809 char thunk_name_buf[32];
15810 char *thunk_name;
15811 char push_buf[64];
15812 enum indirect_thunk_prefix need_prefix
15813 = indirect_thunk_need_prefix (current_output_insn);
15814 int regno = -1;
15816 if (cfun->machine->indirect_branch_type
15817 != indirect_branch_thunk_inline)
15819 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15820 indirect_thunk_needed = true;
15821 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15822 thunk_name = thunk_name_buf;
15824 else
15825 thunk_name = NULL;
15827 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15828 TARGET_64BIT ? 'q' : 'l', xasm);
15830 if (sibcall_p)
15832 output_asm_insn (push_buf, &call_op);
15833 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15835 else
15837 char indirectlabel1[32];
15838 char indirectlabel2[32];
15840 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15841 INDIRECT_LABEL,
15842 indirectlabelno++);
15843 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15844 INDIRECT_LABEL,
15845 indirectlabelno++);
15847 /* Jump. */
15848 fputs ("\tjmp\t", asm_out_file);
15849 assemble_name_raw (asm_out_file, indirectlabel2);
15850 fputc ('\n', asm_out_file);
15852 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15854 /* An external function may be called via GOT, instead of PLT. */
15855 if (MEM_P (call_op))
15857 struct ix86_address parts;
15858 rtx addr = XEXP (call_op, 0);
15859 if (ix86_decompose_address (addr, &parts)
15860 && parts.base == stack_pointer_rtx)
15862 /* Since call will adjust stack by -UNITS_PER_WORD,
15863 we must convert "disp(stack, index, scale)" to
15864 "disp+UNITS_PER_WORD(stack, index, scale)". */
15865 if (parts.index)
15867 addr = gen_rtx_MULT (Pmode, parts.index,
15868 GEN_INT (parts.scale));
15869 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15870 addr);
15872 else
15873 addr = stack_pointer_rtx;
15875 rtx disp;
15876 if (parts.disp != NULL_RTX)
15877 disp = plus_constant (Pmode, parts.disp,
15878 UNITS_PER_WORD);
15879 else
15880 disp = GEN_INT (UNITS_PER_WORD);
15882 addr = gen_rtx_PLUS (Pmode, addr, disp);
15883 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15887 output_asm_insn (push_buf, &call_op);
15889 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15891 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15893 /* Call. */
15894 fputs ("\tcall\t", asm_out_file);
15895 assemble_name_raw (asm_out_file, indirectlabel1);
15896 fputc ('\n', asm_out_file);
15900 /* Output indirect branch via a call and return thunk. CALL_OP is
15901 the branch target. XASM is the assembly template for CALL_OP.
15902 Branch is a tail call if SIBCALL_P is true. */
15904 static void
15905 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15906 bool sibcall_p)
15908 if (REG_P (call_op))
15909 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15910 else
15911 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15914 /* Output indirect jump. CALL_OP is the jump target. */
15916 const char *
15917 ix86_output_indirect_jmp (rtx call_op)
15919 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15921 /* We can't have a red-zone since "call" in the indirect thunk
15922 pushes the return address onto the stack, destroying the red-zone. */
15923 if (ix86_red_zone_size != 0)
15924 gcc_unreachable ();
15926 ix86_output_indirect_branch (call_op, "%0", true);
15927 return "";
15929 else
15930 return "%!jmp\t%A0";
15933 /* Output return instrumentation for current function if needed. */
15935 static void
15936 output_return_instrumentation (void)
15938 if (ix86_instrument_return != instrument_return_none
15939 && flag_fentry
15940 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15942 if (ix86_flag_record_return)
15943 fprintf (asm_out_file, "1:\n");
15944 switch (ix86_instrument_return)
15946 case instrument_return_call:
15947 fprintf (asm_out_file, "\tcall\t__return__\n");
15948 break;
15949 case instrument_return_nop5:
15950 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15951 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15952 break;
15953 case instrument_return_none:
15954 break;
15957 if (ix86_flag_record_return)
15959 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15960 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15961 fprintf (asm_out_file, "\t.previous\n");
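/* For instance, with -minstrument-return=call -mrecord-return each return
   point is preceded by something along the lines of:

   1:	call	__return__
	.section __return_loc, "a",@progbits
	.quad	1b		# .long on 32-bit targets
	.previous

   which is a direct transcription of the fprintf calls above.  */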
15966 /* Output the function return.  Add a REP prefix to RET if LONG_P is
15967 true and the function return is kept. */
15969 const char *
15970 ix86_output_function_return (bool long_p)
15972 output_return_instrumentation ();
15974 if (cfun->machine->function_return_type != indirect_branch_keep)
15976 char thunk_name[32];
15977 enum indirect_thunk_prefix need_prefix
15978 = indirect_thunk_need_prefix (current_output_insn);
15980 if (cfun->machine->function_return_type
15981 != indirect_branch_thunk_inline)
15983 bool need_thunk = (cfun->machine->function_return_type
15984 == indirect_branch_thunk);
15985 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15986 true);
15987 indirect_return_needed |= need_thunk;
15988 fprintf (asm_out_file, "\tjmp\t");
15989 assemble_name (asm_out_file, thunk_name);
15990 putc ('\n', asm_out_file);
15992 else
15993 output_indirect_thunk (INVALID_REGNUM);
15995 return "";
15998 if (!long_p)
15999 return "%!ret";
16001 return "rep%; ret";
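/* E.g. with -mfunction-return=thunk the "ret" becomes a jump to the shared
   return thunk (__x86_return_thunk), with -mfunction-return=thunk-inline
   the thunk body is expanded in place via output_indirect_thunk, and with
   the default keep a plain "ret" is emitted, prefixed by "rep" when LONG_P
   is set.  */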
16004 /* Output indirect function return. RET_OP is the function return
16005 target. */
16007 const char *
16008 ix86_output_indirect_function_return (rtx ret_op)
16010 if (cfun->machine->function_return_type != indirect_branch_keep)
16012 char thunk_name[32];
16013 enum indirect_thunk_prefix need_prefix
16014 = indirect_thunk_need_prefix (current_output_insn);
16015 unsigned int regno = REGNO (ret_op);
16016 gcc_assert (regno == CX_REG);
16018 if (cfun->machine->function_return_type
16019 != indirect_branch_thunk_inline)
16021 bool need_thunk = (cfun->machine->function_return_type
16022 == indirect_branch_thunk);
16023 indirect_thunk_name (thunk_name, regno, need_prefix, true);
16025 if (need_thunk)
16027 indirect_return_via_cx = true;
16028 indirect_thunks_used |= 1 << CX_REG;
16030 fprintf (asm_out_file, "\tjmp\t");
16031 assemble_name (asm_out_file, thunk_name);
16032 putc ('\n', asm_out_file);
16034 else
16035 output_indirect_thunk (regno);
16037 return "";
16039 else
16040 return "%!jmp\t%A0";
16043 /* Output the assembly for a call instruction. */
16045 const char *
16046 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
16048 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
16049 bool output_indirect_p
16050 = (!TARGET_SEH
16051 && cfun->machine->indirect_branch_type != indirect_branch_keep);
16052 bool seh_nop_p = false;
16053 const char *xasm;
16055 if (SIBLING_CALL_P (insn))
16057 output_return_instrumentation ();
16058 if (direct_p)
16060 if (ix86_nopic_noplt_attribute_p (call_op))
16062 direct_p = false;
16063 if (TARGET_64BIT)
16065 if (output_indirect_p)
16066 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16067 else
16068 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16070 else
16072 if (output_indirect_p)
16073 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16074 else
16075 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16078 else
16079 xasm = "%!jmp\t%P0";
16081 /* SEH epilogue detection requires the indirect branch case
16082 to include REX.W. */
16083 else if (TARGET_SEH)
16084 xasm = "%!rex.W jmp\t%A0";
16085 else
16087 if (output_indirect_p)
16088 xasm = "%0";
16089 else
16090 xasm = "%!jmp\t%A0";
16093 if (output_indirect_p && !direct_p)
16094 ix86_output_indirect_branch (call_op, xasm, true);
16095 else
16096 output_asm_insn (xasm, &call_op);
16097 return "";
16100 /* SEH unwinding can require an extra nop to be emitted in several
16101 circumstances. Determine if we have one of those. */
16102 if (TARGET_SEH)
16104 rtx_insn *i;
16106 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
16108 /* Prevent a catch region from being adjacent to a jump that would
16109 be interpreted as an epilogue sequence by the unwinder. */
16110 if (JUMP_P(i) && CROSSING_JUMP_P (i))
16112 seh_nop_p = true;
16113 break;
16116 /* If we get to another real insn, we don't need the nop. */
16117 if (INSN_P (i))
16118 break;
16120 /* If we get to the epilogue note, prevent a catch region from
16121 being adjacent to the standard epilogue sequence.  With
16122 -fnon-call-exceptions, we'll have done this during epilogue emission. */
16123 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
16124 && !flag_non_call_exceptions
16125 && !can_throw_internal (insn))
16127 seh_nop_p = true;
16128 break;
16132 /* If we didn't find a real insn following the call, prevent the
16133 unwinder from looking into the next function. */
16134 if (i == NULL)
16135 seh_nop_p = true;
16138 if (direct_p)
16140 if (ix86_nopic_noplt_attribute_p (call_op))
16142 direct_p = false;
16143 if (TARGET_64BIT)
16145 if (output_indirect_p)
16146 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16147 else
16148 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16150 else
16152 if (output_indirect_p)
16153 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16154 else
16155 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16158 else
16159 xasm = "%!call\t%P0";
16161 else
16163 if (output_indirect_p)
16164 xasm = "%0";
16165 else
16166 xasm = "%!call\t%A0";
16169 if (output_indirect_p && !direct_p)
16170 ix86_output_indirect_branch (call_op, xasm, false);
16171 else
16172 output_asm_insn (xasm, &call_op);
16174 if (seh_nop_p)
16175 return "nop";
16177 return "";
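/* A sketch of the templates chosen above ("foo" is just a placeholder):
   on x86-64 a direct call to a noplt/-fno-plt function is rewritten into
   an indirect call through the GOT,

	call	*foo@GOTPCREL(%rip)

   while a plain direct call stays "call foo" and an ordinary indirect call
   is "call *%rax", possibly routed through ix86_output_indirect_branch
   when -mindirect-branch= is in effect.  */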
16180 /* Return a MEM corresponding to a stack slot with mode MODE.
16181 Allocate a new slot if necessary.
16183 The RTL for a function can have several slots available: N is
16184 which slot to use. */
16187 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
16189 struct stack_local_entry *s;
16191 gcc_assert (n < MAX_386_STACK_LOCALS);
16193 for (s = ix86_stack_locals; s; s = s->next)
16194 if (s->mode == mode && s->n == n)
16195 return validize_mem (copy_rtx (s->rtl));
16197 s = ggc_alloc<stack_local_entry> ();
16198 s->n = n;
16199 s->mode = mode;
16200 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16202 s->next = ix86_stack_locals;
16203 ix86_stack_locals = s;
16204 return validize_mem (copy_rtx (s->rtl));
16207 static void
16208 ix86_instantiate_decls (void)
16210 struct stack_local_entry *s;
16212 for (s = ix86_stack_locals; s; s = s->next)
16213 if (s->rtl != NULL_RTX)
16214 instantiate_decl_rtl (s->rtl);
16217 /* Check whether x86 address PARTS is a pc-relative address. */
16219 bool
16220 ix86_rip_relative_addr_p (struct ix86_address *parts)
16222 rtx base, index, disp;
16224 base = parts->base;
16225 index = parts->index;
16226 disp = parts->disp;
16228 if (disp && !base && !index)
16230 if (TARGET_64BIT)
16232 rtx symbol = disp;
16234 if (GET_CODE (disp) == CONST)
16235 symbol = XEXP (disp, 0);
16236 if (GET_CODE (symbol) == PLUS
16237 && CONST_INT_P (XEXP (symbol, 1)))
16238 symbol = XEXP (symbol, 0);
16240 if (GET_CODE (symbol) == LABEL_REF
16241 || (GET_CODE (symbol) == SYMBOL_REF
16242 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
16243 || (GET_CODE (symbol) == UNSPEC
16244 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
16245 || XINT (symbol, 1) == UNSPEC_PCREL
16246 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
16247 return true;
16250 return false;
16253 /* Calculate the length of the memory address in the instruction encoding.
16254 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16255 or other prefixes. We never generate addr32 prefix for LEA insn. */
16258 memory_address_length (rtx addr, bool lea)
16260 struct ix86_address parts;
16261 rtx base, index, disp;
16262 int len;
16263 int ok;
16265 if (GET_CODE (addr) == PRE_DEC
16266 || GET_CODE (addr) == POST_INC
16267 || GET_CODE (addr) == PRE_MODIFY
16268 || GET_CODE (addr) == POST_MODIFY)
16269 return 0;
16271 ok = ix86_decompose_address (addr, &parts);
16272 gcc_assert (ok);
16274 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16276 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
16277 if (TARGET_64BIT && !lea
16278 && (SImode_address_operand (addr, VOIDmode)
16279 || (parts.base && GET_MODE (parts.base) == SImode)
16280 || (parts.index && GET_MODE (parts.index) == SImode)))
16281 len++;
16283 base = parts.base;
16284 index = parts.index;
16285 disp = parts.disp;
16287 if (base && SUBREG_P (base))
16288 base = SUBREG_REG (base);
16289 if (index && SUBREG_P (index))
16290 index = SUBREG_REG (index);
16292 gcc_assert (base == NULL_RTX || REG_P (base));
16293 gcc_assert (index == NULL_RTX || REG_P (index));
16295 /* Rule of thumb:
16296 - esp as the base always wants an index,
16297 - ebp as the base always wants a displacement,
16298 - r12 as the base always wants an index,
16299 - r13 as the base always wants a displacement. */
16301 /* Register Indirect. */
16302 if (base && !index && !disp)
16304 /* esp (for its index) and ebp (for its displacement) need
16305 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16306 code. */
16307 if (base == arg_pointer_rtx
16308 || base == frame_pointer_rtx
16309 || REGNO (base) == SP_REG
16310 || REGNO (base) == BP_REG
16311 || REGNO (base) == R12_REG
16312 || REGNO (base) == R13_REG)
16313 len++;
16316 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16317 is not disp32, but disp32(%rip), so for disp32
16318 SIB byte is needed, unless print_operand_address
16319 optimizes it into disp32(%rip) or (%rip) is implied
16320 by UNSPEC. */
16321 else if (disp && !base && !index)
16323 len += 4;
16324 if (!ix86_rip_relative_addr_p (&parts))
16325 len++;
16327 else
16329 /* Find the length of the displacement constant. */
16330 if (disp)
16332 if (base && satisfies_constraint_K (disp))
16333 len += 1;
16334 else
16335 len += 4;
16337 /* ebp always wants a displacement. Similarly r13. */
16338 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16339 len++;
16341 /* An index requires the two-byte modrm form.... */
16342 if (index
16343 /* ...like esp (or r12), which always wants an index. */
16344 || base == arg_pointer_rtx
16345 || base == frame_pointer_rtx
16346 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16347 len++;
16350 return len;
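/* A few illustrative values of the length computed above (address bytes
   only, excluding the modrm byte itself):

	(%rax)			-> 0	fits in modrm alone
	(%rsp), (%r12)		-> 1	SIB byte needed
	(%rbp), (%r13)		-> 1	disp8 needed
	8(%rax)			-> 1	disp8
	8(%rax,%rbx,4)		-> 2	SIB + disp8
	foo(%rip)		-> 4	disp32, no SIB
	absolute disp32 (64-bit)-> 5	disp32 + SIB to avoid %rip-relative  */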
16353 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16354 is set, expect that the insn has an 8-bit immediate alternative. */
16356 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16358 int len = 0;
16359 int i;
16360 extract_insn_cached (insn);
16361 for (i = recog_data.n_operands - 1; i >= 0; --i)
16362 if (CONSTANT_P (recog_data.operand[i]))
16364 enum attr_mode mode = get_attr_mode (insn);
16366 gcc_assert (!len);
16367 if (shortform && CONST_INT_P (recog_data.operand[i]))
16369 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16370 switch (mode)
16372 case MODE_QI:
16373 len = 1;
16374 continue;
16375 case MODE_HI:
16376 ival = trunc_int_for_mode (ival, HImode);
16377 break;
16378 case MODE_SI:
16379 ival = trunc_int_for_mode (ival, SImode);
16380 break;
16381 default:
16382 break;
16384 if (IN_RANGE (ival, -128, 127))
16386 len = 1;
16387 continue;
16390 switch (mode)
16392 case MODE_QI:
16393 len = 1;
16394 break;
16395 case MODE_HI:
16396 len = 2;
16397 break;
16398 case MODE_SI:
16399 len = 4;
16400 break;
16401 /* Immediates for DImode instructions are encoded
16402 as 32bit sign extended values. */
16403 case MODE_DI:
16404 len = 4;
16405 break;
16406 default:
16407 fatal_insn ("unknown insn mode", insn);
16410 return len;
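/* For example (a sketch of the attribute value computed above): with a
   short form available, "add $3, %eax" is counted as 1 because 3 fits in a
   sign-extended imm8, "add $1000, %eax" as 4, a MODE_HI immediate such as
   "mov $5, %ax" as 2, and DImode immediates as 4 since they are encoded as
   sign-extended 32-bit values.  */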
16413 /* Compute default value for "length_address" attribute. */
16415 ix86_attr_length_address_default (rtx_insn *insn)
16417 int i;
16419 if (get_attr_type (insn) == TYPE_LEA)
16421 rtx set = PATTERN (insn), addr;
16423 if (GET_CODE (set) == PARALLEL)
16424 set = XVECEXP (set, 0, 0);
16426 gcc_assert (GET_CODE (set) == SET);
16428 addr = SET_SRC (set);
16430 return memory_address_length (addr, true);
16433 extract_insn_cached (insn);
16434 for (i = recog_data.n_operands - 1; i >= 0; --i)
16436 rtx op = recog_data.operand[i];
16437 if (MEM_P (op))
16439 constrain_operands_cached (insn, reload_completed);
16440 if (which_alternative != -1)
16442 const char *constraints = recog_data.constraints[i];
16443 int alt = which_alternative;
16445 while (*constraints == '=' || *constraints == '+')
16446 constraints++;
16447 while (alt-- > 0)
16448 while (*constraints++ != ',')
16450 /* Skip ignored operands. */
16451 if (*constraints == 'X')
16452 continue;
16455 int len = memory_address_length (XEXP (op, 0), false);
16457 /* Account for segment prefix for non-default addr spaces. */
16458 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16459 len++;
16461 return len;
16464 return 0;
16467 /* Compute default value for "length_vex" attribute. It includes
16468 2 or 3 byte VEX prefix and 1 opcode byte. */
16471 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16472 bool has_vex_w)
16474 int i;
16476 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
16477 byte VEX prefix. */
16478 if (!has_0f_opcode || has_vex_w)
16479 return 3 + 1;
16481 /* We can always use 2 byte VEX prefix in 32bit. */
16482 if (!TARGET_64BIT)
16483 return 2 + 1;
16485 extract_insn_cached (insn);
16487 for (i = recog_data.n_operands - 1; i >= 0; --i)
16488 if (REG_P (recog_data.operand[i]))
16490 /* REX.W bit uses 3 byte VEX prefix. */
16491 if (GET_MODE (recog_data.operand[i]) == DImode
16492 && GENERAL_REG_P (recog_data.operand[i]))
16493 return 3 + 1;
16495 else
16497 /* REX.X or REX.B bits use 3 byte VEX prefix. */
16498 if (MEM_P (recog_data.operand[i])
16499 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16500 return 3 + 1;
16503 return 2 + 1;
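/* Illustration: "vaddps %xmm1, %xmm2, %xmm0" can use the 2-byte (C5) VEX
   prefix, so the attribute is 2 + 1 = 3.  An insn that needs VEX.W (for
   example "vmovq %rax, %xmm0"), a non-0F opcode map, or the REX.X/REX.B
   bits for an extended register in a memory operand must use the 3-byte
   (C4) form, giving 3 + 1 = 4.  */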
16507 static bool
16508 ix86_class_likely_spilled_p (reg_class_t);
16510 /* Return true if the lhs of INSN is a HW function argument register and set
16511 *IS_SPILLED to true if it is a likely-spilled HW register. */
16512 static bool
16513 insn_is_function_arg (rtx insn, bool* is_spilled)
16515 rtx dst;
16517 if (!NONDEBUG_INSN_P (insn))
16518 return false;
16519 /* Call instructions are not movable, ignore them. */
16520 if (CALL_P (insn))
16521 return false;
16522 insn = PATTERN (insn);
16523 if (GET_CODE (insn) == PARALLEL)
16524 insn = XVECEXP (insn, 0, 0);
16525 if (GET_CODE (insn) != SET)
16526 return false;
16527 dst = SET_DEST (insn);
16528 if (REG_P (dst) && HARD_REGISTER_P (dst)
16529 && ix86_function_arg_regno_p (REGNO (dst)))
16531 /* Is it likely spilled HW register? */
16532 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16533 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16534 *is_spilled = true;
16535 return true;
16537 return false;
16540 /* Add output dependencies for a chain of adjacent function arguments, but only
16541 if there is a move to a likely-spilled HW register.  Return the first argument
16542 if at least one dependence was added, or NULL otherwise. */
16543 static rtx_insn *
16544 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16546 rtx_insn *insn;
16547 rtx_insn *last = call;
16548 rtx_insn *first_arg = NULL;
16549 bool is_spilled = false;
16551 head = PREV_INSN (head);
16553 /* Find the argument-passing instruction nearest to the call. */
16554 while (true)
16556 last = PREV_INSN (last);
16557 if (last == head)
16558 return NULL;
16559 if (!NONDEBUG_INSN_P (last))
16560 continue;
16561 if (insn_is_function_arg (last, &is_spilled))
16562 break;
16563 return NULL;
16566 first_arg = last;
16567 while (true)
16569 insn = PREV_INSN (last);
16570 if (!INSN_P (insn))
16571 break;
16572 if (insn == head)
16573 break;
16574 if (!NONDEBUG_INSN_P (insn))
16576 last = insn;
16577 continue;
16579 if (insn_is_function_arg (insn, &is_spilled))
16581 /* Add an output dependence between two function arguments if the chain
16582 of output arguments contains likely-spilled HW registers. */
16583 if (is_spilled)
16584 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16585 first_arg = last = insn;
16587 else
16588 break;
16590 if (!is_spilled)
16591 return NULL;
16592 return first_arg;
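/* A sketch of the pre-reload situation handled here:

	(set (reg:DI di) (reg:DI 100))	; first argument, likely-spilled class
	(set (reg:DI si) (reg:DI 101))	; second argument
	(call_insn ...)

   The output dependencies chain the argument moves together so the
   pre-reload scheduler does not pull them apart and stretch the live
   ranges of the argument hard registers.  */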
16595 /* Add output or anti dependency from insn to first_arg to restrict its code
16596 motion. */
16597 static void
16598 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16600 rtx set;
16601 rtx tmp;
16603 set = single_set (insn);
16604 if (!set)
16605 return;
16606 tmp = SET_DEST (set);
16607 if (REG_P (tmp))
16609 /* Add output dependency to the first function argument. */
16610 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16611 return;
16613 /* Add anti dependency. */
16614 add_dependence (first_arg, insn, REG_DEP_ANTI);
16617 /* Avoid cross-block motion of a function argument by adding a dependency
16618 from the first non-jump instruction in BB. */
16619 static void
16620 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16622 rtx_insn *insn = BB_END (bb);
16624 while (insn)
16626 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16628 rtx set = single_set (insn);
16629 if (set)
16631 avoid_func_arg_motion (arg, insn);
16632 return;
16635 if (insn == BB_HEAD (bb))
16636 return;
16637 insn = PREV_INSN (insn);
16641 /* Hook for pre-reload schedule - avoid motion of function arguments
16642 passed in likely spilled HW registers. */
16643 static void
16644 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16646 rtx_insn *insn;
16647 rtx_insn *first_arg = NULL;
16648 if (reload_completed)
16649 return;
16650 while (head != tail && DEBUG_INSN_P (head))
16651 head = NEXT_INSN (head);
16652 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16653 if (INSN_P (insn) && CALL_P (insn))
16655 first_arg = add_parameter_dependencies (insn, head);
16656 if (first_arg)
16658 /* Add a dependee for the first argument to predecessors, but only if the
16659 region contains more than one block. */
16660 basic_block bb = BLOCK_FOR_INSN (insn);
16661 int rgn = CONTAINING_RGN (bb->index);
16662 int nr_blks = RGN_NR_BLOCKS (rgn);
16663 /* Skip trivial regions and region head blocks that can have
16664 predecessors outside of region. */
16665 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16667 edge e;
16668 edge_iterator ei;
16670 /* Regions are SCCs with the exception of selective
16671 scheduling with pipelining of outer blocks enabled.
16672 So also check that immediate predecessors of a non-head
16673 block are in the same region. */
16674 FOR_EACH_EDGE (e, ei, bb->preds)
16676 /* Avoid creating loop-carried dependencies by using the
16677 topological ordering in the region. */
16678 if (rgn == CONTAINING_RGN (e->src->index)
16679 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16680 add_dependee_for_func_arg (first_arg, e->src);
16683 insn = first_arg;
16684 if (insn == head)
16685 break;
16688 else if (first_arg)
16689 avoid_func_arg_motion (first_arg, insn);
16692 /* Hook for the pre-reload scheduler - set the priority of moves from likely-spilled
16693 HW registers to the maximum, to schedule them as soon as possible.  These are
16694 moves from function argument registers at the top of the function entry
16695 and moves from function return value registers after a call. */
16696 static int
16697 ix86_adjust_priority (rtx_insn *insn, int priority)
16699 rtx set;
16701 if (reload_completed)
16702 return priority;
16704 if (!NONDEBUG_INSN_P (insn))
16705 return priority;
16707 set = single_set (insn);
16708 if (set)
16710 rtx tmp = SET_SRC (set);
16711 if (REG_P (tmp)
16712 && HARD_REGISTER_P (tmp)
16713 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16714 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16715 return current_sched_info->sched_max_insns_priority;
16718 return priority;
16721 /* Prepare for scheduling pass. */
16722 static void
16723 ix86_sched_init_global (FILE *, int, int)
16725 /* Install scheduling hooks for current CPU. Some of these hooks are used
16726 in time-critical parts of the scheduler, so we only set them up when
16727 they are actually used. */
16728 switch (ix86_tune)
16730 case PROCESSOR_CORE2:
16731 case PROCESSOR_NEHALEM:
16732 case PROCESSOR_SANDYBRIDGE:
16733 case PROCESSOR_HASWELL:
16734 case PROCESSOR_GENERIC:
16735 /* Do not perform multipass scheduling for pre-reload schedule
16736 to save compile time. */
16737 if (reload_completed)
16739 ix86_core2i7_init_hooks ();
16740 break;
16742 /* Fall through. */
16743 default:
16744 targetm.sched.dfa_post_advance_cycle = NULL;
16745 targetm.sched.first_cycle_multipass_init = NULL;
16746 targetm.sched.first_cycle_multipass_begin = NULL;
16747 targetm.sched.first_cycle_multipass_issue = NULL;
16748 targetm.sched.first_cycle_multipass_backtrack = NULL;
16749 targetm.sched.first_cycle_multipass_end = NULL;
16750 targetm.sched.first_cycle_multipass_fini = NULL;
16751 break;
16756 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16758 static HOST_WIDE_INT
16759 ix86_static_rtx_alignment (machine_mode mode)
16761 if (mode == DFmode)
16762 return 64;
16763 if (ALIGN_MODE_128 (mode))
16764 return MAX (128, GET_MODE_ALIGNMENT (mode));
16765 return GET_MODE_ALIGNMENT (mode);
16768 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16770 static HOST_WIDE_INT
16771 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16773 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16774 || TREE_CODE (exp) == INTEGER_CST)
16776 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16777 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16778 return MAX (mode_align, align);
16780 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16781 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16782 return BITS_PER_WORD;
16784 return align;
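/* For example, a DFmode constant in the pool gets at least 64-bit
   alignment, a 128-bit vector constant at least 128-bit alignment, and
   (unless optimizing for size) a string literal of 31 or more characters
   is raised to word alignment.  */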
16787 /* Implement TARGET_EMPTY_RECORD_P. */
16789 static bool
16790 ix86_is_empty_record (const_tree type)
16792 if (!TARGET_64BIT)
16793 return false;
16794 return default_is_empty_record (type);
16797 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16799 static void
16800 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16802 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16804 if (!cum->warn_empty)
16805 return;
16807 if (!TYPE_EMPTY_P (type))
16808 return;
16810 /* Don't warn if the function isn't visible outside of the TU. */
16811 if (cum->decl && !TREE_PUBLIC (cum->decl))
16812 return;
16814 const_tree ctx = get_ultimate_context (cum->decl);
16815 if (ctx != NULL_TREE
16816 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16817 return;
16819 /* If the actual size of the type is zero, then there is no change
16820 in how objects of this size are passed. */
16821 if (int_size_in_bytes (type) == 0)
16822 return;
16824 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16825 "changes in %<-fabi-version=12%> (GCC 8)", type);
16827 /* Only warn once. */
16828 cum->warn_empty = false;
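/* Hypothetical example of code that triggers the warning above in C++:

	struct S {};
	void f (S, int);	// passing the empty class S changed in GCC 8

   The warn_empty flag ensures the diagnostic is emitted only once per
   argument scan.  */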
16831 /* This hook returns the name of the multilib ABI. */
16833 static const char *
16834 ix86_get_multilib_abi_name (void)
16836 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16837 return "i386";
16838 else if (TARGET_X32_P (ix86_isa_flags))
16839 return "x32";
16840 else
16841 return "x86_64";
16844 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16845 the data type, and ALIGN is the alignment that the object would
16846 ordinarily have. */
16848 static int
16849 iamcu_alignment (tree type, int align)
16851 machine_mode mode;
16853 if (align < 32 || TYPE_USER_ALIGN (type))
16854 return align;
16856 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
16857 bytes. */
16858 type = strip_array_types (type);
16859 if (TYPE_ATOMIC (type))
16860 return align;
16862 mode = TYPE_MODE (type);
16863 switch (GET_MODE_CLASS (mode))
16865 case MODE_INT:
16866 case MODE_COMPLEX_INT:
16867 case MODE_COMPLEX_FLOAT:
16868 case MODE_FLOAT:
16869 case MODE_DECIMAL_FLOAT:
16870 return 32;
16871 default:
16872 return align;
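/* For instance, under the Intel MCU psABI a scalar wider than 4 bytes
   (double, long long, ...) that would otherwise get a larger alignment is
   capped at 4 bytes (32 bits) here, unless it is _Atomic or carries an
   explicit user alignment.  */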
16876 /* Compute the alignment for a static variable.
16877 TYPE is the data type, and ALIGN is the alignment that
16878 the object would ordinarily have. The value of this function is used
16879 instead of that alignment to align the object. */
16882 ix86_data_alignment (tree type, unsigned int align, bool opt)
16884 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16885 for symbols from other compilation units or symbols that don't need
16886 to bind locally. In order to preserve some ABI compatibility with
16887 those compilers, ensure we don't decrease alignment from what we
16888 used to assume. */
16890 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16892 /* A data structure equal to or greater than the size of a cache line
16893 (64 bytes in the Pentium 4 and other recent Intel processors, including
16894 processors based on the Intel Core microarchitecture) should be aligned
16895 so that its base address is a multiple of the cache line size. */
16897 unsigned int max_align
16898 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16900 if (max_align < BITS_PER_WORD)
16901 max_align = BITS_PER_WORD;
16903 switch (ix86_align_data_type)
16905 case ix86_align_data_type_abi: opt = false; break;
16906 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16907 case ix86_align_data_type_cacheline: break;
16910 if (TARGET_IAMCU)
16911 align = iamcu_alignment (type, align);
16913 if (opt
16914 && AGGREGATE_TYPE_P (type)
16915 && TYPE_SIZE (type)
16916 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16918 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16919 && align < max_align_compat)
16920 align = max_align_compat;
16921 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16922 && align < max_align)
16923 align = max_align;
16926 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
16927 to a 16-byte boundary. */
16928 if (TARGET_64BIT)
16930 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16931 && TYPE_SIZE (type)
16932 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16933 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16934 && align < 128)
16935 return 128;
16938 if (!opt)
16939 return align;
16941 if (TREE_CODE (type) == ARRAY_TYPE)
16943 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16944 return 64;
16945 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16946 return 128;
16948 else if (TREE_CODE (type) == COMPLEX_TYPE)
16951 if (TYPE_MODE (type) == DCmode && align < 64)
16952 return 64;
16953 if ((TYPE_MODE (type) == XCmode
16954 || TYPE_MODE (type) == TCmode) && align < 128)
16955 return 128;
16957 else if ((TREE_CODE (type) == RECORD_TYPE
16958 || TREE_CODE (type) == UNION_TYPE
16959 || TREE_CODE (type) == QUAL_UNION_TYPE)
16960 && TYPE_FIELDS (type))
16962 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16963 return 64;
16964 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16965 return 128;
16967 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16968 || TREE_CODE (type) == INTEGER_TYPE)
16970 if (TYPE_MODE (type) == DFmode && align < 64)
16971 return 64;
16972 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16973 return 128;
16976 return align;
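/* Two illustrative cases of the rules above: on x86-64 a global
   "char buf[100]" is raised to 128-bit (16-byte) alignment by the psABI
   array rule, and when optimizing, an aggregate at least as large as the
   cache line implied by ix86_tune_cost->prefetch_block (typically 64
   bytes) may further be raised to that cache-line size under the default
   -malign-data=cacheline.  */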
16979 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
16980 static void
16981 ix86_lower_local_decl_alignment (tree decl)
16983 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
16984 DECL_ALIGN (decl), true);
16985 if (new_align < DECL_ALIGN (decl))
16986 SET_DECL_ALIGN (decl, new_align);
16989 /* Compute the alignment for a local variable or a stack slot. EXP is
16990 the data type or decl itself, MODE is the widest mode available and
16991 ALIGN is the alignment that the object would ordinarily have. The
16992 value of this macro is used instead of that alignment to align the
16993 object. */
16995 unsigned int
16996 ix86_local_alignment (tree exp, machine_mode mode,
16997 unsigned int align, bool may_lower)
16999 tree type, decl;
17001 if (exp && DECL_P (exp))
17003 type = TREE_TYPE (exp);
17004 decl = exp;
17006 else
17008 type = exp;
17009 decl = NULL;
17012 /* Don't do dynamic stack realignment for long long objects with
17013 -mpreferred-stack-boundary=2. */
17014 if (may_lower
17015 && !TARGET_64BIT
17016 && align == 64
17017 && ix86_preferred_stack_boundary < 64
17018 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
17019 && (!type || (!TYPE_USER_ALIGN (type)
17020 && !TYPE_ATOMIC (strip_array_types (type))))
17021 && (!decl || !DECL_USER_ALIGN (decl)))
17022 align = 32;
17024 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17025 register in MODE. We will return the largest alignment of XF
17026 and DF. */
17027 if (!type)
17029 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17030 align = GET_MODE_ALIGNMENT (DFmode);
17031 return align;
17034 /* Don't increase alignment for Intel MCU psABI. */
17035 if (TARGET_IAMCU)
17036 return align;
17038 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
17039 to a 16-byte boundary.  The exact wording is:
17041 An array uses the same alignment as its elements, except that a local or
17042 global array variable of length at least 16 bytes or
17043 a C99 variable-length array variable always has alignment of at least 16 bytes.
17045 This was added to allow use of aligned SSE instructions on arrays.  This
17046 rule is meant for static storage (where the compiler cannot do the analysis
17047 by itself).  We follow it for automatic variables only when convenient.
17048 We fully control everything in the function being compiled, and functions from
17049 other units cannot rely on the alignment.
17051 Exclude the va_list type.  It is the common case of a local array where
17052 we cannot benefit from the alignment.
17054 TODO: Probably one should optimize for size only when the variable does not escape. */
17055 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
17056 && TARGET_SSE)
17058 if (AGGREGATE_TYPE_P (type)
17059 && (va_list_type_node == NULL_TREE
17060 || (TYPE_MAIN_VARIANT (type)
17061 != TYPE_MAIN_VARIANT (va_list_type_node)))
17062 && TYPE_SIZE (type)
17063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17064 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17065 && align < 128)
17066 return 128;
17068 if (TREE_CODE (type) == ARRAY_TYPE)
17070 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17071 return 64;
17072 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17073 return 128;
17075 else if (TREE_CODE (type) == COMPLEX_TYPE)
17077 if (TYPE_MODE (type) == DCmode && align < 64)
17078 return 64;
17079 if ((TYPE_MODE (type) == XCmode
17080 || TYPE_MODE (type) == TCmode) && align < 128)
17081 return 128;
17083 else if ((TREE_CODE (type) == RECORD_TYPE
17084 || TREE_CODE (type) == UNION_TYPE
17085 || TREE_CODE (type) == QUAL_UNION_TYPE)
17086 && TYPE_FIELDS (type))
17088 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17089 return 64;
17090 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17091 return 128;
17093 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17094 || TREE_CODE (type) == INTEGER_TYPE)
17097 if (TYPE_MODE (type) == DFmode && align < 64)
17098 return 64;
17099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17100 return 128;
17102 return align;
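/* E.g. on x86-64 with SSE, a local "char buf[32]" in a function optimized
   for speed gets 16-byte alignment so aligned vector accesses to it are
   cheap, while on 32-bit with -mpreferred-stack-boundary=2 a plain
   "long long" local may be lowered from 8-byte to 4-byte alignment to
   avoid dynamic stack realignment.  */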
17105 /* Compute the minimum required alignment for dynamic stack realignment
17106 purposes for a local variable, parameter or a stack slot. EXP is
17107 the data type or decl itself, MODE is its mode and ALIGN is the
17108 alignment that the object would ordinarily have. */
17110 unsigned int
17111 ix86_minimum_alignment (tree exp, machine_mode mode,
17112 unsigned int align)
17114 tree type, decl;
17116 if (exp && DECL_P (exp))
17118 type = TREE_TYPE (exp);
17119 decl = exp;
17121 else
17123 type = exp;
17124 decl = NULL;
17127 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
17128 return align;
17130 /* Don't do dynamic stack realignment for long long objects with
17131 -mpreferred-stack-boundary=2. */
17132 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
17133 && (!type || (!TYPE_USER_ALIGN (type)
17134 && !TYPE_ATOMIC (strip_array_types (type))))
17135 && (!decl || !DECL_USER_ALIGN (decl)))
17137 gcc_checking_assert (!TARGET_STV);
17138 return 32;
17141 return align;
17144 /* Find a location for the static chain incoming to a nested function.
17145 This is a register, unless all free registers are used by arguments. */
17147 static rtx
17148 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
17150 unsigned regno;
17152 if (TARGET_64BIT)
17154 /* We always use R10 in 64-bit mode. */
17155 regno = R10_REG;
17157 else
17159 const_tree fntype, fndecl;
17160 unsigned int ccvt;
17162 /* By default in 32-bit mode we use ECX to pass the static chain. */
17163 regno = CX_REG;
17165 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
17167 fntype = TREE_TYPE (fndecl_or_type);
17168 fndecl = fndecl_or_type;
17170 else
17172 fntype = fndecl_or_type;
17173 fndecl = NULL;
17176 ccvt = ix86_get_callcvt (fntype);
17177 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
17179 /* Fastcall functions use ecx/edx for arguments, which leaves
17180 us with EAX for the static chain.
17181 Thiscall functions use ecx for arguments, which also
17182 leaves us with EAX for the static chain. */
17183 regno = AX_REG;
17185 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
17187 /* Thiscall functions use ecx for arguments, which leaves
17188 us with EAX and EDX for the static chain.
17189 For ABI compatibility we use EAX. */
17190 regno = AX_REG;
17192 else if (ix86_function_regparm (fntype, fndecl) == 3)
17194 /* For regparm 3, we have no free call-clobbered registers in
17195 which to store the static chain. In order to implement this,
17196 we have the trampoline push the static chain to the stack.
17197 However, we can't push a value below the return address when
17198 we call the nested function directly, so we have to use an
17199 alternate entry point. For this we use ESI, and have the
17200 alternate entry point push ESI, so that things appear the
17201 same once we're executing the nested function. */
17202 if (incoming_p)
17204 if (fndecl == current_function_decl
17205 && !ix86_static_chain_on_stack)
17207 gcc_assert (!reload_completed);
17208 ix86_static_chain_on_stack = true;
17210 return gen_frame_mem (SImode,
17211 plus_constant (Pmode,
17212 arg_pointer_rtx, -8));
17214 regno = SI_REG;
17218 return gen_rtx_REG (Pmode, regno);
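/* Summary of the choices above: r10 in 64-bit mode; ecx for ordinary
   32-bit calls; eax for fastcall/thiscall, whose argument registers
   include ecx; and for regparm(3) functions the incoming chain is read
   from the stack slot at argp - 8 while the outgoing chain is passed in
   esi to an alternate entry point that pushes it.  */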
17221 /* Emit RTL insns to initialize the variable parts of a trampoline.
17222 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17223 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17224 to be passed to the target function. */
17226 static void
17227 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
17229 rtx mem, fnaddr;
17230 int opcode;
17231 int offset = 0;
17232 bool need_endbr = (flag_cf_protection & CF_BRANCH);
17234 fnaddr = XEXP (DECL_RTL (fndecl), 0);
17236 if (TARGET_64BIT)
17238 int size;
17240 if (need_endbr)
17242 /* Insert ENDBR64. */
17243 mem = adjust_address (m_tramp, SImode, offset);
17244 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
17245 offset += 4;
17248 /* Load the function address to r11. Try to load address using
17249 the shorter movl instead of movabs. We may want to support
17250 movq for kernel mode, but kernel does not use trampolines at
17251 the moment. FNADDR is a 32bit address and may not be in
17252 DImode when ptr_mode == SImode. Always use movl in this
17253 case. */
17254 if (ptr_mode == SImode
17255 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17257 fnaddr = copy_addr_to_reg (fnaddr);
17259 mem = adjust_address (m_tramp, HImode, offset);
17260 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
17262 mem = adjust_address (m_tramp, SImode, offset + 2);
17263 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
17264 offset += 6;
17266 else
17268 mem = adjust_address (m_tramp, HImode, offset);
17269 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
17271 mem = adjust_address (m_tramp, DImode, offset + 2);
17272 emit_move_insn (mem, fnaddr);
17273 offset += 10;
17276 /* Load static chain using movabs to r10. Use the shorter movl
17277 instead of movabs when ptr_mode == SImode. */
17278 if (ptr_mode == SImode)
17280 opcode = 0xba41;
17281 size = 6;
17283 else
17285 opcode = 0xba49;
17286 size = 10;
17289 mem = adjust_address (m_tramp, HImode, offset);
17290 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17292 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17293 emit_move_insn (mem, chain_value);
17294 offset += size;
17296 /* Jump to r11; the last (unused) byte is a nop, only there to
17297 pad the write out to a single 32-bit store. */
17298 mem = adjust_address (m_tramp, SImode, offset);
17299 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17300 offset += 4;
17302 else
17304 rtx disp, chain;
17306 /* Depending on the static chain location, either load a register
17307 with a constant, or push the constant to the stack. All of the
17308 instructions are the same size. */
17309 chain = ix86_static_chain (fndecl, true);
17310 if (REG_P (chain))
17312 switch (REGNO (chain))
17314 case AX_REG:
17315 opcode = 0xb8; break;
17316 case CX_REG:
17317 opcode = 0xb9; break;
17318 default:
17319 gcc_unreachable ();
17322 else
17323 opcode = 0x68;
17325 if (need_endbr)
17327 /* Insert ENDBR32. */
17328 mem = adjust_address (m_tramp, SImode, offset);
17329 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17330 offset += 4;
17333 mem = adjust_address (m_tramp, QImode, offset);
17334 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17336 mem = adjust_address (m_tramp, SImode, offset + 1);
17337 emit_move_insn (mem, chain_value);
17338 offset += 5;
17340 mem = adjust_address (m_tramp, QImode, offset);
17341 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17343 mem = adjust_address (m_tramp, SImode, offset + 1);
17345 /* Compute offset from the end of the jmp to the target function.
17346 In the case in which the trampoline stores the static chain on
17347 the stack, we need to skip the first insn which pushes the
17348 (call-saved) register static chain; this push is 1 byte. */
17349 offset += 5;
17350 int skip = MEM_P (chain) ? 1 : 0;
17351 /* Skip ENDBR32 at the entry of the target function. */
17352 if (need_endbr
17353 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17354 skip += 4;
17355 disp = expand_binop (SImode, sub_optab, fnaddr,
17356 plus_constant (Pmode, XEXP (m_tramp, 0),
17357 offset - skip),
17358 NULL_RTX, 1, OPTAB_DIRECT);
17359 emit_move_insn (mem, disp);
17362 gcc_assert (offset <= TRAMPOLINE_SIZE);
17364 #ifdef HAVE_ENABLE_EXECUTE_STACK
17365 #ifdef CHECK_EXECUTE_STACK_ENABLED
17366 if (CHECK_EXECUTE_STACK_ENABLED)
17367 #endif
17368 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17369 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17370 #endif
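/* Byte layout of the 64-bit trampoline built above (a sketch, omitting the
   optional leading ENDBR64 and assuming movabs is needed for both values):

	49 bb <imm64: fnaddr>		movabs $fnaddr, %r11
	49 ba <imm64: static chain>	movabs $chain,  %r10
	49 ff e3 90			jmp *%r11; nop (padding)

   The 32-bit variant is "b9 <imm32> e9 <rel32>" (mov $chain, %ecx; jmp fn),
   with b8 for eax, or "68 <imm32> e9 <rel32>" (push $chain; jmp) in the
   regparm(3) case.  */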
17373 static bool
17374 ix86_allocate_stack_slots_for_args (void)
17376 /* Naked functions should not allocate stack slots for arguments. */
17377 return !ix86_function_naked (current_function_decl);
17380 static bool
17381 ix86_warn_func_return (tree decl)
17383 /* Naked functions are implemented entirely in assembly, including the
17384 return sequence, so suppress warnings about this. */
17385 return !ix86_function_naked (decl);
17388 /* Return the shift count of a vector-by-scalar shift builtin, taken from
17389 its second argument ARG1. */
17390 static tree
17391 ix86_vector_shift_count (tree arg1)
17393 if (tree_fits_uhwi_p (arg1))
17394 return arg1;
17395 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17397 /* The count argument is weird, passed in as various 128-bit
17398 (or 64-bit) vectors; the low 64 bits of it are the count. */
17399 unsigned char buf[16];
17400 int len = native_encode_expr (arg1, buf, 16);
17401 if (len == 0)
17402 return NULL_TREE;
17403 tree t = native_interpret_expr (uint64_type_node, buf, len);
17404 if (t && tree_fits_uhwi_p (t))
17405 return t;
17407 return NULL_TREE;
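/* For example, the count of _mm_srli_epi32 (v, 3) arrives here as the
   integer 3 and is returned directly, while _mm_srl_epi32 (v, c) passes the
   count as an __m128i; its constant form is a VECTOR_CST whose low 64 bits
   are reinterpreted via native_encode_expr/native_interpret_expr.  */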
17410 static tree
17411 ix86_fold_builtin (tree fndecl, int n_args,
17412 tree *args, bool ignore ATTRIBUTE_UNUSED)
17414 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17416 enum ix86_builtins fn_code
17417 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17418 enum rtx_code rcode;
17419 bool is_vshift;
17420 unsigned HOST_WIDE_INT mask;
17422 switch (fn_code)
17424 case IX86_BUILTIN_CPU_IS:
17425 case IX86_BUILTIN_CPU_SUPPORTS:
17426 gcc_assert (n_args == 1);
17427 return fold_builtin_cpu (fndecl, args);
17429 case IX86_BUILTIN_NANQ:
17430 case IX86_BUILTIN_NANSQ:
17432 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17433 const char *str = c_getstr (*args);
17434 int quiet = fn_code == IX86_BUILTIN_NANQ;
17435 REAL_VALUE_TYPE real;
17437 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17438 return build_real (type, real);
17439 return NULL_TREE;
17442 case IX86_BUILTIN_INFQ:
17443 case IX86_BUILTIN_HUGE_VALQ:
17445 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17446 REAL_VALUE_TYPE inf;
17447 real_inf (&inf);
17448 return build_real (type, inf);
17451 case IX86_BUILTIN_TZCNT16:
17452 case IX86_BUILTIN_CTZS:
17453 case IX86_BUILTIN_TZCNT32:
17454 case IX86_BUILTIN_TZCNT64:
17455 gcc_assert (n_args == 1);
17456 if (TREE_CODE (args[0]) == INTEGER_CST)
17458 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17459 tree arg = args[0];
17460 if (fn_code == IX86_BUILTIN_TZCNT16
17461 || fn_code == IX86_BUILTIN_CTZS)
17462 arg = fold_convert (short_unsigned_type_node, arg);
17463 if (integer_zerop (arg))
17464 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17465 else
17466 return fold_const_call (CFN_CTZ, type, arg);
17468 break;
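      /* E.g. __builtin_ia32_tzcnt_u32 (8) folds to 3, and a zero input
	 folds to the operand width (32), matching the tzcnt hardware
	 semantics rather than plain bsf.  */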
17470 case IX86_BUILTIN_LZCNT16:
17471 case IX86_BUILTIN_CLZS:
17472 case IX86_BUILTIN_LZCNT32:
17473 case IX86_BUILTIN_LZCNT64:
17474 gcc_assert (n_args == 1);
17475 if (TREE_CODE (args[0]) == INTEGER_CST)
17477 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17478 tree arg = args[0];
17479 if (fn_code == IX86_BUILTIN_LZCNT16
17480 || fn_code == IX86_BUILTIN_CLZS)
17481 arg = fold_convert (short_unsigned_type_node, arg);
17482 if (integer_zerop (arg))
17483 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17484 else
17485 return fold_const_call (CFN_CLZ, type, arg);
17487 break;
17489 case IX86_BUILTIN_BEXTR32:
17490 case IX86_BUILTIN_BEXTR64:
17491 case IX86_BUILTIN_BEXTRI32:
17492 case IX86_BUILTIN_BEXTRI64:
17493 gcc_assert (n_args == 2);
17494 if (tree_fits_uhwi_p (args[1]))
17496 unsigned HOST_WIDE_INT res = 0;
17497 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17498 unsigned int start = tree_to_uhwi (args[1]);
17499 unsigned int len = (start & 0xff00) >> 8;
17500 start &= 0xff;
17501 if (start >= prec || len == 0)
17502 res = 0;
17503 else if (!tree_fits_uhwi_p (args[0]))
17504 break;
17505 else
17506 res = tree_to_uhwi (args[0]) >> start;
17507 if (len > prec)
17508 len = prec;
17509 if (len < HOST_BITS_PER_WIDE_INT)
17510 res &= (HOST_WIDE_INT_1U << len) - 1;
17511 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17513 break;
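      /* Worked example: __builtin_ia32_bextr_u32 (0xabcd, (8 << 8) | 4),
	 i.e. start bit 4 and length 8, folds to (0xabcd >> 4) & 0xff == 0xbc.  */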
17515 case IX86_BUILTIN_BZHI32:
17516 case IX86_BUILTIN_BZHI64:
17517 gcc_assert (n_args == 2);
17518 if (tree_fits_uhwi_p (args[1]))
17520 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17521 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17522 return args[0];
17523 if (idx == 0)
17524 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17525 if (!tree_fits_uhwi_p (args[0]))
17526 break;
17527 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17528 res &= ~(HOST_WIDE_INT_M1U << idx);
17529 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17531 break;
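      /* E.g. a bzhi of (0xff, 4) folds to 0x0f: bits from position 4 upward
	 are cleared, an index of 0 yields 0, and an index of at least the
	 operand width returns the first operand unchanged.  */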
17533 case IX86_BUILTIN_PDEP32:
17534 case IX86_BUILTIN_PDEP64:
17535 gcc_assert (n_args == 2);
17536 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17538 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17539 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17540 unsigned HOST_WIDE_INT res = 0;
17541 unsigned HOST_WIDE_INT m, k = 1;
17542 for (m = 1; m; m <<= 1)
17543 if ((mask & m) != 0)
17545 if ((src & k) != 0)
17546 res |= m;
17547 k <<= 1;
17549 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17551 break;
17553 case IX86_BUILTIN_PEXT32:
17554 case IX86_BUILTIN_PEXT64:
17555 gcc_assert (n_args == 2);
17556 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17558 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17559 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17560 unsigned HOST_WIDE_INT res = 0;
17561 unsigned HOST_WIDE_INT m, k = 1;
17562 for (m = 1; m; m <<= 1)
17563 if ((mask & m) != 0)
17565 if ((src & m) != 0)
17566 res |= k;
17567 k <<= 1;
17569 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17571 break;
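      /* Worked example of the two loops above: pdep (src = 0b101,
	 mask = 0b11010) deposits the low source bits into the set mask
	 positions, giving 0b10010, and pext (0b10010, 0b11010) gathers them
	 back out, giving 0b101; the two operations are inverses over the
	 same mask.  */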
17573 case IX86_BUILTIN_MOVMSKPS:
17574 case IX86_BUILTIN_PMOVMSKB:
17575 case IX86_BUILTIN_MOVMSKPD:
17576 case IX86_BUILTIN_PMOVMSKB128:
17577 case IX86_BUILTIN_MOVMSKPD256:
17578 case IX86_BUILTIN_MOVMSKPS256:
17579 case IX86_BUILTIN_PMOVMSKB256:
17580 gcc_assert (n_args == 1);
17581 if (TREE_CODE (args[0]) == VECTOR_CST)
17583 HOST_WIDE_INT res = 0;
17584 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17586 tree e = VECTOR_CST_ELT (args[0], i);
17587 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17589 if (wi::neg_p (wi::to_wide (e)))
17590 res |= HOST_WIDE_INT_1 << i;
17592 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17594 if (TREE_REAL_CST (e).sign)
17595 res |= HOST_WIDE_INT_1 << i;
17597 else
17598 return NULL_TREE;
17600 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17602 break;
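      /* For instance, _mm_movemask_ps on the constant vector
	 { -1.0f, 2.0f, -3.0f, 4.0f } folds to 5 (binary 0101): bit i of the
	 result is the sign bit of element i, which is why only the REAL_CST
	 sign needs to be inspected.  */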
17604 case IX86_BUILTIN_PSLLD:
17605 case IX86_BUILTIN_PSLLD128:
17606 case IX86_BUILTIN_PSLLD128_MASK:
17607 case IX86_BUILTIN_PSLLD256:
17608 case IX86_BUILTIN_PSLLD256_MASK:
17609 case IX86_BUILTIN_PSLLD512:
17610 case IX86_BUILTIN_PSLLDI:
17611 case IX86_BUILTIN_PSLLDI128:
17612 case IX86_BUILTIN_PSLLDI128_MASK:
17613 case IX86_BUILTIN_PSLLDI256:
17614 case IX86_BUILTIN_PSLLDI256_MASK:
17615 case IX86_BUILTIN_PSLLDI512:
17616 case IX86_BUILTIN_PSLLQ:
17617 case IX86_BUILTIN_PSLLQ128:
17618 case IX86_BUILTIN_PSLLQ128_MASK:
17619 case IX86_BUILTIN_PSLLQ256:
17620 case IX86_BUILTIN_PSLLQ256_MASK:
17621 case IX86_BUILTIN_PSLLQ512:
17622 case IX86_BUILTIN_PSLLQI:
17623 case IX86_BUILTIN_PSLLQI128:
17624 case IX86_BUILTIN_PSLLQI128_MASK:
17625 case IX86_BUILTIN_PSLLQI256:
17626 case IX86_BUILTIN_PSLLQI256_MASK:
17627 case IX86_BUILTIN_PSLLQI512:
17628 case IX86_BUILTIN_PSLLW:
17629 case IX86_BUILTIN_PSLLW128:
17630 case IX86_BUILTIN_PSLLW128_MASK:
17631 case IX86_BUILTIN_PSLLW256:
17632 case IX86_BUILTIN_PSLLW256_MASK:
17633 case IX86_BUILTIN_PSLLW512_MASK:
17634 case IX86_BUILTIN_PSLLWI:
17635 case IX86_BUILTIN_PSLLWI128:
17636 case IX86_BUILTIN_PSLLWI128_MASK:
17637 case IX86_BUILTIN_PSLLWI256:
17638 case IX86_BUILTIN_PSLLWI256_MASK:
17639 case IX86_BUILTIN_PSLLWI512_MASK:
17640 rcode = ASHIFT;
17641 is_vshift = false;
17642 goto do_shift;
17643 case IX86_BUILTIN_PSRAD:
17644 case IX86_BUILTIN_PSRAD128:
17645 case IX86_BUILTIN_PSRAD128_MASK:
17646 case IX86_BUILTIN_PSRAD256:
17647 case IX86_BUILTIN_PSRAD256_MASK:
17648 case IX86_BUILTIN_PSRAD512:
17649 case IX86_BUILTIN_PSRADI:
17650 case IX86_BUILTIN_PSRADI128:
17651 case IX86_BUILTIN_PSRADI128_MASK:
17652 case IX86_BUILTIN_PSRADI256:
17653 case IX86_BUILTIN_PSRADI256_MASK:
17654 case IX86_BUILTIN_PSRADI512:
17655 case IX86_BUILTIN_PSRAQ128_MASK:
17656 case IX86_BUILTIN_PSRAQ256_MASK:
17657 case IX86_BUILTIN_PSRAQ512:
17658 case IX86_BUILTIN_PSRAQI128_MASK:
17659 case IX86_BUILTIN_PSRAQI256_MASK:
17660 case IX86_BUILTIN_PSRAQI512:
17661 case IX86_BUILTIN_PSRAW:
17662 case IX86_BUILTIN_PSRAW128:
17663 case IX86_BUILTIN_PSRAW128_MASK:
17664 case IX86_BUILTIN_PSRAW256:
17665 case IX86_BUILTIN_PSRAW256_MASK:
17666 case IX86_BUILTIN_PSRAW512:
17667 case IX86_BUILTIN_PSRAWI:
17668 case IX86_BUILTIN_PSRAWI128:
17669 case IX86_BUILTIN_PSRAWI128_MASK:
17670 case IX86_BUILTIN_PSRAWI256:
17671 case IX86_BUILTIN_PSRAWI256_MASK:
17672 case IX86_BUILTIN_PSRAWI512:
17673 rcode = ASHIFTRT;
17674 is_vshift = false;
17675 goto do_shift;
17676 case IX86_BUILTIN_PSRLD:
17677 case IX86_BUILTIN_PSRLD128:
17678 case IX86_BUILTIN_PSRLD128_MASK:
17679 case IX86_BUILTIN_PSRLD256:
17680 case IX86_BUILTIN_PSRLD256_MASK:
17681 case IX86_BUILTIN_PSRLD512:
17682 case IX86_BUILTIN_PSRLDI:
17683 case IX86_BUILTIN_PSRLDI128:
17684 case IX86_BUILTIN_PSRLDI128_MASK:
17685 case IX86_BUILTIN_PSRLDI256:
17686 case IX86_BUILTIN_PSRLDI256_MASK:
17687 case IX86_BUILTIN_PSRLDI512:
17688 case IX86_BUILTIN_PSRLQ:
17689 case IX86_BUILTIN_PSRLQ128:
17690 case IX86_BUILTIN_PSRLQ128_MASK:
17691 case IX86_BUILTIN_PSRLQ256:
17692 case IX86_BUILTIN_PSRLQ256_MASK:
17693 case IX86_BUILTIN_PSRLQ512:
17694 case IX86_BUILTIN_PSRLQI:
17695 case IX86_BUILTIN_PSRLQI128:
17696 case IX86_BUILTIN_PSRLQI128_MASK:
17697 case IX86_BUILTIN_PSRLQI256:
17698 case IX86_BUILTIN_PSRLQI256_MASK:
17699 case IX86_BUILTIN_PSRLQI512:
17700 case IX86_BUILTIN_PSRLW:
17701 case IX86_BUILTIN_PSRLW128:
17702 case IX86_BUILTIN_PSRLW128_MASK:
17703 case IX86_BUILTIN_PSRLW256:
17704 case IX86_BUILTIN_PSRLW256_MASK:
17705 case IX86_BUILTIN_PSRLW512:
17706 case IX86_BUILTIN_PSRLWI:
17707 case IX86_BUILTIN_PSRLWI128:
17708 case IX86_BUILTIN_PSRLWI128_MASK:
17709 case IX86_BUILTIN_PSRLWI256:
17710 case IX86_BUILTIN_PSRLWI256_MASK:
17711 case IX86_BUILTIN_PSRLWI512:
17712 rcode = LSHIFTRT;
17713 is_vshift = false;
17714 goto do_shift;
17715 case IX86_BUILTIN_PSLLVV16HI:
17716 case IX86_BUILTIN_PSLLVV16SI:
17717 case IX86_BUILTIN_PSLLVV2DI:
17718 case IX86_BUILTIN_PSLLVV2DI_MASK:
17719 case IX86_BUILTIN_PSLLVV32HI:
17720 case IX86_BUILTIN_PSLLVV4DI:
17721 case IX86_BUILTIN_PSLLVV4DI_MASK:
17722 case IX86_BUILTIN_PSLLVV4SI:
17723 case IX86_BUILTIN_PSLLVV4SI_MASK:
17724 case IX86_BUILTIN_PSLLVV8DI:
17725 case IX86_BUILTIN_PSLLVV8HI:
17726 case IX86_BUILTIN_PSLLVV8SI:
17727 case IX86_BUILTIN_PSLLVV8SI_MASK:
17728 rcode = ASHIFT;
17729 is_vshift = true;
17730 goto do_shift;
17731 case IX86_BUILTIN_PSRAVQ128:
17732 case IX86_BUILTIN_PSRAVQ256:
17733 case IX86_BUILTIN_PSRAVV16HI:
17734 case IX86_BUILTIN_PSRAVV16SI:
17735 case IX86_BUILTIN_PSRAVV32HI:
17736 case IX86_BUILTIN_PSRAVV4SI:
17737 case IX86_BUILTIN_PSRAVV4SI_MASK:
17738 case IX86_BUILTIN_PSRAVV8DI:
17739 case IX86_BUILTIN_PSRAVV8HI:
17740 case IX86_BUILTIN_PSRAVV8SI:
17741 case IX86_BUILTIN_PSRAVV8SI_MASK:
17742 rcode = ASHIFTRT;
17743 is_vshift = true;
17744 goto do_shift;
17745 case IX86_BUILTIN_PSRLVV16HI:
17746 case IX86_BUILTIN_PSRLVV16SI:
17747 case IX86_BUILTIN_PSRLVV2DI:
17748 case IX86_BUILTIN_PSRLVV2DI_MASK:
17749 case IX86_BUILTIN_PSRLVV32HI:
17750 case IX86_BUILTIN_PSRLVV4DI:
17751 case IX86_BUILTIN_PSRLVV4DI_MASK:
17752 case IX86_BUILTIN_PSRLVV4SI:
17753 case IX86_BUILTIN_PSRLVV4SI_MASK:
17754 case IX86_BUILTIN_PSRLVV8DI:
17755 case IX86_BUILTIN_PSRLVV8HI:
17756 case IX86_BUILTIN_PSRLVV8SI:
17757 case IX86_BUILTIN_PSRLVV8SI_MASK:
17758 rcode = LSHIFTRT;
17759 is_vshift = true;
17760 goto do_shift;
17762 do_shift:
17763 gcc_assert (n_args >= 2);
17764 if (TREE_CODE (args[0]) != VECTOR_CST)
17765 break;
17766 mask = HOST_WIDE_INT_M1U;
17767 if (n_args > 2)
17769 /* This is a masked shift. */
17770 if (!tree_fits_uhwi_p (args[n_args - 1])
17771 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17772 break;
17773 mask = tree_to_uhwi (args[n_args - 1]);
17774 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17775 mask |= HOST_WIDE_INT_M1U << elems;
17776 if (mask != HOST_WIDE_INT_M1U
17777 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17778 break;
17779 if (mask == (HOST_WIDE_INT_M1U << elems))
17780 return args[n_args - 2];
17782 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17783 break;
17784 if (tree tem = (is_vshift ? integer_one_node
17785 : ix86_vector_shift_count (args[1])))
17787 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17788 unsigned HOST_WIDE_INT prec
17789 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17790 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17791 return args[0];
17792 if (count >= prec)
17794 if (rcode == ASHIFTRT)
17795 count = prec - 1;
17796 else if (mask == HOST_WIDE_INT_M1U)
17797 return build_zero_cst (TREE_TYPE (args[0]));
17799 tree countt = NULL_TREE;
17800 if (!is_vshift)
17802 if (count >= prec)
17803 countt = integer_zero_node;
17804 else
17805 countt = build_int_cst (integer_type_node, count);
17807 tree_vector_builder builder;
17808 if (mask != HOST_WIDE_INT_M1U || is_vshift)
17809 builder.new_vector (TREE_TYPE (args[0]),
17810 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17812 else
17813 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17814 false);
17815 unsigned int cnt = builder.encoded_nelts ();
17816 for (unsigned int i = 0; i < cnt; ++i)
17818 tree elt = VECTOR_CST_ELT (args[0], i);
17819 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17820 return NULL_TREE;
17821 tree type = TREE_TYPE (elt);
17822 if (rcode == LSHIFTRT)
17823 elt = fold_convert (unsigned_type_for (type), elt);
17824 if (is_vshift)
17826 countt = VECTOR_CST_ELT (args[1], i);
17827 if (TREE_CODE (countt) != INTEGER_CST
17828 || TREE_OVERFLOW (countt))
17829 return NULL_TREE;
17830 if (wi::neg_p (wi::to_wide (countt))
17831 || wi::to_widest (countt) >= prec)
17833 if (rcode == ASHIFTRT)
17834 countt = build_int_cst (TREE_TYPE (countt),
17835 prec - 1);
17836 else
17838 elt = build_zero_cst (TREE_TYPE (elt));
17839 countt = build_zero_cst (TREE_TYPE (countt));
17843 else if (count >= prec)
17844 elt = build_zero_cst (TREE_TYPE (elt));
17845 elt = const_binop (rcode == ASHIFT
17846 ? LSHIFT_EXPR : RSHIFT_EXPR,
17847 TREE_TYPE (elt), elt, countt);
17848 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17849 return NULL_TREE;
17850 if (rcode == LSHIFTRT)
17851 elt = fold_convert (type, elt);
17852 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17854 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17855 if (TREE_CODE (elt) != INTEGER_CST
17856 || TREE_OVERFLOW (elt))
17857 return NULL_TREE;
17859 builder.quick_push (elt);
17861 return builder.build ();
17863 break;
17865 default:
17866 break;
17870 #ifdef SUBTARGET_FOLD_BUILTIN
17871 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17872 #endif
17874 return NULL_TREE;
17877 /* Fold an MD builtin in GIMPLE (use ix86_fold_builtin for folding into
17878 a constant). */
17880 bool
17881 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17883 gimple *stmt = gsi_stmt (*gsi);
17884 tree fndecl = gimple_call_fndecl (stmt);
17885 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17886 int n_args = gimple_call_num_args (stmt);
17887 enum ix86_builtins fn_code
17888 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17889 tree decl = NULL_TREE;
17890 tree arg0, arg1, arg2;
17891 enum rtx_code rcode;
17892 unsigned HOST_WIDE_INT count;
17893 bool is_vshift;
17895 switch (fn_code)
17897 case IX86_BUILTIN_TZCNT32:
17898 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17899 goto fold_tzcnt_lzcnt;
17901 case IX86_BUILTIN_TZCNT64:
17902 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17903 goto fold_tzcnt_lzcnt;
17905 case IX86_BUILTIN_LZCNT32:
17906 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17907 goto fold_tzcnt_lzcnt;
17909 case IX86_BUILTIN_LZCNT64:
17910 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17911 goto fold_tzcnt_lzcnt;
17913 fold_tzcnt_lzcnt:
17914 gcc_assert (n_args == 1);
17915 arg0 = gimple_call_arg (stmt, 0);
17916 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17918 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17919 /* If arg0 is provably non-zero, optimize into the generic
17920 __builtin_c[tl]z{,ll} function, which the middle-end handles
17921 better. */
17922 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17923 return false;
17925 location_t loc = gimple_location (stmt);
17926 gimple *g = gimple_build_call (decl, 1, arg0);
17927 gimple_set_location (g, loc);
17928 tree lhs = make_ssa_name (integer_type_node);
17929 gimple_call_set_lhs (g, lhs);
17930 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17931 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17932 gimple_set_location (g, loc);
17933 gsi_replace (gsi, g, false);
17934 return true;
17936 break;
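/* Worked example of the folding above: when value-range information
   proves the argument non-zero, e.g. inside
   "if (x) n = __builtin_ia32_tzcnt_u32 (x);" (the builtin behind
   _tzcnt_u32), the call is rewritten into __builtin_ctz (x) followed by
   a conversion to the original result type, so the middle-end can use
   its generic count-trailing-zeros knowledge.  */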
17938 case IX86_BUILTIN_BZHI32:
17939 case IX86_BUILTIN_BZHI64:
17940 gcc_assert (n_args == 2);
17941 arg1 = gimple_call_arg (stmt, 1);
17942 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17944 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17945 arg0 = gimple_call_arg (stmt, 0);
17946 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17947 break;
17948 location_t loc = gimple_location (stmt);
17949 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17950 gimple_set_location (g, loc);
17951 gsi_replace (gsi, g, false);
17952 return true;
17954 break;
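/* Worked example of the BZHI folding above: for
   __builtin_ia32_bzhi_si (x, 32) (the builtin behind _bzhi_u32) the
   start index 32 is not smaller than the 32-bit precision of x, so the
   instruction would copy x unchanged and the call is folded to plain x.
   Smaller indices are left alone.  */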
17956 case IX86_BUILTIN_PDEP32:
17957 case IX86_BUILTIN_PDEP64:
17958 case IX86_BUILTIN_PEXT32:
17959 case IX86_BUILTIN_PEXT64:
17960 gcc_assert (n_args == 2);
17961 arg1 = gimple_call_arg (stmt, 1);
17962 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17964 location_t loc = gimple_location (stmt);
17965 arg0 = gimple_call_arg (stmt, 0);
17966 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17967 gimple_set_location (g, loc);
17968 gsi_replace (gsi, g, false);
17969 return true;
17971 break;
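/* Worked example of the PDEP/PEXT folding above: with an all-ones mask
   both operations are the identity, so e.g. __builtin_ia32_pdep_si (x, -1)
   and __builtin_ia32_pext_si (x, -1) (behind _pdep_u32/_pext_u32) are
   folded to plain x.  */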
17973 case IX86_BUILTIN_PSLLD:
17974 case IX86_BUILTIN_PSLLD128:
17975 case IX86_BUILTIN_PSLLD128_MASK:
17976 case IX86_BUILTIN_PSLLD256:
17977 case IX86_BUILTIN_PSLLD256_MASK:
17978 case IX86_BUILTIN_PSLLD512:
17979 case IX86_BUILTIN_PSLLDI:
17980 case IX86_BUILTIN_PSLLDI128:
17981 case IX86_BUILTIN_PSLLDI128_MASK:
17982 case IX86_BUILTIN_PSLLDI256:
17983 case IX86_BUILTIN_PSLLDI256_MASK:
17984 case IX86_BUILTIN_PSLLDI512:
17985 case IX86_BUILTIN_PSLLQ:
17986 case IX86_BUILTIN_PSLLQ128:
17987 case IX86_BUILTIN_PSLLQ128_MASK:
17988 case IX86_BUILTIN_PSLLQ256:
17989 case IX86_BUILTIN_PSLLQ256_MASK:
17990 case IX86_BUILTIN_PSLLQ512:
17991 case IX86_BUILTIN_PSLLQI:
17992 case IX86_BUILTIN_PSLLQI128:
17993 case IX86_BUILTIN_PSLLQI128_MASK:
17994 case IX86_BUILTIN_PSLLQI256:
17995 case IX86_BUILTIN_PSLLQI256_MASK:
17996 case IX86_BUILTIN_PSLLQI512:
17997 case IX86_BUILTIN_PSLLW:
17998 case IX86_BUILTIN_PSLLW128:
17999 case IX86_BUILTIN_PSLLW128_MASK:
18000 case IX86_BUILTIN_PSLLW256:
18001 case IX86_BUILTIN_PSLLW256_MASK:
18002 case IX86_BUILTIN_PSLLW512_MASK:
18003 case IX86_BUILTIN_PSLLWI:
18004 case IX86_BUILTIN_PSLLWI128:
18005 case IX86_BUILTIN_PSLLWI128_MASK:
18006 case IX86_BUILTIN_PSLLWI256:
18007 case IX86_BUILTIN_PSLLWI256_MASK:
18008 case IX86_BUILTIN_PSLLWI512_MASK:
18009 rcode = ASHIFT;
18010 is_vshift = false;
18011 goto do_shift;
18012 case IX86_BUILTIN_PSRAD:
18013 case IX86_BUILTIN_PSRAD128:
18014 case IX86_BUILTIN_PSRAD128_MASK:
18015 case IX86_BUILTIN_PSRAD256:
18016 case IX86_BUILTIN_PSRAD256_MASK:
18017 case IX86_BUILTIN_PSRAD512:
18018 case IX86_BUILTIN_PSRADI:
18019 case IX86_BUILTIN_PSRADI128:
18020 case IX86_BUILTIN_PSRADI128_MASK:
18021 case IX86_BUILTIN_PSRADI256:
18022 case IX86_BUILTIN_PSRADI256_MASK:
18023 case IX86_BUILTIN_PSRADI512:
18024 case IX86_BUILTIN_PSRAQ128_MASK:
18025 case IX86_BUILTIN_PSRAQ256_MASK:
18026 case IX86_BUILTIN_PSRAQ512:
18027 case IX86_BUILTIN_PSRAQI128_MASK:
18028 case IX86_BUILTIN_PSRAQI256_MASK:
18029 case IX86_BUILTIN_PSRAQI512:
18030 case IX86_BUILTIN_PSRAW:
18031 case IX86_BUILTIN_PSRAW128:
18032 case IX86_BUILTIN_PSRAW128_MASK:
18033 case IX86_BUILTIN_PSRAW256:
18034 case IX86_BUILTIN_PSRAW256_MASK:
18035 case IX86_BUILTIN_PSRAW512:
18036 case IX86_BUILTIN_PSRAWI:
18037 case IX86_BUILTIN_PSRAWI128:
18038 case IX86_BUILTIN_PSRAWI128_MASK:
18039 case IX86_BUILTIN_PSRAWI256:
18040 case IX86_BUILTIN_PSRAWI256_MASK:
18041 case IX86_BUILTIN_PSRAWI512:
18042 rcode = ASHIFTRT;
18043 is_vshift = false;
18044 goto do_shift;
18045 case IX86_BUILTIN_PSRLD:
18046 case IX86_BUILTIN_PSRLD128:
18047 case IX86_BUILTIN_PSRLD128_MASK:
18048 case IX86_BUILTIN_PSRLD256:
18049 case IX86_BUILTIN_PSRLD256_MASK:
18050 case IX86_BUILTIN_PSRLD512:
18051 case IX86_BUILTIN_PSRLDI:
18052 case IX86_BUILTIN_PSRLDI128:
18053 case IX86_BUILTIN_PSRLDI128_MASK:
18054 case IX86_BUILTIN_PSRLDI256:
18055 case IX86_BUILTIN_PSRLDI256_MASK:
18056 case IX86_BUILTIN_PSRLDI512:
18057 case IX86_BUILTIN_PSRLQ:
18058 case IX86_BUILTIN_PSRLQ128:
18059 case IX86_BUILTIN_PSRLQ128_MASK:
18060 case IX86_BUILTIN_PSRLQ256:
18061 case IX86_BUILTIN_PSRLQ256_MASK:
18062 case IX86_BUILTIN_PSRLQ512:
18063 case IX86_BUILTIN_PSRLQI:
18064 case IX86_BUILTIN_PSRLQI128:
18065 case IX86_BUILTIN_PSRLQI128_MASK:
18066 case IX86_BUILTIN_PSRLQI256:
18067 case IX86_BUILTIN_PSRLQI256_MASK:
18068 case IX86_BUILTIN_PSRLQI512:
18069 case IX86_BUILTIN_PSRLW:
18070 case IX86_BUILTIN_PSRLW128:
18071 case IX86_BUILTIN_PSRLW128_MASK:
18072 case IX86_BUILTIN_PSRLW256:
18073 case IX86_BUILTIN_PSRLW256_MASK:
18074 case IX86_BUILTIN_PSRLW512:
18075 case IX86_BUILTIN_PSRLWI:
18076 case IX86_BUILTIN_PSRLWI128:
18077 case IX86_BUILTIN_PSRLWI128_MASK:
18078 case IX86_BUILTIN_PSRLWI256:
18079 case IX86_BUILTIN_PSRLWI256_MASK:
18080 case IX86_BUILTIN_PSRLWI512:
18081 rcode = LSHIFTRT;
18082 is_vshift = false;
18083 goto do_shift;
18084 case IX86_BUILTIN_PSLLVV16HI:
18085 case IX86_BUILTIN_PSLLVV16SI:
18086 case IX86_BUILTIN_PSLLVV2DI:
18087 case IX86_BUILTIN_PSLLVV2DI_MASK:
18088 case IX86_BUILTIN_PSLLVV32HI:
18089 case IX86_BUILTIN_PSLLVV4DI:
18090 case IX86_BUILTIN_PSLLVV4DI_MASK:
18091 case IX86_BUILTIN_PSLLVV4SI:
18092 case IX86_BUILTIN_PSLLVV4SI_MASK:
18093 case IX86_BUILTIN_PSLLVV8DI:
18094 case IX86_BUILTIN_PSLLVV8HI:
18095 case IX86_BUILTIN_PSLLVV8SI:
18096 case IX86_BUILTIN_PSLLVV8SI_MASK:
18097 rcode = ASHIFT;
18098 is_vshift = true;
18099 goto do_shift;
18100 case IX86_BUILTIN_PSRAVQ128:
18101 case IX86_BUILTIN_PSRAVQ256:
18102 case IX86_BUILTIN_PSRAVV16HI:
18103 case IX86_BUILTIN_PSRAVV16SI:
18104 case IX86_BUILTIN_PSRAVV32HI:
18105 case IX86_BUILTIN_PSRAVV4SI:
18106 case IX86_BUILTIN_PSRAVV4SI_MASK:
18107 case IX86_BUILTIN_PSRAVV8DI:
18108 case IX86_BUILTIN_PSRAVV8HI:
18109 case IX86_BUILTIN_PSRAVV8SI:
18110 case IX86_BUILTIN_PSRAVV8SI_MASK:
18111 rcode = ASHIFTRT;
18112 is_vshift = true;
18113 goto do_shift;
18114 case IX86_BUILTIN_PSRLVV16HI:
18115 case IX86_BUILTIN_PSRLVV16SI:
18116 case IX86_BUILTIN_PSRLVV2DI:
18117 case IX86_BUILTIN_PSRLVV2DI_MASK:
18118 case IX86_BUILTIN_PSRLVV32HI:
18119 case IX86_BUILTIN_PSRLVV4DI:
18120 case IX86_BUILTIN_PSRLVV4DI_MASK:
18121 case IX86_BUILTIN_PSRLVV4SI:
18122 case IX86_BUILTIN_PSRLVV4SI_MASK:
18123 case IX86_BUILTIN_PSRLVV8DI:
18124 case IX86_BUILTIN_PSRLVV8HI:
18125 case IX86_BUILTIN_PSRLVV8SI:
18126 case IX86_BUILTIN_PSRLVV8SI_MASK:
18127 rcode = LSHIFTRT;
18128 is_vshift = true;
18129 goto do_shift;
18131 do_shift:
18132 gcc_assert (n_args >= 2);
18133 arg0 = gimple_call_arg (stmt, 0);
18134 arg1 = gimple_call_arg (stmt, 1);
18135 if (n_args > 2)
18137 /* This is a masked shift. Only optimize if the mask is all ones. */
18138 tree argl = gimple_call_arg (stmt, n_args - 1);
18139 if (!tree_fits_uhwi_p (argl))
18140 break;
18141 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
18142 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18143 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18144 break;
18146 if (is_vshift)
18148 if (TREE_CODE (arg1) != VECTOR_CST)
18149 break;
18150 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
18151 if (integer_zerop (arg1))
18152 count = 0;
18153 else if (rcode == ASHIFTRT)
18154 break;
18155 else
18156 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
18158 tree elt = VECTOR_CST_ELT (arg1, i);
18159 if (!wi::neg_p (wi::to_wide (elt))
18160 && wi::to_widest (elt) < count)
18161 return false;
18164 else
18166 arg1 = ix86_vector_shift_count (arg1);
18167 if (!arg1)
18168 break;
18169 count = tree_to_uhwi (arg1);
18171 if (count == 0)
18173 /* Just return the first argument for shift by 0. */
18174 location_t loc = gimple_location (stmt);
18175 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18176 gimple_set_location (g, loc);
18177 gsi_replace (gsi, g, false);
18178 return true;
18180 if (rcode != ASHIFTRT
18181 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
18183 /* For shift counts equal to or greater than precision, the result
18184 is zero, except for arithmetic right shift. */
18185 location_t loc = gimple_location (stmt);
18186 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18187 build_zero_cst (TREE_TYPE (arg0)));
18188 gimple_set_location (g, loc);
18189 gsi_replace (gsi, g, false);
18190 return true;
18192 break;
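/* Worked examples of the shift folding above, using the 128-bit
   immediate-count forms: a shift by 0 such as
   __builtin_ia32_pslldi128 (v, 0) (behind _mm_slli_epi32) is replaced
   by v itself, and a logical shift by a count >= the element precision
   such as __builtin_ia32_psrldi128 (v, 32) is replaced by the all-zeros
   vector; arithmetic right shifts are excluded from the latter rule
   because their result is the sign bit replicated, not zero.  */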
18194 case IX86_BUILTIN_SHUFPD:
18195 arg2 = gimple_call_arg (stmt, 2);
18196 if (TREE_CODE (arg2) == INTEGER_CST)
18198 location_t loc = gimple_location (stmt);
18199 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
18200 arg0 = gimple_call_arg (stmt, 0);
18201 arg1 = gimple_call_arg (stmt, 1);
18202 tree itype = long_long_integer_type_node;
18203 tree vtype = build_vector_type (itype, 2); /* V2DI */
18204 tree_vector_builder elts (vtype, 2, 1);
18205 /* Ignore bits other than the lowest 2. */
18206 elts.quick_push (build_int_cst (itype, imask & 1));
18207 imask >>= 1;
18208 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
18209 tree omask = elts.build ();
18210 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18211 VEC_PERM_EXPR,
18212 arg0, arg1, omask);
18213 gimple_set_location (g, loc);
18214 gsi_replace (gsi, g, false);
18215 return true;
18217 // Do not error yet, the constant could be propagated later?
18218 break;
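/* Worked example of the SHUFPD folding above: for
   __builtin_ia32_shufpd (a, b, 1) (behind _mm_shuffle_pd) the low mask
   bit selects element 1 of a and the next bit selects element 0 of b,
   so the call becomes VEC_PERM_EXPR <a, b, { 1, 2 }>, i.e. { a[1], b[0] }.  */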
18220 default:
18221 break;
18224 return false;
18227 /* Handler for an SVML-style interface to
18228 a library with vectorized intrinsics. */
18230 tree
18231 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
18233 char name[20];
18234 tree fntype, new_fndecl, args;
18235 unsigned arity;
18236 const char *bname;
18237 machine_mode el_mode, in_mode;
18238 int n, in_n;
18240 /* The SVML is suitable for unsafe math only. */
18241 if (!flag_unsafe_math_optimizations)
18242 return NULL_TREE;
18244 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18245 n = TYPE_VECTOR_SUBPARTS (type_out);
18246 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18247 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18248 if (el_mode != in_mode
18249 || n != in_n)
18250 return NULL_TREE;
18252 switch (fn)
18254 CASE_CFN_EXP:
18255 CASE_CFN_LOG:
18256 CASE_CFN_LOG10:
18257 CASE_CFN_POW:
18258 CASE_CFN_TANH:
18259 CASE_CFN_TAN:
18260 CASE_CFN_ATAN:
18261 CASE_CFN_ATAN2:
18262 CASE_CFN_ATANH:
18263 CASE_CFN_CBRT:
18264 CASE_CFN_SINH:
18265 CASE_CFN_SIN:
18266 CASE_CFN_ASINH:
18267 CASE_CFN_ASIN:
18268 CASE_CFN_COSH:
18269 CASE_CFN_COS:
18270 CASE_CFN_ACOSH:
18271 CASE_CFN_ACOS:
18272 if ((el_mode != DFmode || n != 2)
18273 && (el_mode != SFmode || n != 4))
18274 return NULL_TREE;
18275 break;
18277 default:
18278 return NULL_TREE;
18281 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18282 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18284 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18285 strcpy (name, "vmlsLn4");
18286 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18287 strcpy (name, "vmldLn2");
18288 else if (n == 4)
18290 sprintf (name, "vmls%s", bname+10);
18291 name[strlen (name)-1] = '4';
18293 else
18294 sprintf (name, "vmld%s2", bname+10);
18296 /* Convert the first letter of the math function name to uppercase. */
18297 name[4] &= ~0x20;
18299 arity = 0;
18300 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18301 arity++;
18303 if (arity == 1)
18304 fntype = build_function_type_list (type_out, type_in, NULL);
18305 else
18306 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18308 /* Build a function declaration for the vectorized function. */
18309 new_fndecl = build_decl (BUILTINS_LOCATION,
18310 FUNCTION_DECL, get_identifier (name), fntype);
18311 TREE_PUBLIC (new_fndecl) = 1;
18312 DECL_EXTERNAL (new_fndecl) = 1;
18313 DECL_IS_NOVOPS (new_fndecl) = 1;
18314 TREE_READONLY (new_fndecl) = 1;
18316 return new_fndecl;
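/* Example of the SVML name mangling above: a 2-lane double sin maps to
   "vmldSin2" and a 4-lane float sinf to "vmlsSin4", while log/logf are
   special-cased to "vmldLn2"/"vmlsLn4" to match the library's naming.  */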
18319 /* Handler for an ACML-style interface to
18320 a library with vectorized intrinsics. */
18322 tree
18323 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18325 char name[20] = "__vr.._";
18326 tree fntype, new_fndecl, args;
18327 unsigned arity;
18328 const char *bname;
18329 machine_mode el_mode, in_mode;
18330 int n, in_n;
18332 /* The ACML is 64-bit only and suitable for unsafe math only, as
18333 it does not correctly support parts of IEEE arithmetic, such as
18334 denormals, with the required precision. */
18335 if (!TARGET_64BIT
18336 || !flag_unsafe_math_optimizations)
18337 return NULL_TREE;
18339 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18340 n = TYPE_VECTOR_SUBPARTS (type_out);
18341 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18342 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18343 if (el_mode != in_mode
18344 || n != in_n)
18345 return NULL_TREE;
18347 switch (fn)
18349 CASE_CFN_SIN:
18350 CASE_CFN_COS:
18351 CASE_CFN_EXP:
18352 CASE_CFN_LOG:
18353 CASE_CFN_LOG2:
18354 CASE_CFN_LOG10:
18355 if (el_mode == DFmode && n == 2)
18357 name[4] = 'd';
18358 name[5] = '2';
18360 else if (el_mode == SFmode && n == 4)
18362 name[4] = 's';
18363 name[5] = '4';
18365 else
18366 return NULL_TREE;
18367 break;
18369 default:
18370 return NULL_TREE;
18373 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18374 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18375 sprintf (name + 7, "%s", bname+10);
18377 arity = 0;
18378 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18379 arity++;
18381 if (arity == 1)
18382 fntype = build_function_type_list (type_out, type_in, NULL);
18383 else
18384 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18386 /* Build a function declaration for the vectorized function. */
18387 new_fndecl = build_decl (BUILTINS_LOCATION,
18388 FUNCTION_DECL, get_identifier (name), fntype);
18389 TREE_PUBLIC (new_fndecl) = 1;
18390 DECL_EXTERNAL (new_fndecl) = 1;
18391 DECL_IS_NOVOPS (new_fndecl) = 1;
18392 TREE_READONLY (new_fndecl) = 1;
18394 return new_fndecl;
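/* Example of the ACML name mangling above: a 2-lane double sin maps to
   "__vrd2_sin" and a 4-lane float sinf to "__vrs4_sinf".  */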
18397 /* Returns a decl of a function that implements scatter store with
18398 register type VECTYPE and index type INDEX_TYPE and SCALE.
18399 Return NULL_TREE if it is not available. */
18401 static tree
18402 ix86_vectorize_builtin_scatter (const_tree vectype,
18403 const_tree index_type, int scale)
18405 bool si;
18406 enum ix86_builtins code;
18408 if (!TARGET_AVX512F)
18409 return NULL_TREE;
18411 if ((TREE_CODE (index_type) != INTEGER_TYPE
18412 && !POINTER_TYPE_P (index_type))
18413 || (TYPE_MODE (index_type) != SImode
18414 && TYPE_MODE (index_type) != DImode))
18415 return NULL_TREE;
18417 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18418 return NULL_TREE;
18420 /* v*scatter* insn sign extends index to pointer mode. */
18421 if (TYPE_PRECISION (index_type) < POINTER_SIZE
18422 && TYPE_UNSIGNED (index_type))
18423 return NULL_TREE;
18425 /* Scale can be 1, 2, 4 or 8. */
18426 if (scale <= 0
18427 || scale > 8
18428 || (scale & (scale - 1)) != 0)
18429 return NULL_TREE;
18431 si = TYPE_MODE (index_type) == SImode;
18432 switch (TYPE_MODE (vectype))
18434 case E_V8DFmode:
18435 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18436 break;
18437 case E_V8DImode:
18438 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18439 break;
18440 case E_V16SFmode:
18441 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18442 break;
18443 case E_V16SImode:
18444 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18445 break;
18446 case E_V4DFmode:
18447 if (TARGET_AVX512VL)
18448 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18449 else
18450 return NULL_TREE;
18451 break;
18452 case E_V4DImode:
18453 if (TARGET_AVX512VL)
18454 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18455 else
18456 return NULL_TREE;
18457 break;
18458 case E_V8SFmode:
18459 if (TARGET_AVX512VL)
18460 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18461 else
18462 return NULL_TREE;
18463 break;
18464 case E_V8SImode:
18465 if (TARGET_AVX512VL)
18466 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18467 else
18468 return NULL_TREE;
18469 break;
18470 case E_V2DFmode:
18471 if (TARGET_AVX512VL)
18472 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18473 else
18474 return NULL_TREE;
18475 break;
18476 case E_V2DImode:
18477 if (TARGET_AVX512VL)
18478 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18479 else
18480 return NULL_TREE;
18481 break;
18482 case E_V4SFmode:
18483 if (TARGET_AVX512VL)
18484 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18485 else
18486 return NULL_TREE;
18487 break;
18488 case E_V4SImode:
18489 if (TARGET_AVX512VL)
18490 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18491 else
18492 return NULL_TREE;
18493 break;
18494 default:
18495 return NULL_TREE;
18498 return get_ix86_builtin (code);
18501 /* Return true if it is safe to use the rsqrt optabs to optimize
18502 1.0/sqrt. */
18504 static bool
18505 use_rsqrt_p ()
18507 return (TARGET_SSE && TARGET_SSE_MATH
18508 && flag_finite_math_only
18509 && !flag_trapping_math
18510 && flag_unsafe_math_optimizations);
18513 /* Helper for avx_vpermilps256_operand et al. This is also used by
18514 the expansion functions to turn the parallel back into a mask.
18515 The return value is 0 for no match and the imm8+1 for a match. */
18517 int
18518 avx_vpermilp_parallel (rtx par, machine_mode mode)
18520 unsigned i, nelt = GET_MODE_NUNITS (mode);
18521 unsigned mask = 0;
18522 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
18524 if (XVECLEN (par, 0) != (int) nelt)
18525 return 0;
18527 /* Validate that all of the elements are constants, and not totally
18528 out of range. Copy the data into an integral array to make the
18529 subsequent checks easier. */
18530 for (i = 0; i < nelt; ++i)
18532 rtx er = XVECEXP (par, 0, i);
18533 unsigned HOST_WIDE_INT ei;
18535 if (!CONST_INT_P (er))
18536 return 0;
18537 ei = INTVAL (er);
18538 if (ei >= nelt)
18539 return 0;
18540 ipar[i] = ei;
18543 switch (mode)
18545 case E_V8DFmode:
18546 /* In the 512-bit DFmode case, we can only move elements within
18547 a 128-bit lane. First fill the second part of the mask,
18548 then fallthru. */
18549 for (i = 4; i < 6; ++i)
18551 if (ipar[i] < 4 || ipar[i] >= 6)
18552 return 0;
18553 mask |= (ipar[i] - 4) << i;
18555 for (i = 6; i < 8; ++i)
18557 if (ipar[i] < 6)
18558 return 0;
18559 mask |= (ipar[i] - 6) << i;
18561 /* FALLTHRU */
18563 case E_V4DFmode:
18564 /* In the 256-bit DFmode case, we can only move elements within
18565 a 128-bit lane. */
18566 for (i = 0; i < 2; ++i)
18568 if (ipar[i] >= 2)
18569 return 0;
18570 mask |= ipar[i] << i;
18572 for (i = 2; i < 4; ++i)
18574 if (ipar[i] < 2)
18575 return 0;
18576 mask |= (ipar[i] - 2) << i;
18578 break;
18580 case E_V16SFmode:
18581 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18582 must mirror the permutation in the lower 256 bits. */
18583 for (i = 0; i < 8; ++i)
18584 if (ipar[i] + 8 != ipar[i + 8])
18585 return 0;
18586 /* FALLTHRU */
18588 case E_V8SFmode:
18589 /* In the 256-bit SFmode case, we have full freedom of
18590 movement within the low 128-bit lane, but the high 128-bit
18591 lane must mirror the exact same pattern. */
18592 for (i = 0; i < 4; ++i)
18593 if (ipar[i] + 4 != ipar[i + 4])
18594 return 0;
18595 nelt = 4;
18596 /* FALLTHRU */
18598 case E_V2DFmode:
18599 case E_V4SFmode:
18600 /* In the 128-bit case, we have full freedom in the placement of
18601 the elements from the source operand. */
18602 for (i = 0; i < nelt; ++i)
18603 mask |= ipar[i] << (i * (nelt / 2));
18604 break;
18606 default:
18607 gcc_unreachable ();
18610 /* Make sure success has a non-zero value by adding one. */
18611 return mask + 1;
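/* Worked example for avx_vpermilp_parallel: a V4SFmode parallel
   selecting (1 0 3 2) - swapping within each pair - encodes as
   mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, so the function returns
   0xb2, i.e. the vpermilps immediate plus one.  */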
18614 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18615 the expansion functions to turn the parallel back into a mask.
18616 The return value is 0 for no match and the imm8+1 for a match. */
18618 int
18619 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18621 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18622 unsigned mask = 0;
18623 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
18625 if (XVECLEN (par, 0) != (int) nelt)
18626 return 0;
18628 /* Validate that all of the elements are constants, and not totally
18629 out of range. Copy the data into an integral array to make the
18630 subsequent checks easier. */
18631 for (i = 0; i < nelt; ++i)
18633 rtx er = XVECEXP (par, 0, i);
18634 unsigned HOST_WIDE_INT ei;
18636 if (!CONST_INT_P (er))
18637 return 0;
18638 ei = INTVAL (er);
18639 if (ei >= 2 * nelt)
18640 return 0;
18641 ipar[i] = ei;
18644 /* Validate that each half of the permute selects consecutive elements. */
18645 for (i = 0; i < nelt2 - 1; ++i)
18646 if (ipar[i] + 1 != ipar[i + 1])
18647 return 0;
18648 for (i = nelt2; i < nelt - 1; ++i)
18649 if (ipar[i] + 1 != ipar[i + 1])
18650 return 0;
18652 /* Reconstruct the mask. */
18653 for (i = 0; i < 2; ++i)
18655 unsigned e = ipar[i * nelt2];
18656 if (e % nelt2)
18657 return 0;
18658 e /= nelt2;
18659 mask |= e << (i * 4);
18662 /* Make sure success has a non-zero value by adding one. */
18663 return mask + 1;
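/* Worked example for avx_vperm2f128_parallel: a V4DFmode parallel
   selecting (2 3 4 5) - the high lane of the first operand followed by
   the low lane of the second - reconstructs to imm8 0x21, so the
   function returns 0x22.  */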
18666 /* Return a register priority for hard reg REGNO. */
18667 static int
18668 ix86_register_priority (int hard_regno)
18670 /* ebp and r13 as a base always want a displacement, and r12 as a
18671 base always wants an index. So discourage their usage in an
18672 address. */
18673 if (hard_regno == R12_REG || hard_regno == R13_REG)
18674 return 0;
18675 if (hard_regno == BP_REG)
18676 return 1;
18677 /* New x86-64 int registers result in bigger code size. Discourage
18678 them. */
18679 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18680 return 2;
18681 /* New x86-64 SSE registers result in bigger code size. Discourage
18682 them. */
18683 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18684 return 2;
18685 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18686 return 1;
18687 /* Usage of AX register results in smaller code. Prefer it. */
18688 if (hard_regno == AX_REG)
18689 return 4;
18690 return 3;
18693 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18695 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18696 QImode must go into class Q_REGS.
18697 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18698 movdf to do mem-to-mem moves through integer regs. */
18700 static reg_class_t
18701 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18703 machine_mode mode = GET_MODE (x);
18705 /* We're only allowed to return a subclass of CLASS. Many of the
18706 following checks fail for NO_REGS, so eliminate that early. */
18707 if (regclass == NO_REGS)
18708 return NO_REGS;
18710 /* All classes can load zeros. */
18711 if (x == CONST0_RTX (mode))
18712 return regclass;
18714 /* Force constants into memory if we are loading a (nonzero) constant into
18715 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18716 instructions to load from a constant. */
18717 if (CONSTANT_P (x)
18718 && (MAYBE_MMX_CLASS_P (regclass)
18719 || MAYBE_SSE_CLASS_P (regclass)
18720 || MAYBE_MASK_CLASS_P (regclass)))
18721 return NO_REGS;
18723 /* Floating-point constants need more complex checks. */
18724 if (CONST_DOUBLE_P (x))
18726 /* General regs can load everything. */
18727 if (INTEGER_CLASS_P (regclass))
18728 return regclass;
18730 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18731 zero above. We only want to wind up preferring 80387 registers if
18732 we plan on doing computation with them. */
18733 if (IS_STACK_MODE (mode)
18734 && standard_80387_constant_p (x) > 0)
18736 /* Limit class to FP regs. */
18737 if (FLOAT_CLASS_P (regclass))
18738 return FLOAT_REGS;
18741 return NO_REGS;
18744 /* Prefer SSE regs only, if we can use them for math. */
18745 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18746 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18748 /* Generally when we see PLUS here, it's the function invariant
18749 (plus soft-fp const_int). Which can only be computed into general
18750 regs. */
18751 if (GET_CODE (x) == PLUS)
18752 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18754 /* QImode constants are easy to load, but non-constant QImode data
18755 must go into Q_REGS or ALL_MASK_REGS. */
18756 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18758 if (Q_CLASS_P (regclass))
18759 return regclass;
18760 else if (reg_class_subset_p (Q_REGS, regclass))
18761 return Q_REGS;
18762 else if (MASK_CLASS_P (regclass))
18763 return regclass;
18764 else
18765 return NO_REGS;
18768 return regclass;
18771 /* Discourage putting floating-point values in SSE registers unless
18772 SSE math is being used, and likewise for the 387 registers. */
18773 static reg_class_t
18774 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18776 /* Restrict the output reload class to the register bank that we are doing
18777 math on. If we would like not to return a subset of CLASS, reject this
18778 alternative: if reload cannot do this, it will still use its choice. */
18779 machine_mode mode = GET_MODE (x);
18780 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18781 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18783 if (IS_STACK_MODE (mode))
18784 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18786 return regclass;
18789 static reg_class_t
18790 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18791 machine_mode mode, secondary_reload_info *sri)
18793 /* Double-word spills from general registers to non-offsettable memory
18794 references (zero-extended addresses) require special handling. */
18795 if (TARGET_64BIT
18796 && MEM_P (x)
18797 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18798 && INTEGER_CLASS_P (rclass)
18799 && !offsettable_memref_p (x))
18801 sri->icode = (in_p
18802 ? CODE_FOR_reload_noff_load
18803 : CODE_FOR_reload_noff_store);
18804 /* Add the cost of moving address to a temporary. */
18805 sri->extra_cost = 1;
18807 return NO_REGS;
18810 /* QImode spills from non-QI registers require an
18811 intermediate register on 32-bit targets. */
18812 if (mode == QImode
18813 && ((!TARGET_64BIT && !in_p
18814 && INTEGER_CLASS_P (rclass)
18815 && MAYBE_NON_Q_CLASS_P (rclass))
18816 || (!TARGET_AVX512DQ
18817 && MAYBE_MASK_CLASS_P (rclass))))
18819 int regno = true_regnum (x);
18821 /* Return Q_REGS if the operand is in memory. */
18822 if (regno == -1)
18823 return Q_REGS;
18825 return NO_REGS;
18828 /* This condition handles the corner case where an expression involving
18829 pointers gets vectorized. We're trying to use the address of a
18830 stack slot as a vector initializer.
18832 (set (reg:V2DI 74 [ vect_cst_.2 ])
18833 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18835 Eventually frame gets turned into sp+offset like this:
18837 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18838 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18839 (const_int 392 [0x188]))))
18841 That later gets turned into:
18843 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18844 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18845 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18847 We'll have the following reload recorded:
18849 Reload 0: reload_in (DI) =
18850 (plus:DI (reg/f:DI 7 sp)
18851 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18852 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18853 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18854 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18855 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18856 reload_reg_rtx: (reg:V2DI 22 xmm1)
18858 Which isn't going to work since SSE instructions can't handle scalar
18859 additions. Returning GENERAL_REGS forces the addition into integer
18860 register and reload can handle subsequent reloads without problems. */
18862 if (in_p && GET_CODE (x) == PLUS
18863 && SSE_CLASS_P (rclass)
18864 && SCALAR_INT_MODE_P (mode))
18865 return GENERAL_REGS;
18867 return NO_REGS;
18870 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18872 static bool
18873 ix86_class_likely_spilled_p (reg_class_t rclass)
18875 switch (rclass)
18877 case AREG:
18878 case DREG:
18879 case CREG:
18880 case BREG:
18881 case AD_REGS:
18882 case SIREG:
18883 case DIREG:
18884 case SSE_FIRST_REG:
18885 case FP_TOP_REG:
18886 case FP_SECOND_REG:
18887 return true;
18889 default:
18890 break;
18893 return false;
18896 /* If we are copying between registers from different register sets
18897 (e.g. FP and integer), we may need a memory location.
18899 The function can't work reliably when one of the CLASSES is a class
18900 containing registers from multiple sets. We avoid this by never combining
18901 different sets in a single alternative in the machine description.
18902 Ensure that this constraint holds to avoid unexpected surprises.
18904 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18905 so do not enforce these sanity checks.
18907 To optimize register_move_cost performance, define inline variant. */
18909 static inline bool
18910 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18911 reg_class_t class2, int strict)
18913 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18914 return false;
18916 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18917 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18918 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18919 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18920 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18921 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18922 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18923 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18925 gcc_assert (!strict || lra_in_progress);
18926 return true;
18929 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18930 return true;
18932 /* ??? This is a lie. We do have moves between mmx/general, and for
18933 mmx/sse2. But by saying we need secondary memory we discourage the
18934 register allocator from using the mmx registers unless needed. */
18935 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18936 return true;
18938 /* Between mask and general, we have moves no larger than word size. */
18939 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18941 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18942 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18943 return true;
18946 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18948 /* SSE1 doesn't have any direct moves from other classes. */
18949 if (!TARGET_SSE2)
18950 return true;
18952 /* Between SSE and general, we have moves no larger than word size. */
18953 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18954 || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18955 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18956 return true;
18958 /* If the target says that inter-unit moves are more expensive
18959 than moving through memory, then don't generate them. */
18960 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18961 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18962 return true;
18965 return false;
18968 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18970 static bool
18971 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18972 reg_class_t class2)
18974 return inline_secondary_memory_needed (mode, class1, class2, true);
18977 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18979 get_secondary_mem widens integral modes to BITS_PER_WORD.
18980 There is no need to emit a full 64-bit move on 64-bit targets
18981 for integral modes that can be moved using a 32-bit move. */
18983 static machine_mode
18984 ix86_secondary_memory_needed_mode (machine_mode mode)
18986 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18987 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18988 return mode;
18991 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18993 On the 80386, this is the size of MODE in words,
18994 except in the FP regs, where a single reg is always enough. */
18996 static unsigned char
18997 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18999 if (MAYBE_INTEGER_CLASS_P (rclass))
19001 if (mode == XFmode)
19002 return (TARGET_64BIT ? 2 : 3);
19003 else if (mode == XCmode)
19004 return (TARGET_64BIT ? 4 : 6);
19005 else
19006 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19008 else
19010 if (COMPLEX_MODE_P (mode))
19011 return 2;
19012 else
19013 return 1;
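/* For example, an XFmode value needs 3 general registers on ia32
   (2 on x86-64) and XCmode twice that, while any value held in an
   FP, SSE or MMX register fits in a single register (two for complex
   modes).  */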
19017 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19019 static bool
19020 ix86_can_change_mode_class (machine_mode from, machine_mode to,
19021 reg_class_t regclass)
19023 if (from == to)
19024 return true;
19026 /* x87 registers can't do subreg at all, as all values are reformatted
19027 to extended precision. */
19028 if (MAYBE_FLOAT_CLASS_P (regclass))
19029 return false;
19031 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
19033 /* Vector registers do not support QI or HImode loads. If we don't
19034 disallow a change to these modes, reload will assume it's ok to
19035 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19036 the vec_dupv4hi pattern. */
19037 if (GET_MODE_SIZE (from) < 4)
19038 return false;
19041 return true;
19044 /* Return index of MODE in the sse load/store tables. */
19046 static inline int
19047 sse_store_index (machine_mode mode)
19049 switch (GET_MODE_SIZE (mode))
19051 case 4:
19052 return 0;
19053 case 8:
19054 return 1;
19055 case 16:
19056 return 2;
19057 case 32:
19058 return 3;
19059 case 64:
19060 return 4;
19061 default:
19062 return -1;
19066 /* Return the cost of moving data of mode M between a
19067 register and memory. A value of 2 is the default; this cost is
19068 relative to those in `REGISTER_MOVE_COST'.
19070 This function is used extensively by register_move_cost that is used to
19071 build tables at startup. Make it inline in this case.
19072 When IN is 2, return maximum of in and out move cost.
19074 If moving between registers and memory is more expensive than
19075 between two registers, you should define this macro to express the
19076 relative cost.
19078 Also model the increased cost of moving QImode registers in
19079 non-Q_REGS classes.  */
19081 static inline int
19082 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
19084 int cost;
19085 if (FLOAT_CLASS_P (regclass))
19087 int index;
19088 switch (mode)
19090 case E_SFmode:
19091 index = 0;
19092 break;
19093 case E_DFmode:
19094 index = 1;
19095 break;
19096 case E_XFmode:
19097 index = 2;
19098 break;
19099 default:
19100 return 100;
19102 if (in == 2)
19103 return MAX (ix86_cost->hard_register.fp_load [index],
19104 ix86_cost->hard_register.fp_store [index]);
19105 return in ? ix86_cost->hard_register.fp_load [index]
19106 : ix86_cost->hard_register.fp_store [index];
19108 if (SSE_CLASS_P (regclass))
19110 int index = sse_store_index (mode);
19111 if (index == -1)
19112 return 100;
19113 if (in == 2)
19114 return MAX (ix86_cost->hard_register.sse_load [index],
19115 ix86_cost->hard_register.sse_store [index]);
19116 return in ? ix86_cost->hard_register.sse_load [index]
19117 : ix86_cost->hard_register.sse_store [index];
19119 if (MASK_CLASS_P (regclass))
19121 int index;
19122 switch (GET_MODE_SIZE (mode))
19124 case 1:
19125 index = 0;
19126 break;
19127 case 2:
19128 index = 1;
19129 break;
19130 /* DImode loads and stores assumed to cost the same as SImode. */
19131 default:
19132 index = 2;
19133 break;
19136 if (in == 2)
19137 return MAX (ix86_cost->hard_register.mask_load[index],
19138 ix86_cost->hard_register.mask_store[index]);
19139 return in ? ix86_cost->hard_register.mask_load[index]
19140 : ix86_cost->hard_register.mask_store[index];
19142 if (MMX_CLASS_P (regclass))
19144 int index;
19145 switch (GET_MODE_SIZE (mode))
19147 case 4:
19148 index = 0;
19149 break;
19150 case 8:
19151 index = 1;
19152 break;
19153 default:
19154 return 100;
19156 if (in == 2)
19157 return MAX (ix86_cost->hard_register.mmx_load [index],
19158 ix86_cost->hard_register.mmx_store [index]);
19159 return in ? ix86_cost->hard_register.mmx_load [index]
19160 : ix86_cost->hard_register.mmx_store [index];
19162 switch (GET_MODE_SIZE (mode))
19164 case 1:
19165 if (Q_CLASS_P (regclass) || TARGET_64BIT)
19167 if (!in)
19168 return ix86_cost->hard_register.int_store[0];
19169 if (TARGET_PARTIAL_REG_DEPENDENCY
19170 && optimize_function_for_speed_p (cfun))
19171 cost = ix86_cost->hard_register.movzbl_load;
19172 else
19173 cost = ix86_cost->hard_register.int_load[0];
19174 if (in == 2)
19175 return MAX (cost, ix86_cost->hard_register.int_store[0]);
19176 return cost;
19178 else
19180 if (in == 2)
19181 return MAX (ix86_cost->hard_register.movzbl_load,
19182 ix86_cost->hard_register.int_store[0] + 4);
19183 if (in)
19184 return ix86_cost->hard_register.movzbl_load;
19185 else
19186 return ix86_cost->hard_register.int_store[0] + 4;
19188 break;
19189 case 2:
19190 if (in == 2)
19191 return MAX (ix86_cost->hard_register.int_load[1],
19192 ix86_cost->hard_register.int_store[1]);
19193 return in ? ix86_cost->hard_register.int_load[1]
19194 : ix86_cost->hard_register.int_store[1];
19195 default:
19196 if (in == 2)
19197 cost = MAX (ix86_cost->hard_register.int_load[2],
19198 ix86_cost->hard_register.int_store[2]);
19199 else if (in)
19200 cost = ix86_cost->hard_register.int_load[2];
19201 else
19202 cost = ix86_cost->hard_register.int_store[2];
19203 /* Multiply with the number of GPR moves needed. */
19204 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
19208 static int
19209 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
19211 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
19215 /* Return the cost of moving data from a register in class CLASS1 to
19216 one in class CLASS2.
19218 It is not required that the cost always equal 2 when FROM is the same as TO;
19219 on some machines it is expensive to move between registers if they are not
19220 general registers. */
19222 static int
19223 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
19224 reg_class_t class2_i)
19226 enum reg_class class1 = (enum reg_class) class1_i;
19227 enum reg_class class2 = (enum reg_class) class2_i;
19229 /* In case we require secondary memory, compute cost of the store followed
19230 by load. In order to avoid bad register allocation choices, we need
19231 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19233 if (inline_secondary_memory_needed (mode, class1, class2, false))
19235 int cost = 1;
19237 cost += inline_memory_move_cost (mode, class1, 2);
19238 cost += inline_memory_move_cost (mode, class2, 2);
19240 /* In the case of copying from a general purpose register we may emit
19241 multiple stores followed by a single load, causing a memory size
19242 mismatch stall. Count this as an arbitrarily high cost of 20. */
19243 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
19244 && TARGET_MEMORY_MISMATCH_STALL
19245 && targetm.class_max_nregs (class1, mode)
19246 > targetm.class_max_nregs (class2, mode))
19247 cost += 20;
19249 /* In the case of FP/MMX moves, the registers actually overlap, and we
19250 have to switch modes in order to treat them differently. */
19251 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19252 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19253 cost += 20;
19255 return cost;
19258 /* Moves between MMX and non-MMX units require secondary memory. */
19259 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19260 gcc_unreachable ();
19262 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19263 return (SSE_CLASS_P (class1)
19264 ? ix86_cost->hard_register.sse_to_integer
19265 : ix86_cost->hard_register.integer_to_sse);
19267 /* Moves between mask register and GPR. */
19268 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19270 return (MASK_CLASS_P (class1)
19271 ? ix86_cost->hard_register.mask_to_integer
19272 : ix86_cost->hard_register.integer_to_mask);
19274 /* Moving between mask registers. */
19275 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
19276 return ix86_cost->hard_register.mask_move;
19278 if (MAYBE_FLOAT_CLASS_P (class1))
19279 return ix86_cost->hard_register.fp_move;
19280 if (MAYBE_SSE_CLASS_P (class1))
19282 if (GET_MODE_BITSIZE (mode) <= 128)
19283 return ix86_cost->hard_register.xmm_move;
19284 if (GET_MODE_BITSIZE (mode) <= 256)
19285 return ix86_cost->hard_register.ymm_move;
19286 return ix86_cost->hard_register.zmm_move;
19288 if (MAYBE_MMX_CLASS_P (class1))
19289 return ix86_cost->hard_register.mmx_move;
19290 return 2;
19293 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
19294 words of a value of mode MODE but can be less for certain modes in
19295 special long registers.
19297 Actually there are no two-word move instructions for consecutive
19298 registers. And only registers 0-3 may have mov byte instructions
19299 applied to them. */
19301 static unsigned int
19302 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
19304 if (GENERAL_REGNO_P (regno))
19306 if (mode == XFmode)
19307 return TARGET_64BIT ? 2 : 3;
19308 if (mode == XCmode)
19309 return TARGET_64BIT ? 4 : 6;
19310 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19312 if (COMPLEX_MODE_P (mode))
19313 return 2;
19314 /* Register pair for mask registers. */
19315 if (mode == P2QImode || mode == P2HImode)
19316 return 2;
19317 if (mode == V64SFmode || mode == V64SImode)
19318 return 4;
19319 return 1;
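/* For example, DImode in a general register takes 2 registers on ia32
   and XFmode takes 3 there (2 on x86-64), the mask-pair modes P2QImode
   and P2HImode occupy 2 mask registers, and V64SFmode/V64SImode occupy
   a group of 4 SSE registers.  */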
19322 /* Implement REGMODE_NATURAL_SIZE(MODE). */
19323 unsigned int
19324 ix86_regmode_natural_size (machine_mode mode)
19326 if (mode == P2HImode || mode == P2QImode)
19327 return GET_MODE_SIZE (mode) / 2;
19328 return UNITS_PER_WORD;
19331 /* Implement TARGET_HARD_REGNO_MODE_OK. */
19333 static bool
19334 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19336 /* Flags and only flags can only hold CCmode values. */
19337 if (CC_REGNO_P (regno))
19338 return GET_MODE_CLASS (mode) == MODE_CC;
19339 if (GET_MODE_CLASS (mode) == MODE_CC
19340 || GET_MODE_CLASS (mode) == MODE_RANDOM)
19341 return false;
19342 if (STACK_REGNO_P (regno))
19343 return VALID_FP_MODE_P (mode);
19344 if (MASK_REGNO_P (regno))
19346 /* Register pair only starts at even register number. */
19347 if ((mode == P2QImode || mode == P2HImode))
19348 return MASK_PAIR_REGNO_P(regno);
19350 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
19351 || (TARGET_AVX512BW
19352 && VALID_MASK_AVX512BW_MODE (mode)));
19355 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19356 return false;
19358 if (SSE_REGNO_P (regno))
19360 /* We implement the move patterns for all vector modes into and
19361 out of SSE registers, even when no operation instructions
19362 are available. */
19364 /* For AVX-512 we allow, regardless of regno:
19365 - XI mode
19366 - any of 512-bit wide vector mode
19367 - any scalar mode. */
19368 if (TARGET_AVX512F
19369 && (mode == XImode
19370 || VALID_AVX512F_REG_MODE (mode)
19371 || VALID_AVX512F_SCALAR_MODE (mode)))
19372 return true;
19374 /* For AVX-5124FMAPS or AVX-5124VNNIW
19375 allow V64SF and V64SI modes for special regnos. */
19376 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19377 && (mode == V64SFmode || mode == V64SImode)
19378 && MOD4_SSE_REGNO_P (regno))
19379 return true;
19381 /* TODO check for QI/HI scalars. */
19382 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
19383 if (TARGET_AVX512VL
19384 && (mode == OImode
19385 || mode == TImode
19386 || VALID_AVX256_REG_MODE (mode)
19387 || VALID_AVX512VL_128_REG_MODE (mode)))
19388 return true;
19390 /* xmm16-xmm31 are only available for AVX-512. */
19391 if (EXT_REX_SSE_REGNO_P (regno))
19392 return false;
19394 /* OImode and AVX modes are available only when AVX is enabled. */
19395 return ((TARGET_AVX
19396 && VALID_AVX256_REG_OR_OI_MODE (mode))
19397 || VALID_SSE_REG_MODE (mode)
19398 || VALID_SSE2_REG_MODE (mode)
19399 || VALID_MMX_REG_MODE (mode)
19400 || VALID_MMX_REG_MODE_3DNOW (mode));
19402 if (MMX_REGNO_P (regno))
19404 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19405 so if the register is available at all, then we can move data of
19406 the given mode into or out of it. */
19407 return (VALID_MMX_REG_MODE (mode)
19408 || VALID_MMX_REG_MODE_3DNOW (mode));
19411 if (mode == QImode)
19413 /* Take care for QImode values - they can be in non-QI regs,
19414 but then they do cause partial register stalls. */
19415 if (ANY_QI_REGNO_P (regno))
19416 return true;
19417 if (!TARGET_PARTIAL_REG_STALL)
19418 return true;
19419 /* LRA checks if the hard register is OK for the given mode.
19420 QImode values can live in non-QI regs, so we allow all
19421 registers here. */
19422 if (lra_in_progress)
19423 return true;
19424 return !can_create_pseudo_p ();
19426 /* We handle both integer and floats in the general purpose registers. */
19427 else if (VALID_INT_MODE_P (mode))
19428 return true;
19429 else if (VALID_FP_MODE_P (mode))
19430 return true;
19431 else if (VALID_DFP_MODE_P (mode))
19432 return true;
19433 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19434 on to use that value in smaller contexts, this can easily force a
19435 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19436 supporting DImode, allow it. */
19437 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19438 return true;
19440 return false;
19443 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
19444 saves SSE registers across calls is Win64 (thus no need to check the
19445 current ABI here), and with AVX enabled Win64 only guarantees that
19446 the low 16 bytes are saved. */
19448 static bool
19449 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19450 machine_mode mode)
19452 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19455 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19456 tieable integer mode. */
19458 static bool
19459 ix86_tieable_integer_mode_p (machine_mode mode)
19461 switch (mode)
19463 case E_HImode:
19464 case E_SImode:
19465 return true;
19467 case E_QImode:
19468 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19470 case E_DImode:
19471 return TARGET_64BIT;
19473 default:
19474 return false;
19478 /* Implement TARGET_MODES_TIEABLE_P.
19480 Return true if MODE1 is accessible in a register that can hold MODE2
19481 without copying. That is, all register classes that can hold MODE2
19482 can also hold MODE1. */
19484 static bool
19485 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19487 if (mode1 == mode2)
19488 return true;
19490 if (ix86_tieable_integer_mode_p (mode1)
19491 && ix86_tieable_integer_mode_p (mode2))
19492 return true;
19494 /* MODE2 being XFmode implies fp stack or general regs, which means we
19495 can tie any smaller floating point modes to it. Note that we do not
19496 tie this with TFmode. */
19497 if (mode2 == XFmode)
19498 return mode1 == SFmode || mode1 == DFmode;
19500 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19501 that we can tie it with SFmode. */
19502 if (mode2 == DFmode)
19503 return mode1 == SFmode;
19505 /* If MODE2 is only appropriate for an SSE register, then tie with
19506 any other mode acceptable to SSE registers. */
19507 if (GET_MODE_SIZE (mode2) == 64
19508 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19509 return (GET_MODE_SIZE (mode1) == 64
19510 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19511 if (GET_MODE_SIZE (mode2) == 32
19512 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19513 return (GET_MODE_SIZE (mode1) == 32
19514 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19515 if (GET_MODE_SIZE (mode2) == 16
19516 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19517 return (GET_MODE_SIZE (mode1) == 16
19518 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19520 /* If MODE2 is appropriate for an MMX register, then tie
19521 with any other mode acceptable to MMX registers. */
19522 if (GET_MODE_SIZE (mode2) == 8
19523 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19524 return (GET_MODE_SIZE (mode1) == 8
19525 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19527 return false;
19530 /* Return the cost of moving between two registers of mode MODE. */
19532 static int
19533 ix86_set_reg_reg_cost (machine_mode mode)
19535 unsigned int units = UNITS_PER_WORD;
19537 switch (GET_MODE_CLASS (mode))
19539 default:
19540 break;
19542 case MODE_CC:
19543 units = GET_MODE_SIZE (CCmode);
19544 break;
19546 case MODE_FLOAT:
19547 if ((TARGET_SSE && mode == TFmode)
19548 || (TARGET_80387 && mode == XFmode)
19549 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19550 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19551 units = GET_MODE_SIZE (mode);
19552 break;
19554 case MODE_COMPLEX_FLOAT:
19555 if ((TARGET_SSE && mode == TCmode)
19556 || (TARGET_80387 && mode == XCmode)
19557 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19558 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19559 units = GET_MODE_SIZE (mode);
19560 break;
19562 case MODE_VECTOR_INT:
19563 case MODE_VECTOR_FLOAT:
19564 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19565 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19566 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19567 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19568 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19569 && VALID_MMX_REG_MODE (mode)))
19570 units = GET_MODE_SIZE (mode);
19573 /* Return the cost of moving between two registers of mode MODE,
19574 assuming that the move will be in pieces of at most UNITS bytes. */
19575 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
19578 /* Return cost of vector operation in MODE given that scalar version has
19579 COST. */
19581 static int
19582 ix86_vec_cost (machine_mode mode, int cost)
19584 if (!VECTOR_MODE_P (mode))
19585 return cost;
19587 if (GET_MODE_BITSIZE (mode) == 128
19588 && TARGET_SSE_SPLIT_REGS)
19589 return cost * 2;
19590 if (GET_MODE_BITSIZE (mode) > 128
19591 && TARGET_AVX256_SPLIT_REGS)
19592 return cost * GET_MODE_BITSIZE (mode) / 128;
19593 return cost;
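/* For example, on tunings with TARGET_AVX256_SPLIT_REGS a V8SFmode
   operation is charged 256/128 = 2 times the supplied scalar-derived
   cost, and with TARGET_SSE_SPLIT_REGS a 128-bit operation is charged
   twice.  */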
19596 /* Return cost of multiplication in MODE. */
19598 static int
19599 ix86_multiplication_cost (const struct processor_costs *cost,
19600 enum machine_mode mode)
19602 machine_mode inner_mode = mode;
19603 if (VECTOR_MODE_P (mode))
19604 inner_mode = GET_MODE_INNER (mode);
19606 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19607 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19608 else if (X87_FLOAT_MODE_P (mode))
19609 return cost->fmul;
19610 else if (FLOAT_MODE_P (mode))
19611 return ix86_vec_cost (mode,
19612 inner_mode == DFmode ? cost->mulsd : cost->mulss);
19613 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19615 /* vpmullq is used in this case. No emulation is needed. */
19616 if (TARGET_AVX512DQ)
19617 return ix86_vec_cost (mode, cost->mulss);
19619 /* V*QImode is emulated with 7-13 insns. */
19620 if (mode == V16QImode || mode == V32QImode)
19622 int extra = 11;
19623 if (TARGET_XOP && mode == V16QImode)
19624 extra = 5;
19625 else if (TARGET_SSSE3)
19626 extra = 6;
19627 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19629 /* V*DImode is emulated with 5-8 insns. */
19630 else if (mode == V2DImode || mode == V4DImode)
19632 if (TARGET_XOP && mode == V2DImode)
19633 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19634 else
19635 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19637 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19638 insns, including two PMULUDQ. */
19639 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19640 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19641 else
19642 return ix86_vec_cost (mode, cost->mulss);
19644 else
19645 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
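/* For example, a V16QImode multiply without XOP or SSSE3 is charged
   2 * mulss + 11 * sse_op to reflect its long emulation sequence,
   while with XOP a V2DImode multiply is charged 2 * mulss + 3 * sse_op.  */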
19648 /* Return cost of division in MODE. */
19650 static int
19651 ix86_division_cost (const struct processor_costs *cost,
19652 enum machine_mode mode)
19654 machine_mode inner_mode = mode;
19655 if (VECTOR_MODE_P (mode))
19656 inner_mode = GET_MODE_INNER (mode);
19658 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19659 return inner_mode == DFmode ? cost->divsd : cost->divss;
19660 else if (X87_FLOAT_MODE_P (mode))
19661 return cost->fdiv;
19662 else if (FLOAT_MODE_P (mode))
19663 return ix86_vec_cost (mode,
19664 inner_mode == DFmode ? cost->divsd : cost->divss);
19665 else
19666 return cost->divide[MODE_INDEX (mode)];
19669 #define COSTS_N_BYTES(N) ((N) * 2)
19671 /* Return cost of shift in MODE.
19672 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19673 AND_IN_OP1 specifies whether op1 is the result of an AND, and
19674 SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
19676 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19678 static int
19679 ix86_shift_rotate_cost (const struct processor_costs *cost,
19680 enum machine_mode mode, bool constant_op1,
19681 HOST_WIDE_INT op1_val,
19682 bool speed,
19683 bool and_in_op1,
19684 bool shift_and_truncate,
19685 bool *skip_op0, bool *skip_op1)
19687 if (skip_op0)
19688 *skip_op0 = *skip_op1 = false;
19689 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19691 /* V*QImode is emulated with 1-11 insns. */
19692 if (mode == V16QImode || mode == V32QImode)
19694 int count = 11;
19695 if (TARGET_XOP && mode == V16QImode)
19697 /* For XOP we use vpshab, which requires a broadcast of the
19698 value to the variable shift insn. For constants this
19699 means a V16Q const in mem; even when we can perform the
19700 shift with one insn set the cost to prefer paddb. */
19701 if (constant_op1)
19703 if (skip_op1)
19704 *skip_op1 = true;
19705 return ix86_vec_cost (mode,
19706 cost->sse_op
19707 + (speed
19709 : COSTS_N_BYTES
19710 (GET_MODE_UNIT_SIZE (mode))));
19712 count = 3;
19714 else if (TARGET_SSSE3)
19715 count = 7;
19716 return ix86_vec_cost (mode, cost->sse_op * count);
19718 else
19719 return ix86_vec_cost (mode, cost->sse_op);
19721 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19723 if (constant_op1)
19725 if (op1_val > 32)
19726 return cost->shift_const + COSTS_N_INSNS (2);
19727 else
19728 return cost->shift_const * 2;
19730 else
19732 if (and_in_op1)
19733 return cost->shift_var * 2;
19734 else
19735 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19738 else
19740 if (constant_op1)
19741 return cost->shift_const;
19742 else if (shift_and_truncate)
19744 if (skip_op0)
19745 *skip_op0 = *skip_op1 = true;
19746 /* Return the cost after shift-and-truncation. */
19747 return cost->shift_var;
19749 else
19750 return cost->shift_var;
19752 return cost->shift_const;
19755 /* Compute a (partial) cost for rtx X. Return true if the complete
19756 cost has been computed, and false if subexpressions should be
19757 scanned. In either case, *TOTAL contains the cost result. */
19759 static bool
19760 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19761 int *total, bool speed)
19763 rtx mask;
19764 enum rtx_code code = GET_CODE (x);
19765 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19766 const struct processor_costs *cost
19767 = speed ? ix86_tune_cost : &ix86_size_cost;
19768 int src_cost;
19770 switch (code)
19772 case SET:
19773 if (register_operand (SET_DEST (x), VOIDmode)
19774 && register_operand (SET_SRC (x), VOIDmode))
19776 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19777 return true;
19780 if (register_operand (SET_SRC (x), VOIDmode))
19781 /* Avoid potentially incorrect high cost from rtx_costs
19782 for non-tieable SUBREGs. */
19783 src_cost = 0;
19784 else
19786 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19788 if (CONSTANT_P (SET_SRC (x)))
19789 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19790 a small value, possibly zero for cheap constants. */
19791 src_cost += COSTS_N_INSNS (1);
19794 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19795 return true;
19797 case CONST_INT:
19798 case CONST:
19799 case LABEL_REF:
19800 case SYMBOL_REF:
19801 if (x86_64_immediate_operand (x, VOIDmode))
19802 *total = 0;
19803 else
19804 *total = 1;
19805 return true;
19807 case CONST_DOUBLE:
19808 if (IS_STACK_MODE (mode))
19809 switch (standard_80387_constant_p (x))
19811 case -1:
19812 case 0:
19813 break;
19814 case 1: /* 0.0 */
19815 *total = 1;
19816 return true;
19817 default: /* Other constants */
19818 *total = 2;
19819 return true;
19821 /* FALLTHRU */
19823 case CONST_VECTOR:
19824 switch (standard_sse_constant_p (x, mode))
19826 case 0:
19827 break;
19828 case 1: /* 0: xor eliminates false dependency */
19829 *total = 0;
19830 return true;
19831 default: /* -1: cmp contains false dependency */
19832 *total = 1;
19833 return true;
19835 /* FALLTHRU */
19837 case CONST_WIDE_INT:
19838 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19839 it'll probably end up. Add a penalty for size. */
19840 *total = (COSTS_N_INSNS (1)
19841 + (!TARGET_64BIT && flag_pic)
19842 + (GET_MODE_SIZE (mode) <= 4
19843 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19844 return true;
19846 case ZERO_EXTEND:
19847 /* Zero extension is often completely free on x86_64, so make
19848 it as cheap as possible. */
19849 if (TARGET_64BIT && mode == DImode
19850 && GET_MODE (XEXP (x, 0)) == SImode)
19851 *total = 1;
19852 else if (TARGET_ZERO_EXTEND_WITH_AND)
19853 *total = cost->add;
19854 else
19855 *total = cost->movzx;
19856 return false;
19858 case SIGN_EXTEND:
19859 *total = cost->movsx;
19860 return false;
19862 case ASHIFT:
19863 if (SCALAR_INT_MODE_P (mode)
19864 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19865 && CONST_INT_P (XEXP (x, 1)))
19867 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19868 if (value == 1)
19870 *total = cost->add;
19871 return false;
19873 if ((value == 2 || value == 3)
19874 && cost->lea <= cost->shift_const)
19876 *total = cost->lea;
19877 return false;
19880 /* FALLTHRU */
19882 case ROTATE:
19883 case ASHIFTRT:
19884 case LSHIFTRT:
19885 case ROTATERT:
19886 bool skip_op0, skip_op1;
19887 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19888 CONST_INT_P (XEXP (x, 1))
19889 ? INTVAL (XEXP (x, 1)) : -1,
19890 speed,
19891 GET_CODE (XEXP (x, 1)) == AND,
19892 SUBREG_P (XEXP (x, 1))
19893 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19894 &skip_op0, &skip_op1);
19895 if (skip_op0 || skip_op1)
19897 if (!skip_op0)
19898 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19899 if (!skip_op1)
19900 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19901 return true;
19903 return false;
19905 case FMA:
19907 rtx sub;
19909 gcc_assert (FLOAT_MODE_P (mode));
19910 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19912 *total = ix86_vec_cost (mode,
19913 GET_MODE_INNER (mode) == SFmode
19914 ? cost->fmass : cost->fmasd);
19915 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19917 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19918 sub = XEXP (x, 0);
19919 if (GET_CODE (sub) == NEG)
19920 sub = XEXP (sub, 0);
19921 *total += rtx_cost (sub, mode, FMA, 0, speed);
19923 sub = XEXP (x, 2);
19924 if (GET_CODE (sub) == NEG)
19925 sub = XEXP (sub, 0);
19926 *total += rtx_cost (sub, mode, FMA, 2, speed);
19927 return true;
19930 case MULT:
19931 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19933 rtx op0 = XEXP (x, 0);
19934 rtx op1 = XEXP (x, 1);
19935 int nbits;
19936 if (CONST_INT_P (XEXP (x, 1)))
19938 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19939 for (nbits = 0; value != 0; value &= value - 1)
19940 nbits++;
19942 else
19943 /* This is arbitrary. */
19944 nbits = 7;
19946 /* Compute costs correctly for widening multiplication. */
19947 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19948 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19949 == GET_MODE_SIZE (mode))
19951 int is_mulwiden = 0;
19952 machine_mode inner_mode = GET_MODE (op0);
19954 if (GET_CODE (op0) == GET_CODE (op1))
19955 is_mulwiden = 1, op1 = XEXP (op1, 0);
19956 else if (CONST_INT_P (op1))
19958 if (GET_CODE (op0) == SIGN_EXTEND)
19959 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19960 == INTVAL (op1);
19961 else
19962 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19965 if (is_mulwiden)
19966 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19969 *total = (cost->mult_init[MODE_INDEX (mode)]
19970 + nbits * cost->mult_bit
19971 + rtx_cost (op0, mode, outer_code, opno, speed)
19972 + rtx_cost (op1, mode, outer_code, opno, speed));
19974 return true;
19976 *total = ix86_multiplication_cost (cost, mode);
19977 return false;
19979 case DIV:
19980 case UDIV:
19981 case MOD:
19982 case UMOD:
19983 *total = ix86_division_cost (cost, mode);
19984 return false;
19986 case PLUS:
19987 if (GET_MODE_CLASS (mode) == MODE_INT
19988 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19990 if (GET_CODE (XEXP (x, 0)) == PLUS
19991 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19992 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19993 && CONSTANT_P (XEXP (x, 1)))
19995 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19996 if (val == 2 || val == 4 || val == 8)
19998 *total = cost->lea;
19999 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20000 outer_code, opno, speed);
20001 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
20002 outer_code, opno, speed);
20003 *total += rtx_cost (XEXP (x, 1), mode,
20004 outer_code, opno, speed);
20005 return true;
20008 else if (GET_CODE (XEXP (x, 0)) == MULT
20009 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20011 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20012 if (val == 2 || val == 4 || val == 8)
20014 *total = cost->lea;
20015 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20016 outer_code, opno, speed);
20017 *total += rtx_cost (XEXP (x, 1), mode,
20018 outer_code, opno, speed);
20019 return true;
20022 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20024 /* Add with carry, ignore the cost of adding a carry flag. */
20025 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
20026 *total = cost->add;
20027 else
20029 *total = cost->lea;
20030 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20031 outer_code, opno, speed);
20034 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20035 outer_code, opno, speed);
20036 *total += rtx_cost (XEXP (x, 1), mode,
20037 outer_code, opno, speed);
20038 return true;
20041 /* FALLTHRU */
20043 case MINUS:
20044 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
20045 if (GET_MODE_CLASS (mode) == MODE_INT
20046 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
20047 && GET_CODE (XEXP (x, 0)) == MINUS
20048 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
20050 *total = cost->add;
20051 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20052 outer_code, opno, speed);
20053 *total += rtx_cost (XEXP (x, 1), mode,
20054 outer_code, opno, speed);
20055 return true;
20058 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20060 *total = cost->addss;
20061 return false;
20063 else if (X87_FLOAT_MODE_P (mode))
20065 *total = cost->fadd;
20066 return false;
20068 else if (FLOAT_MODE_P (mode))
20070 *total = ix86_vec_cost (mode, cost->addss);
20071 return false;
20073 /* FALLTHRU */
20075 case AND:
20076 case IOR:
20077 case XOR:
20078 if (GET_MODE_CLASS (mode) == MODE_INT
20079 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20081 *total = (cost->add * 2
20082 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
20083 << (GET_MODE (XEXP (x, 0)) != DImode))
20084 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
20085 << (GET_MODE (XEXP (x, 1)) != DImode)));
20086 return true;
20088 /* FALLTHRU */
20090 case NEG:
20091 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20093 *total = cost->sse_op;
20094 return false;
20096 else if (X87_FLOAT_MODE_P (mode))
20098 *total = cost->fchs;
20099 return false;
20101 else if (FLOAT_MODE_P (mode))
20103 *total = ix86_vec_cost (mode, cost->sse_op);
20104 return false;
20106 /* FALLTHRU */
20108 case NOT:
20109 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20110 *total = ix86_vec_cost (mode, cost->sse_op);
20111 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20112 *total = cost->add * 2;
20113 else
20114 *total = cost->add;
20115 return false;
20117 case COMPARE:
20118 rtx op0, op1;
20119 op0 = XEXP (x, 0);
20120 op1 = XEXP (x, 1);
20121 if (GET_CODE (op0) == ZERO_EXTRACT
20122 && XEXP (op0, 1) == const1_rtx
20123 && CONST_INT_P (XEXP (op0, 2))
20124 && op1 == const0_rtx)
20126 /* This kind of construct is implemented using test[bwl].
20127 Treat it as if we had an AND. */
20128 mode = GET_MODE (XEXP (op0, 0));
20129 *total = (cost->add
20130 + rtx_cost (XEXP (op0, 0), mode, outer_code,
20131 opno, speed)
20132 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
20133 return true;
20136 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
20138 /* This is an overflow detection, count it as a normal compare. */
20139 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
20140 return true;
20143 rtx geu;
20144 /* Match x
20145 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
20146 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
20147 if (mode == CCCmode
20148 && GET_CODE (op0) == NEG
20149 && GET_CODE (geu = XEXP (op0, 0)) == GEU
20150 && REG_P (XEXP (geu, 0))
20151 && (GET_MODE (XEXP (geu, 0)) == CCCmode
20152 || GET_MODE (XEXP (geu, 0)) == CCmode)
20153 && REGNO (XEXP (geu, 0)) == FLAGS_REG
20154 && XEXP (geu, 1) == const0_rtx
20155 && GET_CODE (op1) == LTU
20156 && REG_P (XEXP (op1, 0))
20157 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
20158 && REGNO (XEXP (op1, 0)) == FLAGS_REG
20159 && XEXP (op1, 1) == const0_rtx)
20161 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
20162 *total = 0;
20163 return true;
20166 /* The embedded comparison operand is completely free. */
20167 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
20168 *total = 0;
20170 return false;
20172 case FLOAT_EXTEND:
20173 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20174 *total = 0;
20175 else
20176 *total = ix86_vec_cost (mode, cost->addss);
20177 return false;
20179 case FLOAT_TRUNCATE:
20180 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20181 *total = cost->fadd;
20182 else
20183 *total = ix86_vec_cost (mode, cost->addss);
20184 return false;
20186 case ABS:
20187 /* SSE requires memory load for the constant operand. It may make
20188 sense to account for this. Of course the constant operand may or
20189 may not be reused. */
20190 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20191 *total = cost->sse_op;
20192 else if (X87_FLOAT_MODE_P (mode))
20193 *total = cost->fabs;
20194 else if (FLOAT_MODE_P (mode))
20195 *total = ix86_vec_cost (mode, cost->sse_op);
20196 return false;
20198 case SQRT:
20199 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20200 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
20201 else if (X87_FLOAT_MODE_P (mode))
20202 *total = cost->fsqrt;
20203 else if (FLOAT_MODE_P (mode))
20204 *total = ix86_vec_cost (mode,
20205 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
20206 return false;
20208 case UNSPEC:
20209 if (XINT (x, 1) == UNSPEC_TP)
20210 *total = 0;
20211 return false;
20213 case VEC_SELECT:
20214 case VEC_CONCAT:
20215 case VEC_DUPLICATE:
20216 /* ??? Assume all of these vector manipulation patterns are
20217 recognizable. In which case they all pretty much have the
20218 same cost. */
20219 *total = cost->sse_op;
20220 return true;
20221 case VEC_MERGE:
20222 mask = XEXP (x, 2);
20223 /* This is a masked instruction; assume the same cost
20224 as the non-masked variant. */
20225 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
20226 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
20227 else
20228 *total = cost->sse_op;
20229 return true;
20231 default:
20232 return false;
20236 #if TARGET_MACHO
20238 static int current_machopic_label_num;
20240 /* Given a symbol name and its associated stub, write out the
20241 definition of the stub. */
20243 void
20244 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20246 unsigned int length;
20247 char *binder_name, *symbol_name, lazy_ptr_name[32];
20248 int label = ++current_machopic_label_num;
20250 /* For 64-bit we shouldn't get here. */
20251 gcc_assert (!TARGET_64BIT);
20253 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20254 symb = targetm.strip_name_encoding (symb);
20256 length = strlen (stub);
20257 binder_name = XALLOCAVEC (char, length + 32);
20258 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20260 length = strlen (symb);
20261 symbol_name = XALLOCAVEC (char, length + 32);
20262 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20264 sprintf (lazy_ptr_name, "L%d$lz", label);
20266 if (MACHOPIC_ATT_STUB)
20267 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
20268 else if (MACHOPIC_PURE)
20269 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
20270 else
20271 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20273 fprintf (file, "%s:\n", stub);
20274 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20276 if (MACHOPIC_ATT_STUB)
20278 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
20280 else if (MACHOPIC_PURE)
20282 /* PIC stub. */
20283 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20284 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20285 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
20286 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
20287 label, lazy_ptr_name, label);
20288 fprintf (file, "\tjmp\t*%%ecx\n");
20290 else
20291 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20293 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
20294 it needs no stub-binding-helper. */
20295 if (MACHOPIC_ATT_STUB)
20296 return;
20298 fprintf (file, "%s:\n", binder_name);
20300 if (MACHOPIC_PURE)
20302 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
20303 fprintf (file, "\tpushl\t%%ecx\n");
20305 else
20306 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20308 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
20310 /* N.B. Keep the correspondence of these
20311 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
20312 old-pic/new-pic/non-pic stubs; altering this will break
20313 compatibility with existing dylibs. */
20314 if (MACHOPIC_PURE)
20316 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20317 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
20319 else
20320 /* 16-byte -mdynamic-no-pic stub. */
20321 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
20323 fprintf (file, "%s:\n", lazy_ptr_name);
20324 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20325 fprintf (file, ASM_LONG "%s\n", binder_name);
20327 #endif /* TARGET_MACHO */
20329 /* Order the registers for register allocator. */
20331 void
20332 x86_order_regs_for_local_alloc (void)
20334 int pos = 0;
20335 int i;
20337 /* First allocate the local general purpose registers. */
20338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20339 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
20340 reg_alloc_order [pos++] = i;
20342 /* Global general purpose registers. */
20343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20344 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
20345 reg_alloc_order [pos++] = i;
20347 /* x87 registers come first in case we are doing FP math
20348 using them. */
20349 if (!TARGET_SSE_MATH)
20350 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20351 reg_alloc_order [pos++] = i;
20353 /* SSE registers. */
20354 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20355 reg_alloc_order [pos++] = i;
20356 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20357 reg_alloc_order [pos++] = i;
20359 /* Extended REX SSE registers. */
20360 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
20361 reg_alloc_order [pos++] = i;
20363 /* Mask register. */
20364 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
20365 reg_alloc_order [pos++] = i;
20367 /* x87 registers. */
20368 if (TARGET_SSE_MATH)
20369 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20370 reg_alloc_order [pos++] = i;
20372 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20373 reg_alloc_order [pos++] = i;
20375 /* Initialize the rest of array as we do not allocate some registers
20376 at all. */
20377 while (pos < FIRST_PSEUDO_REGISTER)
20378 reg_alloc_order [pos++] = 0;
20381 static bool
20382 ix86_ms_bitfield_layout_p (const_tree record_type)
20384 return ((TARGET_MS_BITFIELD_LAYOUT
20385 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20386 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20389 /* Returns an expression indicating where the this parameter is
20390 located on entry to the FUNCTION. */
20392 static rtx
20393 x86_this_parameter (tree function)
20395 tree type = TREE_TYPE (function);
20396 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20397 int nregs;
20399 if (TARGET_64BIT)
20401 const int *parm_regs;
20403 if (ix86_function_type_abi (type) == MS_ABI)
20404 parm_regs = x86_64_ms_abi_int_parameter_registers;
20405 else
20406 parm_regs = x86_64_int_parameter_registers;
20407 return gen_rtx_REG (Pmode, parm_regs[aggr]);
20410 nregs = ix86_function_regparm (type, function);
20412 if (nregs > 0 && !stdarg_p (type))
20414 int regno;
20415 unsigned int ccvt = ix86_get_callcvt (type);
20417 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20418 regno = aggr ? DX_REG : CX_REG;
20419 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20421 regno = CX_REG;
20422 if (aggr)
20423 return gen_rtx_MEM (SImode,
20424 plus_constant (Pmode, stack_pointer_rtx, 4));
20426 else
20428 regno = AX_REG;
20429 if (aggr)
20431 regno = DX_REG;
20432 if (nregs == 1)
20433 return gen_rtx_MEM (SImode,
20434 plus_constant (Pmode,
20435 stack_pointer_rtx, 4));
20438 return gen_rtx_REG (SImode, regno);
20441 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20442 aggr ? 8 : 4));
20445 /* Determine whether x86_output_mi_thunk can succeed. */
20447 static bool
20448 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20449 const_tree function)
20451 /* 64-bit can handle anything. */
20452 if (TARGET_64BIT)
20453 return true;
20455 /* For 32-bit, everything's fine if we have one free register. */
20456 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20457 return true;
20459 /* Need a free register for vcall_offset. */
20460 if (vcall_offset)
20461 return false;
20463 /* Need a free register for GOT references. */
20464 if (flag_pic && !targetm.binds_local_p (function))
20465 return false;
20467 /* Otherwise ok. */
20468 return true;
20471 /* Output the assembler code for a thunk function. THUNK_DECL is the
20472 declaration for the thunk function itself, FUNCTION is the decl for
20473 the target function. DELTA is an immediate constant offset to be
20474 added to THIS. If VCALL_OFFSET is nonzero, the word at
20475 *(*this + vcall_offset) should be added to THIS. */
20477 static void
20478 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20479 HOST_WIDE_INT vcall_offset, tree function)
20481 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20482 rtx this_param = x86_this_parameter (function);
20483 rtx this_reg, tmp, fnaddr;
20484 unsigned int tmp_regno;
20485 rtx_insn *insn;
20487 if (TARGET_64BIT)
20488 tmp_regno = R10_REG;
20489 else
20491 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20492 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20493 tmp_regno = AX_REG;
20494 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20495 tmp_regno = DX_REG;
20496 else
20497 tmp_regno = CX_REG;
20500 emit_note (NOTE_INSN_PROLOGUE_END);
20502 /* If CET branch protection is enabled, insert an ENDBR instruction. */
20503 if ((flag_cf_protection & CF_BRANCH))
20504 emit_insn (gen_nop_endbr ());
20506 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20507 pull it in now and let DELTA benefit. */
20508 if (REG_P (this_param))
20509 this_reg = this_param;
20510 else if (vcall_offset)
20512 /* Put the this parameter into %eax. */
20513 this_reg = gen_rtx_REG (Pmode, AX_REG);
20514 emit_move_insn (this_reg, this_param);
20516 else
20517 this_reg = NULL_RTX;
20519 /* Adjust the this parameter by a fixed constant. */
20520 if (delta)
20522 rtx delta_rtx = GEN_INT (delta);
20523 rtx delta_dst = this_reg ? this_reg : this_param;
20525 if (TARGET_64BIT)
20527 if (!x86_64_general_operand (delta_rtx, Pmode))
20529 tmp = gen_rtx_REG (Pmode, tmp_regno);
20530 emit_move_insn (tmp, delta_rtx);
20531 delta_rtx = tmp;
20535 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20538 /* Adjust the this parameter by a value stored in the vtable. */
20539 if (vcall_offset)
20541 rtx vcall_addr, vcall_mem, this_mem;
20543 tmp = gen_rtx_REG (Pmode, tmp_regno);
20545 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20546 if (Pmode != ptr_mode)
20547 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20548 emit_move_insn (tmp, this_mem);
20550 /* Adjust the this parameter. */
20551 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20552 if (TARGET_64BIT
20553 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20555 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20556 emit_move_insn (tmp2, GEN_INT (vcall_offset));
20557 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20560 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20561 if (Pmode != ptr_mode)
20562 emit_insn (gen_addsi_1_zext (this_reg,
20563 gen_rtx_REG (ptr_mode,
20564 REGNO (this_reg)),
20565 vcall_mem));
20566 else
20567 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20570 /* If necessary, drop THIS back to its stack slot. */
20571 if (this_reg && this_reg != this_param)
20572 emit_move_insn (this_param, this_reg);
20574 fnaddr = XEXP (DECL_RTL (function), 0);
20575 if (TARGET_64BIT)
20577 if (!flag_pic || targetm.binds_local_p (function)
20578 || TARGET_PECOFF)
20580 else
20582 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20583 tmp = gen_rtx_CONST (Pmode, tmp);
20584 fnaddr = gen_const_mem (Pmode, tmp);
20587 else
20589 if (!flag_pic || targetm.binds_local_p (function))
20591 #if TARGET_MACHO
20592 else if (TARGET_MACHO)
20594 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20595 fnaddr = XEXP (fnaddr, 0);
20597 #endif /* TARGET_MACHO */
20598 else
20600 tmp = gen_rtx_REG (Pmode, CX_REG);
20601 output_set_got (tmp, NULL_RTX);
20603 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20604 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20605 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20606 fnaddr = gen_const_mem (Pmode, fnaddr);
20610 /* Our sibling call patterns do not allow memories, because we have no
20611 predicate that can distinguish between frame and non-frame memory.
20612 For our purposes here, we can get away with (ab)using a jump pattern,
20613 because we're going to do no optimization. */
20614 if (MEM_P (fnaddr))
20616 if (sibcall_insn_operand (fnaddr, word_mode))
20618 fnaddr = XEXP (DECL_RTL (function), 0);
20619 tmp = gen_rtx_MEM (QImode, fnaddr);
20620 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20621 tmp = emit_call_insn (tmp);
20622 SIBLING_CALL_P (tmp) = 1;
20624 else
20625 emit_jump_insn (gen_indirect_jump (fnaddr));
20627 else
20629 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20631 // CM_LARGE_PIC always uses pseudo PIC register which is
20632 // uninitialized. Since FUNCTION is local and calling it
20633 // doesn't go through PLT, we use scratch register %r11 as
20634 // PIC register and initialize it here.
20635 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20636 ix86_init_large_pic_reg (tmp_regno);
20637 fnaddr = legitimize_pic_address (fnaddr,
20638 gen_rtx_REG (Pmode, tmp_regno));
20641 if (!sibcall_insn_operand (fnaddr, word_mode))
20643 tmp = gen_rtx_REG (word_mode, tmp_regno);
20644 if (GET_MODE (fnaddr) != word_mode)
20645 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20646 emit_move_insn (tmp, fnaddr);
20647 fnaddr = tmp;
20650 tmp = gen_rtx_MEM (QImode, fnaddr);
20651 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20652 tmp = emit_call_insn (tmp);
20653 SIBLING_CALL_P (tmp) = 1;
20655 emit_barrier ();
20657 /* Emit just enough of rest_of_compilation to get the insns emitted. */
20658 insn = get_insns ();
20659 shorten_branches (insn);
20660 assemble_start_function (thunk_fndecl, fnname);
20661 final_start_function (insn, file, 1);
20662 final (insn, file, 1);
20663 final_end_function ();
20664 assemble_end_function (thunk_fndecl, fnname);
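/* For reference, a rough C-level sketch of what the emitted thunk computes
   before tail-calling FUNCTION (illustrative pseudocode only, not actual
   output):

       char *p = (char *) this_ptr;
       p += delta;                          // fixed adjustment
       if (vcall_offset)                    // virtual-base adjustment
         p += *(ptrdiff_t *) (*(char **) p + vcall_offset);
       return function (p, ...);            // emitted as a sibling call/jump

   i.e. DELTA is added unconditionally, and VCALL_OFFSET indexes the vtable
   that the (already adjusted) this pointer points to.  */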
20667 static void
20668 x86_file_start (void)
20670 default_file_start ();
20671 if (TARGET_16BIT)
20672 fputs ("\t.code16gcc\n", asm_out_file);
20673 #if TARGET_MACHO
20674 darwin_file_start ();
20675 #endif
20676 if (X86_FILE_START_VERSION_DIRECTIVE)
20677 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20678 if (X86_FILE_START_FLTUSED)
20679 fputs ("\t.global\t__fltused\n", asm_out_file);
20680 if (ix86_asm_dialect == ASM_INTEL)
20681 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20685 int x86_field_alignment (tree type, int computed)
20687 machine_mode mode;
20689 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20690 return computed;
20691 if (TARGET_IAMCU)
20692 return iamcu_alignment (type, computed);
20693 type = strip_array_types (type);
20694 mode = TYPE_MODE (type);
20695 if (mode == DFmode || mode == DCmode
20696 || GET_MODE_CLASS (mode) == MODE_INT
20697 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20699 if (TYPE_ATOMIC (type) && computed > 32)
20701 static bool warned;
20703 if (!warned && warn_psabi)
20705 const char *url
20706 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
20708 warned = true;
20709 inform (input_location, "the alignment of %<_Atomic %T%> "
20710 "fields changed in %{GCC 11.1%}",
20711 TYPE_MAIN_VARIANT (type), url);
20714 else
20715 return MIN (32, computed);
20717 return computed;
20720 /* Print call to TARGET to FILE. */
20722 static void
20723 x86_print_call_or_nop (FILE *file, const char *target)
20725 if (flag_nop_mcount || !strcmp (target, "nop"))
20726 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20727 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20728 else
20729 fprintf (file, "1:\tcall\t%s\n", target);
20732 static bool
20733 current_fentry_name (const char **name)
20735 tree attr = lookup_attribute ("fentry_name",
20736 DECL_ATTRIBUTES (current_function_decl));
20737 if (!attr)
20738 return false;
20739 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20740 return true;
20743 static bool
20744 current_fentry_section (const char **name)
20746 tree attr = lookup_attribute ("fentry_section",
20747 DECL_ATTRIBUTES (current_function_decl));
20748 if (!attr)
20749 return false;
20750 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20751 return true;
20754 /* Output assembler code to FILE to increment profiler label # LABELNO
20755 for profiling a function entry. */
20756 void
20757 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20759 if (cfun->machine->insn_queued_at_entrance)
20761 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
20762 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20763 unsigned int patch_area_size
20764 = crtl->patch_area_size - crtl->patch_area_entry;
20765 if (patch_area_size)
20766 ix86_output_patchable_area (patch_area_size,
20767 crtl->patch_area_entry == 0);
20770 const char *mcount_name = MCOUNT_NAME;
20772 if (current_fentry_name (&mcount_name))
20774 else if (fentry_name)
20775 mcount_name = fentry_name;
20776 else if (flag_fentry)
20777 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20779 if (TARGET_64BIT)
20781 #ifndef NO_PROFILE_COUNTERS
20782 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20783 #endif
20785 if (!TARGET_PECOFF && flag_pic)
20786 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20787 else
20788 x86_print_call_or_nop (file, mcount_name);
20790 else if (flag_pic)
20792 #ifndef NO_PROFILE_COUNTERS
20793 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20794 LPREFIX, labelno);
20795 #endif
20796 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20798 else
20800 #ifndef NO_PROFILE_COUNTERS
20801 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20802 LPREFIX, labelno);
20803 #endif
20804 x86_print_call_or_nop (file, mcount_name);
20807 if (flag_record_mcount
20808 || lookup_attribute ("fentry_section",
20809 DECL_ATTRIBUTES (current_function_decl)))
20811 const char *sname = "__mcount_loc";
20813 if (current_fentry_section (&sname))
20815 else if (fentry_section)
20816 sname = fentry_section;
20818 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20819 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20820 fprintf (file, "\t.previous\n");
20824 /* We don't have exact information about the insn sizes, but we may assume
20825 quite safely that we are informed about all 1 byte insns and memory
20826 address sizes. This is enough to eliminate unnecessary padding in
20827 99% of cases. */
20830 static int ix86_min_insn_size (rtx_insn *insn)
20832 int l = 0, len;
20834 if (!INSN_P (insn) || !active_insn_p (insn))
20835 return 0;
20837 /* Discard alignments we've emitted, and jump instructions. */
20838 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20839 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20840 return 0;
20842 /* Important case - calls are always 5 bytes.
20843 It is common to have many calls in a row. */
20844 if (CALL_P (insn)
20845 && symbolic_reference_mentioned_p (PATTERN (insn))
20846 && !SIBLING_CALL_P (insn))
20847 return 5;
20848 len = get_attr_length (insn);
20849 if (len <= 1)
20850 return 1;
20852 /* For normal instructions we rely on get_attr_length being exact,
20853 with a few exceptions. */
20854 if (!JUMP_P (insn))
20856 enum attr_type type = get_attr_type (insn);
20858 switch (type)
20860 case TYPE_MULTI:
20861 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20862 || asm_noperands (PATTERN (insn)) >= 0)
20863 return 0;
20864 break;
20865 case TYPE_OTHER:
20866 case TYPE_FCMP:
20867 break;
20868 default:
20869 /* Otherwise trust get_attr_length. */
20870 return len;
20873 l = get_attr_length_address (insn);
20874 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20875 l = 4;
20877 if (l)
20878 return 1+l;
20879 else
20880 return 2;
20883 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20885 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20886 window. */
20888 static void
20889 ix86_avoid_jump_mispredicts (void)
20891 rtx_insn *insn, *start = get_insns ();
20892 int nbytes = 0, njumps = 0;
20893 bool isjump = false;
20895 /* Look for all minimal intervals of instructions containing 4 jumps.
20896 The intervals are bounded by START and INSN. NBYTES is the total
20897 size of instructions in the interval including INSN and not including
20898 START. When NBYTES is smaller than 16, it is possible that the end of
20899 START and INSN end up in the same 16-byte window.
20901 The smallest offset in the window at which INSN can start occurs when START
20902 ends at offset 0; INSN then starts at offset NBYTES - sizeof (INSN).
20903 We emit a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
20905 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
20906 have to, since control transfer to its label(s) can be performed through other
20907 means, and we also estimate the minimum length of all asm stmts as 0. */
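/* As a worked example of the formulas above (hypothetical numbers): with
   NBYTES == 14 and a 2-byte INSN, INSN can start as early as offset
   14 - 2 == 12 in the window, so we emit a p2align with maxskip
   15 - 14 + 2 == 3 to push the fourth jump out of that 16-byte window.  */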
20908 for (insn = start; insn; insn = NEXT_INSN (insn))
20910 int min_size;
20912 if (LABEL_P (insn))
20914 align_flags alignment = label_to_alignment (insn);
20915 int align = alignment.levels[0].log;
20916 int max_skip = alignment.levels[0].maxskip;
20918 if (max_skip > 15)
20919 max_skip = 15;
20920 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20921 already in the current 16 byte page, because otherwise
20922 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20923 bytes to reach 16 byte boundary. */
20924 if (align <= 0
20925 || (align <= 3 && max_skip != (1 << align) - 1))
20926 max_skip = 0;
20927 if (dump_file)
20928 fprintf (dump_file, "Label %i with max_skip %i\n",
20929 INSN_UID (insn), max_skip);
20930 if (max_skip)
20932 while (nbytes + max_skip >= 16)
20934 start = NEXT_INSN (start);
20935 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20936 || CALL_P (start))
20937 njumps--, isjump = true;
20938 else
20939 isjump = false;
20940 nbytes -= ix86_min_insn_size (start);
20943 continue;
20946 min_size = ix86_min_insn_size (insn);
20947 nbytes += min_size;
20948 if (dump_file)
20949 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20950 INSN_UID (insn), min_size);
20951 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20952 || CALL_P (insn))
20953 njumps++;
20954 else
20955 continue;
20957 while (njumps > 3)
20959 start = NEXT_INSN (start);
20960 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20961 || CALL_P (start))
20962 njumps--, isjump = true;
20963 else
20964 isjump = false;
20965 nbytes -= ix86_min_insn_size (start);
20967 gcc_assert (njumps >= 0);
20968 if (dump_file)
20969 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20970 INSN_UID (start), INSN_UID (insn), nbytes);
20972 if (njumps == 3 && isjump && nbytes < 16)
20974 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20976 if (dump_file)
20977 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20978 INSN_UID (insn), padsize);
20979 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20983 #endif
20985 /* AMD Athlon works faster
20986 when RET is not the destination of a conditional jump or directly preceded
20987 by another jump instruction. We avoid the penalty by inserting a NOP just
20988 before the RET instruction in such cases. */
20989 static void
20990 ix86_pad_returns (void)
20992 edge e;
20993 edge_iterator ei;
20995 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20997 basic_block bb = e->src;
20998 rtx_insn *ret = BB_END (bb);
20999 rtx_insn *prev;
21000 bool replace = false;
21002 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
21003 || optimize_bb_for_size_p (bb))
21004 continue;
21005 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21006 if (active_insn_p (prev) || LABEL_P (prev))
21007 break;
21008 if (prev && LABEL_P (prev))
21010 edge e;
21011 edge_iterator ei;
21013 FOR_EACH_EDGE (e, ei, bb->preds)
21014 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21015 && !(e->flags & EDGE_FALLTHRU))
21017 replace = true;
21018 break;
21021 if (!replace)
21023 prev = prev_active_insn (ret);
21024 if (prev
21025 && ((JUMP_P (prev) && any_condjump_p (prev))
21026 || CALL_P (prev)))
21027 replace = true;
21028 /* Empty functions get a branch mispredict even when
21029 the jump destination is not visible to us. */
21030 if (!prev && !optimize_function_for_size_p (cfun))
21031 replace = true;
21033 if (replace)
21035 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
21036 delete_insn (ret);
21041 /* Count the minimum number of instructions in BB. Return 4 if the
21042 number of instructions >= 4. */
21044 static int
21045 ix86_count_insn_bb (basic_block bb)
21047 rtx_insn *insn;
21048 int insn_count = 0;
21050 /* Count number of instructions in this block. Return 4 if the number
21051 of instructions >= 4. */
21052 FOR_BB_INSNS (bb, insn)
21054 /* This only happens in exit blocks. */
21055 if (JUMP_P (insn)
21056 && ANY_RETURN_P (PATTERN (insn)))
21057 break;
21059 if (NONDEBUG_INSN_P (insn)
21060 && GET_CODE (PATTERN (insn)) != USE
21061 && GET_CODE (PATTERN (insn)) != CLOBBER)
21063 insn_count++;
21064 if (insn_count >= 4)
21065 return insn_count;
21069 return insn_count;
21073 /* Count the minimum number of instructions in code path in BB.
21074 Return 4 if the number of instructions >= 4. */
21076 static int
21077 ix86_count_insn (basic_block bb)
21079 edge e;
21080 edge_iterator ei;
21081 int min_prev_count;
21083 /* Only bother counting instructions along paths with no
21084 more than 2 basic blocks between entry and exit. Given
21085 that BB has an edge to exit, determine if a predecessor
21086 of BB has an edge from entry. If so, compute the number
21087 of instructions in the predecessor block. If there
21088 happen to be multiple such blocks, compute the minimum. */
21089 min_prev_count = 4;
21090 FOR_EACH_EDGE (e, ei, bb->preds)
21092 edge prev_e;
21093 edge_iterator prev_ei;
21095 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21097 min_prev_count = 0;
21098 break;
21100 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
21102 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21104 int count = ix86_count_insn_bb (e->src);
21105 if (count < min_prev_count)
21106 min_prev_count = count;
21107 break;
21112 if (min_prev_count < 4)
21113 min_prev_count += ix86_count_insn_bb (bb);
21115 return min_prev_count;
21118 /* Pad short function to 4 instructions. */
21120 static void
21121 ix86_pad_short_function (void)
21123 edge e;
21124 edge_iterator ei;
21126 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21128 rtx_insn *ret = BB_END (e->src);
21129 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
21131 int insn_count = ix86_count_insn (e->src);
21133 /* Pad short function. */
21134 if (insn_count < 4)
21136 rtx_insn *insn = ret;
21138 /* Find epilogue. */
21139 while (insn
21140 && (!NOTE_P (insn)
21141 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
21142 insn = PREV_INSN (insn);
21144 if (!insn)
21145 insn = ret;
21147 /* Two NOPs count as one instruction. */
21148 insn_count = 2 * (4 - insn_count);
21149 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
21155 /* Fix up a Windows system unwinder issue. If an EH region falls through into
21156 the epilogue, the Windows system unwinder will apply epilogue logic and
21157 produce incorrect offsets. This can be avoided by adding a nop between
21158 the last insn that can throw and the first insn of the epilogue. */
21160 static void
21161 ix86_seh_fixup_eh_fallthru (void)
21163 edge e;
21164 edge_iterator ei;
21166 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21168 rtx_insn *insn, *next;
21170 /* Find the beginning of the epilogue. */
21171 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
21172 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
21173 break;
21174 if (insn == NULL)
21175 continue;
21177 /* We only care about preceding insns that can throw. */
21178 insn = prev_active_insn (insn);
21179 if (insn == NULL || !can_throw_internal (insn))
21180 continue;
21182 /* Do not separate calls from their debug information. */
21183 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
21184 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
21185 insn = next;
21186 else
21187 break;
21189 emit_insn_after (gen_nops (const1_rtx), insn);
21193 /* Implement machine specific optimizations. We implement padding of returns
21194 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21195 static void
21196 ix86_reorg (void)
21198 /* We are freeing block_for_insn in the toplev to keep compatibility
21199 with old MDEP_REORGS that are not CFG based. Recompute it now. */
21200 compute_bb_for_insn ();
21202 if (TARGET_SEH && current_function_has_exception_handlers ())
21203 ix86_seh_fixup_eh_fallthru ();
21205 if (optimize && optimize_function_for_speed_p (cfun))
21207 if (TARGET_PAD_SHORT_FUNCTION)
21208 ix86_pad_short_function ();
21209 else if (TARGET_PAD_RETURNS)
21210 ix86_pad_returns ();
21211 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
21212 if (TARGET_FOUR_JUMP_LIMIT)
21213 ix86_avoid_jump_mispredicts ();
21214 #endif
21218 /* Return nonzero when a QImode register that must be encoded via a REX
21219 prefix is used. */
21220 bool
21221 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
21223 int i;
21224 extract_insn_cached (insn);
21225 for (i = 0; i < recog_data.n_operands; i++)
21226 if (GENERAL_REG_P (recog_data.operand[i])
21227 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
21228 return true;
21229 return false;
21232 /* Return true when INSN mentions a register that must be encoded using a
21233 REX prefix. */
21234 bool
21235 x86_extended_reg_mentioned_p (rtx insn)
21237 subrtx_iterator::array_type array;
21238 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
21240 const_rtx x = *iter;
21241 if (REG_P (x)
21242 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
21243 return true;
21245 return false;
21248 /* If profitable, negate (without causing overflow) integer constant
21249 of mode MODE at location LOC. Return true in this case. */
21250 bool
21251 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
21253 HOST_WIDE_INT val;
21255 if (!CONST_INT_P (*loc))
21256 return false;
21258 switch (mode)
21260 case E_DImode:
21261 /* DImode x86_64 constants must fit in 32 bits. */
21262 gcc_assert (x86_64_immediate_operand (*loc, mode));
21264 mode = SImode;
21265 break;
21267 case E_SImode:
21268 case E_HImode:
21269 case E_QImode:
21270 break;
21272 default:
21273 gcc_unreachable ();
21276 /* Avoid overflows. */
21277 if (mode_signbit_p (mode, *loc))
21278 return false;
21280 val = INTVAL (*loc);
21282 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
21283 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
21284 if ((val < 0 && val != -128)
21285 || val == 128)
21287 *loc = GEN_INT (-val);
21288 return true;
21291 return false;
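/* For illustration: (plus:SI (reg) (const_int -4)) is preferably emitted as
   "subl $4, %eax" rather than "addl $-4, %eax", so the constant is negated
   here.  The 128/-128 special case exists because -128 still fits in a
   sign-extended 8-bit immediate while +128 does not, hence -128 is kept
   as-is and +128 is negated instead.  */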
21294 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21295 optabs would emit if we didn't have TFmode patterns. */
21297 void
21298 x86_emit_floatuns (rtx operands[2])
21300 rtx_code_label *neglab, *donelab;
21301 rtx i0, i1, f0, in, out;
21302 machine_mode mode, inmode;
21304 inmode = GET_MODE (operands[1]);
21305 gcc_assert (inmode == SImode || inmode == DImode);
21307 out = operands[0];
21308 in = force_reg (inmode, operands[1]);
21309 mode = GET_MODE (out);
21310 neglab = gen_label_rtx ();
21311 donelab = gen_label_rtx ();
21312 f0 = gen_reg_rtx (mode);
21314 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21316 expand_float (out, in, 0);
21318 emit_jump_insn (gen_jump (donelab));
21319 emit_barrier ();
21321 emit_label (neglab);
21323 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21324 1, OPTAB_DIRECT);
21325 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21326 1, OPTAB_DIRECT);
21327 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21329 expand_float (f0, i0, 0);
21331 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
21333 emit_label (donelab);
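/* A scalar C equivalent of the sequence emitted above, shown for the
   uint64_t -> double case (illustrative sketch only; names are made up):

       double u64_to_double (uint64_t x)
       {
         if ((int64_t) x >= 0)
           return (double) (int64_t) x;        // plain signed conversion
         uint64_t half = (x >> 1) | (x & 1);   // halve, keep low bit for rounding
         double f = (double) (int64_t) half;
         return f + f;                         // scale back up
       }
*/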
21336 /* Target hook for scalar_mode_supported_p. */
21337 static bool
21338 ix86_scalar_mode_supported_p (scalar_mode mode)
21340 if (DECIMAL_FLOAT_MODE_P (mode))
21341 return default_decimal_float_supported_p ();
21342 else if (mode == TFmode)
21343 return true;
21344 else
21345 return default_scalar_mode_supported_p (mode);
21348 /* Implements target hook vector_mode_supported_p. */
21349 static bool
21350 ix86_vector_mode_supported_p (machine_mode mode)
21352 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21353 return true;
21354 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21355 return true;
21356 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21357 return true;
21358 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
21359 return true;
21360 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21361 && VALID_MMX_REG_MODE (mode))
21362 return true;
21363 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
21364 && VALID_MMX_REG_MODE_3DNOW (mode))
21365 return true;
21366 return false;
21369 /* Target hook for c_mode_for_suffix. */
21370 static machine_mode
21371 ix86_c_mode_for_suffix (char suffix)
21373 if (suffix == 'q')
21374 return TFmode;
21375 if (suffix == 'w')
21376 return XFmode;
21378 return VOIDmode;
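/* In other words, on x86 the literal suffixes 'q'/'Q' give __float128
   (TFmode) constants such as 1.0q, and 'w'/'W' give __float80 (XFmode)
   constants such as 1.0w.  */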
21381 /* Worker function for TARGET_MD_ASM_ADJUST.
21383 We implement asm flag outputs, and maintain source compatibility
21384 with the old cc0-based compiler. */
21386 static rtx_insn *
21387 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
21388 vec<const char *> &constraints,
21389 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
21391 bool saw_asm_flag = false;
21393 start_sequence ();
21394 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
21396 const char *con = constraints[i];
21397 if (strncmp (con, "=@cc", 4) != 0)
21398 continue;
21399 con += 4;
21400 if (strchr (con, ',') != NULL)
21402 error ("alternatives not allowed in %<asm%> flag output");
21403 continue;
21406 bool invert = false;
21407 if (con[0] == 'n')
21408 invert = true, con++;
21410 machine_mode mode = CCmode;
21411 rtx_code code = UNKNOWN;
21413 switch (con[0])
21415 case 'a':
21416 if (con[1] == 0)
21417 mode = CCAmode, code = EQ;
21418 else if (con[1] == 'e' && con[2] == 0)
21419 mode = CCCmode, code = NE;
21420 break;
21421 case 'b':
21422 if (con[1] == 0)
21423 mode = CCCmode, code = EQ;
21424 else if (con[1] == 'e' && con[2] == 0)
21425 mode = CCAmode, code = NE;
21426 break;
21427 case 'c':
21428 if (con[1] == 0)
21429 mode = CCCmode, code = EQ;
21430 break;
21431 case 'e':
21432 if (con[1] == 0)
21433 mode = CCZmode, code = EQ;
21434 break;
21435 case 'g':
21436 if (con[1] == 0)
21437 mode = CCGCmode, code = GT;
21438 else if (con[1] == 'e' && con[2] == 0)
21439 mode = CCGCmode, code = GE;
21440 break;
21441 case 'l':
21442 if (con[1] == 0)
21443 mode = CCGCmode, code = LT;
21444 else if (con[1] == 'e' && con[2] == 0)
21445 mode = CCGCmode, code = LE;
21446 break;
21447 case 'o':
21448 if (con[1] == 0)
21449 mode = CCOmode, code = EQ;
21450 break;
21451 case 'p':
21452 if (con[1] == 0)
21453 mode = CCPmode, code = EQ;
21454 break;
21455 case 's':
21456 if (con[1] == 0)
21457 mode = CCSmode, code = EQ;
21458 break;
21459 case 'z':
21460 if (con[1] == 0)
21461 mode = CCZmode, code = EQ;
21462 break;
21464 if (code == UNKNOWN)
21466 error ("unknown %<asm%> flag output %qs", constraints[i]);
21467 continue;
21469 if (invert)
21470 code = reverse_condition (code);
21472 rtx dest = outputs[i];
21473 if (!saw_asm_flag)
21475 /* This is the first asm flag output. Here we put the flags
21476 register in as the real output and adjust the condition to
21477 allow it. */
21478 constraints[i] = "=Bf";
21479 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21480 saw_asm_flag = true;
21482 else
21484 /* We don't need the flags register as output twice. */
21485 constraints[i] = "=X";
21486 outputs[i] = gen_rtx_SCRATCH (SImode);
21489 rtx x = gen_rtx_REG (mode, FLAGS_REG);
21490 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21492 machine_mode dest_mode = GET_MODE (dest);
21493 if (!SCALAR_INT_MODE_P (dest_mode))
21495 error ("invalid type for %<asm%> flag output");
21496 continue;
21499 if (dest_mode == DImode && !TARGET_64BIT)
21500 dest_mode = SImode;
21502 if (dest_mode != QImode)
21504 rtx destqi = gen_reg_rtx (QImode);
21505 emit_insn (gen_rtx_SET (destqi, x));
21507 if (TARGET_ZERO_EXTEND_WITH_AND
21508 && optimize_function_for_speed_p (cfun))
21510 x = force_reg (dest_mode, const0_rtx);
21512 emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi));
21514 else
21516 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
21517 if (dest_mode == GET_MODE (dest)
21518 && !register_operand (dest, GET_MODE (dest)))
21519 x = force_reg (dest_mode, x);
21523 if (dest_mode != GET_MODE (dest))
21525 rtx tmp = gen_reg_rtx (SImode);
21527 emit_insn (gen_rtx_SET (tmp, x));
21528 emit_insn (gen_zero_extendsidi2 (dest, tmp));
21530 else
21531 emit_insn (gen_rtx_SET (dest, x));
21533 rtx_insn *seq = get_insns ();
21534 end_sequence ();
21536 if (saw_asm_flag)
21537 return seq;
21538 else
21540 /* If we had no asm flag outputs, clobber the flags. */
21541 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21542 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21543 return NULL;
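/* For reference, a minimal user-level example of the "=@cc<cond>" flag
   output constraints handled above (hypothetical snippet, not part of GCC):

       int try_cmpxchg (int *p, int oldv, int newv)
       {
         int ok;
         asm volatile ("lock; cmpxchgl %3, %1"
                       : "=@ccz" (ok), "+m" (*p), "+a" (oldv)
                       : "r" (newv)
                       : "memory");
         return ok;
       }

   The "z" suffix selects CCZmode/EQ in the switch above; prefixing the
   condition with "n" (e.g. "=@ccnz") reverses it.  */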
21547 /* Implements the target hook targetm.asm.encode_section_info. */
21549 static void ATTRIBUTE_UNUSED
21550 ix86_encode_section_info (tree decl, rtx rtl, int first)
21552 default_encode_section_info (decl, rtl, first);
21554 if (ix86_in_large_data_p (decl))
21555 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21558 /* Worker function for REVERSE_CONDITION. */
21560 enum rtx_code
21561 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21563 return (mode == CCFPmode
21564 ? reverse_condition_maybe_unordered (code)
21565 : reverse_condition (code));
21568 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21569 to OPERANDS[0]. */
21571 const char *
21572 output_387_reg_move (rtx_insn *insn, rtx *operands)
21574 if (REG_P (operands[0]))
21576 if (REG_P (operands[1])
21577 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21579 if (REGNO (operands[0]) == FIRST_STACK_REG)
21580 return output_387_ffreep (operands, 0);
21581 return "fstp\t%y0";
21583 if (STACK_TOP_P (operands[0]))
21584 return "fld%Z1\t%y1";
21585 return "fst\t%y0";
21587 else if (MEM_P (operands[0]))
21589 gcc_assert (REG_P (operands[1]));
21590 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21591 return "fstp%Z0\t%y0";
21592 else
21594 /* There is no non-popping store to memory for XFmode.
21595 So if we need one, follow the store with a load. */
21596 if (GET_MODE (operands[0]) == XFmode)
21597 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21598 else
21599 return "fst%Z0\t%y0";
21602 else
21603 gcc_unreachable();
21605 #ifdef TARGET_SOLARIS
21606 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21608 static void
21609 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21610 tree decl)
21612 /* With Binutils 2.15, the "@unwind" marker must be specified on
21613 every occurrence of the ".eh_frame" section, not just the first
21614 one. */
21615 if (TARGET_64BIT
21616 && strcmp (name, ".eh_frame") == 0)
21618 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21619 flags & SECTION_WRITE ? "aw" : "a");
21620 return;
21623 #ifndef USE_GAS
21624 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21626 solaris_elf_asm_comdat_section (name, flags, decl);
21627 return;
21630 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21631 SPARC assembler. One cannot mix single-letter flags and #exclude, so
21632 only emit the latter here. */
21633 if (flags & SECTION_EXCLUDE)
21635 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21636 return;
21638 #endif
21640 default_elf_asm_named_section (name, flags, decl);
21642 #endif /* TARGET_SOLARIS */
21644 /* Return the mangling of TYPE if it is an extended fundamental type. */
21646 static const char *
21647 ix86_mangle_type (const_tree type)
21649 type = TYPE_MAIN_VARIANT (type);
21651 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21652 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21653 return NULL;
21655 switch (TYPE_MODE (type))
21657 case E_TFmode:
21658 /* __float128 is "g". */
21659 return "g";
21660 case E_XFmode:
21661 /* "long double" or __float80 is "e". */
21662 return "e";
21663 default:
21664 return NULL;
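/* For example, under the Itanium C++ ABI this makes void f(__float128)
   mangle as _Z1fg and void f(long double) as _Z1fe.  */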
21668 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21670 static tree
21671 ix86_stack_protect_guard (void)
21673 if (TARGET_SSP_TLS_GUARD)
21675 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21676 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21677 tree type = build_qualified_type (type_node, qual);
21678 tree t;
21680 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21682 t = ix86_tls_stack_chk_guard_decl;
21684 if (t == NULL)
21686 rtx x;
21688 t = build_decl
21689 (UNKNOWN_LOCATION, VAR_DECL,
21690 get_identifier (ix86_stack_protector_guard_symbol_str),
21691 type);
21692 TREE_STATIC (t) = 1;
21693 TREE_PUBLIC (t) = 1;
21694 DECL_EXTERNAL (t) = 1;
21695 TREE_USED (t) = 1;
21696 TREE_THIS_VOLATILE (t) = 1;
21697 DECL_ARTIFICIAL (t) = 1;
21698 DECL_IGNORED_P (t) = 1;
21700 /* Do not share RTL as the declaration is visible outside of
21701 current function. */
21702 x = DECL_RTL (t);
21703 RTX_FLAG (x, used) = 1;
21705 ix86_tls_stack_chk_guard_decl = t;
21708 else
21710 tree asptrtype = build_pointer_type (type);
21712 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21713 t = build2 (MEM_REF, asptrtype, t,
21714 build_int_cst (asptrtype, 0));
21715 TREE_THIS_VOLATILE (t) = 1;
21718 return t;
21721 return default_stack_protect_guard ();
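/* Concretely (typical glibc configurations; offsets may differ elsewhere):
   the TLS guard built above is usually addressed as %gs:20 in 32-bit code
   and %fs:40 in 64-bit code, while -mstack-protector-guard-symbol= makes
   the canary load reference the named TLS symbol declared here instead.  */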
21724 /* For 32-bit code we can save PIC register setup by using the
21725 __stack_chk_fail_local hidden function instead of calling
21726 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21727 register, so it is better to call __stack_chk_fail directly. */
21729 static tree ATTRIBUTE_UNUSED
21730 ix86_stack_protect_fail (void)
21732 return TARGET_64BIT
21733 ? default_external_stack_protect_fail ()
21734 : default_hidden_stack_protect_fail ();
21737 /* Select a format to encode pointers in exception handling data. CODE
21738 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21739 true if the symbol may be affected by dynamic relocations.
21741 ??? All x86 object file formats are capable of representing this.
21742 After all, the relocation needed is the same as for the call insn.
21743 Whether or not a particular assembler allows us to enter such, I
21744 guess we'll have to see. */
21746 int asm_preferred_eh_data_format (int code, int global)
21748 if (flag_pic)
21750 int type = DW_EH_PE_sdata8;
21751 if (!TARGET_64BIT
21752 || ix86_cmodel == CM_SMALL_PIC
21753 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21754 type = DW_EH_PE_sdata4;
21755 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21757 if (ix86_cmodel == CM_SMALL
21758 || (ix86_cmodel == CM_MEDIUM && code))
21759 return DW_EH_PE_udata4;
21760 return DW_EH_PE_absptr;
21763 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21764 static int
21765 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21766 tree vectype, int)
21768 bool fp = false;
21769 machine_mode mode = TImode;
21770 int index;
21771 if (vectype != NULL)
21773 fp = FLOAT_TYPE_P (vectype);
21774 mode = TYPE_MODE (vectype);
21777 switch (type_of_cost)
21779 case scalar_stmt:
21780 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21782 case scalar_load:
21783 /* Load/store costs are relative to a register move, which is 2. Recompute
21784 them to COSTS_N_INSNS so everything has the same base. */
21785 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21786 : ix86_cost->int_load [2]) / 2;
21788 case scalar_store:
21789 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21790 : ix86_cost->int_store [2]) / 2;
21792 case vector_stmt:
21793 return ix86_vec_cost (mode,
21794 fp ? ix86_cost->addss : ix86_cost->sse_op);
21796 case vector_load:
21797 index = sse_store_index (mode);
21798 /* See PR82713 - we may end up being called on non-vector type. */
21799 if (index < 0)
21800 index = 2;
21801 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21803 case vector_store:
21804 index = sse_store_index (mode);
21805 /* See PR82713 - we may end up being called on non-vector type. */
21806 if (index < 0)
21807 index = 2;
21808 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21810 case vec_to_scalar:
21811 case scalar_to_vec:
21812 return ix86_vec_cost (mode, ix86_cost->sse_op);
21814 /* We should have separate costs for unaligned loads and gather/scatter.
21815 Do that incrementally. */
21816 case unaligned_load:
21817 index = sse_store_index (mode);
21818 /* See PR82713 - we may end up being called on non-vector type. */
21819 if (index < 0)
21820 index = 2;
21821 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21823 case unaligned_store:
21824 index = sse_store_index (mode);
21825 /* See PR82713 - we may end up being called on non-vector type. */
21826 if (index < 0)
21827 index = 2;
21828 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21830 case vector_gather_load:
21831 return ix86_vec_cost (mode,
21832 COSTS_N_INSNS
21833 (ix86_cost->gather_static
21834 + ix86_cost->gather_per_elt
21835 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21837 case vector_scatter_store:
21838 return ix86_vec_cost (mode,
21839 COSTS_N_INSNS
21840 (ix86_cost->scatter_static
21841 + ix86_cost->scatter_per_elt
21842 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21844 case cond_branch_taken:
21845 return ix86_cost->cond_taken_branch_cost;
21847 case cond_branch_not_taken:
21848 return ix86_cost->cond_not_taken_branch_cost;
21850 case vec_perm:
21851 case vec_promote_demote:
21852 return ix86_vec_cost (mode, ix86_cost->sse_op);
21854 case vec_construct:
21856 /* N element inserts into SSE vectors. */
21857 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21858 /* One vinserti128 for combining two SSE vectors for AVX256. */
21859 if (GET_MODE_BITSIZE (mode) == 256)
21860 cost += ix86_vec_cost (mode, ix86_cost->addss);
21861 /* One vinserti64x4 and two vinserti128 for combining SSE
21862 and AVX256 vectors to AVX512. */
21863 else if (GET_MODE_BITSIZE (mode) == 512)
21864 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21865 return cost;
21868 default:
21869 gcc_unreachable ();
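/* A rough worked example for the vec_construct case above (absolute values
   depend on the active cost table, so this is only a sketch): building a
   V8SFmode vector on an AVX target is costed as 8 element inserts
   (TYPE_VECTOR_SUBPARTS * sse_op) plus one vinserti128-style combine
   charged at the addss rate, because GET_MODE_BITSIZE (V8SFmode) == 256.  */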
21874 /* This function returns the calling abi specific va_list type node.
21875 It returns the FNDECL specific va_list type. */
21877 static tree
21878 ix86_fn_abi_va_list (tree fndecl)
21880 if (!TARGET_64BIT)
21881 return va_list_type_node;
21882 gcc_assert (fndecl != NULL_TREE);
21884 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21885 return ms_va_list_type_node;
21886 else
21887 return sysv_va_list_type_node;
21890 /* Returns the canonical va_list type specified by TYPE. If no
21891 valid TYPE is provided, it returns NULL_TREE. */
21893 static tree
21894 ix86_canonical_va_list_type (tree type)
21896 if (TARGET_64BIT)
21898 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21899 return ms_va_list_type_node;
21901 if ((TREE_CODE (type) == ARRAY_TYPE
21902 && integer_zerop (array_type_nelts (type)))
21903 || POINTER_TYPE_P (type))
21905 tree elem_type = TREE_TYPE (type);
21906 if (TREE_CODE (elem_type) == RECORD_TYPE
21907 && lookup_attribute ("sysv_abi va_list",
21908 TYPE_ATTRIBUTES (elem_type)))
21909 return sysv_va_list_type_node;
21912 return NULL_TREE;
21915 return std_canonical_va_list_type (type);
21918 /* Iterate through the target-specific builtin types for va_list.
21919 IDX denotes the iterator, *PTREE is set to the result type of
21920 the va_list builtin, and *PNAME to its internal name.
21921 Returns zero if there is no element for this index, otherwise
21922 IDX should be increased upon the next call.
21923 Note, do not iterate a base builtin's name like __builtin_va_list.
21924 Used from c_common_nodes_and_builtins. */
21926 static int
21927 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21929 if (TARGET_64BIT)
21931 switch (idx)
21933 default:
21934 break;
21936 case 0:
21937 *ptree = ms_va_list_type_node;
21938 *pname = "__builtin_ms_va_list";
21939 return 1;
21941 case 1:
21942 *ptree = sysv_va_list_type_node;
21943 *pname = "__builtin_sysv_va_list";
21944 return 1;
21948 return 0;
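/* A user-level sketch of what the va_list hooks above provide on x86-64
   (the function is hypothetical): inside an ms_abi function the generic
   va_list resolves to __builtin_ms_va_list rather than the SysV
   register-save record, so plain stdarg code works:

     __attribute__((ms_abi)) int
     sum_ints (int count, ...)
     {
       __builtin_va_list ap;
       int i, total = 0;
       __builtin_va_start (ap, count);
       for (i = 0; i < count; i++)
         total += __builtin_va_arg (ap, int);
       __builtin_va_end (ap);
       return total;
     }
*/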
21951 #undef TARGET_SCHED_DISPATCH
21952 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21953 #undef TARGET_SCHED_DISPATCH_DO
21954 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21955 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21956 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21957 #undef TARGET_SCHED_REORDER
21958 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21959 #undef TARGET_SCHED_ADJUST_PRIORITY
21960 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21961 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21962 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21963 ix86_dependencies_evaluation_hook
21966 /* Implementation of the reassociation_width target hook, used by the
21967 reassoc phase to identify the parallelism level in a reassociated
21968 tree. The statement's tree_code is passed in OP. The arguments'
21969 type is passed in MODE. */
21971 static int
21972 ix86_reassociation_width (unsigned int op, machine_mode mode)
21974 int width = 1;
21975 /* Vector part. */
21976 if (VECTOR_MODE_P (mode))
21978 int div = 1;
21979 if (INTEGRAL_MODE_P (mode))
21980 width = ix86_cost->reassoc_vec_int;
21981 else if (FLOAT_MODE_P (mode))
21982 width = ix86_cost->reassoc_vec_fp;
21984 if (width == 1)
21985 return 1;
21987 /* Integer vector instructions execute in the FP unit
21988 and can execute 3 additions and one multiplication per cycle. */
21989 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
21990 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21991 return 1;
21993 /* Account for targets that split wide vectors into multiple parts. */
21994 if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21995 div = GET_MODE_BITSIZE (mode) / 128;
21996 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21997 div = GET_MODE_BITSIZE (mode) / 64;
21998 width = (width + div - 1) / div;
22000 /* Scalar part. */
22001 else if (INTEGRAL_MODE_P (mode))
22002 width = ix86_cost->reassoc_int;
22003 else if (FLOAT_MODE_P (mode))
22004 width = ix86_cost->reassoc_fp;
22006 /* Avoid using too many registers in 32bit mode. */
22007 if (!TARGET_64BIT && width > 2)
22008 width = 2;
22009 return width;
22012 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
22013 place emms and femms instructions. */
22015 static machine_mode
22016 ix86_preferred_simd_mode (scalar_mode mode)
22018 if (!TARGET_SSE)
22019 return word_mode;
22021 switch (mode)
22023 case E_QImode:
22024 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22025 return V64QImode;
22026 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22027 return V32QImode;
22028 else
22029 return V16QImode;
22031 case E_HImode:
22032 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22033 return V32HImode;
22034 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22035 return V16HImode;
22036 else
22037 return V8HImode;
22039 case E_SImode:
22040 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22041 return V16SImode;
22042 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22043 return V8SImode;
22044 else
22045 return V4SImode;
22047 case E_DImode:
22048 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22049 return V8DImode;
22050 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22051 return V4DImode;
22052 else
22053 return V2DImode;
22055 case E_SFmode:
22056 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22057 return V16SFmode;
22058 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22059 return V8SFmode;
22060 else
22061 return V4SFmode;
22063 case E_DFmode:
22064 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22065 return V8DFmode;
22066 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22067 return V4DFmode;
22068 else if (TARGET_SSE2)
22069 return V2DFmode;
22070 /* FALLTHRU */
22072 default:
22073 return word_mode;
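/* A user-level illustration of the mode selection above (a sketch only;
   the function below is hypothetical): with -O3 -mavx2 the int loop is a
   candidate for V8SImode, plain -msse2 would give V4SImode, and -mavx512f
   (absent prefer-avx256 tuning) V16SImode.

     void
     add_arrays (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] += b[i];
     }
*/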
22077 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
22078 vectors. If AVX512F is enabled then try vectorizing with 512bit,
22079 256bit and 128bit vectors. */
22081 static unsigned int
22082 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
22084 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22086 modes->safe_push (V64QImode);
22087 modes->safe_push (V32QImode);
22088 modes->safe_push (V16QImode);
22090 else if (TARGET_AVX512F && all)
22092 modes->safe_push (V32QImode);
22093 modes->safe_push (V16QImode);
22094 modes->safe_push (V64QImode);
22096 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22098 modes->safe_push (V32QImode);
22099 modes->safe_push (V16QImode);
22101 else if (TARGET_AVX && all)
22103 modes->safe_push (V16QImode);
22104 modes->safe_push (V32QImode);
22106 else if (TARGET_MMX_WITH_SSE)
22107 modes->safe_push (V16QImode);
22109 if (TARGET_MMX_WITH_SSE)
22110 modes->safe_push (V8QImode);
22112 return 0;
22115 /* Implementation of targetm.vectorize.get_mask_mode. */
22117 static opt_machine_mode
22118 ix86_get_mask_mode (machine_mode data_mode)
22120 unsigned vector_size = GET_MODE_SIZE (data_mode);
22121 unsigned nunits = GET_MODE_NUNITS (data_mode);
22122 unsigned elem_size = vector_size / nunits;
22124 /* Scalar mask case. */
22125 if ((TARGET_AVX512F && vector_size == 64)
22126 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
22128 if (elem_size == 4
22129 || elem_size == 8
22130 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
22131 return smallest_int_mode_for_size (nunits);
22134 scalar_int_mode elem_mode
22135 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
22137 gcc_assert (elem_size * nunits == vector_size);
22139 return mode_for_vector (elem_mode, nunits);
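/* Two worked cases for the selection above (derived from the code rather
   than from separate documentation): V16SFmode data with AVX512F is 64
   bytes of 4-byte elements, so the scalar-mask branch returns HImode, a
   16-bit k-register mask; V4SImode data without AVX512VL falls through to
   the vector branch and gets a V4SImode bool-vector mask built from SImode
   elements.  */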
22144 /* Return the class of registers which could be used for a pseudo of MODE
22145 and of class RCLASS for spilling instead of memory. Return NO_REGS
22146 if that is not possible or not profitable. */
22148 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22150 static reg_class_t
22151 ix86_spill_class (reg_class_t rclass, machine_mode mode)
22153 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
22154 && TARGET_SSE2
22155 && TARGET_INTER_UNIT_MOVES_TO_VEC
22156 && TARGET_INTER_UNIT_MOVES_FROM_VEC
22157 && (mode == SImode || (TARGET_64BIT && mode == DImode))
22158 && INTEGER_CLASS_P (rclass))
22159 return ALL_SSE_REGS;
22160 return NO_REGS;
22163 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
22164 but returns a lower bound. */
22166 static unsigned int
22167 ix86_max_noce_ifcvt_seq_cost (edge e)
22169 bool predictable_p = predictable_edge_p (e);
22170 if (predictable_p)
22172 if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
22173 return param_max_rtl_if_conversion_predictable_cost;
22175 else
22177 if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
22178 return param_max_rtl_if_conversion_unpredictable_cost;
22181 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
22184 /* Return true if SEQ is a good candidate as a replacement for the
22185 if-convertible sequence described in IF_INFO. */
22187 static bool
22188 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
22190 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
22192 int cmov_cnt = 0;
22193 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
22194 Maybe we should allow even more conditional moves as long as they
22195 are used far enough not to stall the CPU, or also consider
22196 IF_INFO->TEST_BB succ edge probabilities. */
22197 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
22199 rtx set = single_set (insn);
22200 if (!set)
22201 continue;
22202 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
22203 continue;
22204 rtx src = SET_SRC (set);
22205 machine_mode mode = GET_MODE (src);
22206 if (GET_MODE_CLASS (mode) != MODE_INT
22207 && GET_MODE_CLASS (mode) != MODE_FLOAT)
22208 continue;
22209 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
22210 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
22211 continue;
22212 /* insn is CMOV or FCMOV. */
22213 if (++cmov_cnt > 1)
22214 return false;
22217 return default_noce_conversion_profitable_p (seq, if_info);
22220 /* Implement targetm.vectorize.init_cost. */
22222 static void *
22223 ix86_init_cost (class loop *)
22225 unsigned *cost = XNEWVEC (unsigned, 3);
22226 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
22227 return cost;
22230 /* Implement targetm.vectorize.add_stmt_cost. */
22232 static unsigned
22233 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
22234 enum vect_cost_for_stmt kind,
22235 class _stmt_vec_info *stmt_info, tree vectype,
22236 int misalign,
22237 enum vect_cost_model_location where)
22239 unsigned *cost = (unsigned *) data;
22240 unsigned retval = 0;
22241 bool scalar_p
22242 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
22243 int stmt_cost = - 1;
22245 bool fp = false;
22246 machine_mode mode = scalar_p ? SImode : TImode;
22248 if (vectype != NULL)
22250 fp = FLOAT_TYPE_P (vectype);
22251 mode = TYPE_MODE (vectype);
22252 if (scalar_p)
22253 mode = TYPE_MODE (TREE_TYPE (vectype));
22256 if ((kind == vector_stmt || kind == scalar_stmt)
22257 && stmt_info
22258 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
22260 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
22261 /*machine_mode inner_mode = mode;
22262 if (VECTOR_MODE_P (mode))
22263 inner_mode = GET_MODE_INNER (mode);*/
22265 switch (subcode)
22267 case PLUS_EXPR:
22268 case POINTER_PLUS_EXPR:
22269 case MINUS_EXPR:
22270 if (kind == scalar_stmt)
22272 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22273 stmt_cost = ix86_cost->addss;
22274 else if (X87_FLOAT_MODE_P (mode))
22275 stmt_cost = ix86_cost->fadd;
22276 else
22277 stmt_cost = ix86_cost->add;
22279 else
22280 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
22281 : ix86_cost->sse_op);
22282 break;
22284 case MULT_EXPR:
22285 case WIDEN_MULT_EXPR:
22286 case MULT_HIGHPART_EXPR:
22287 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
22288 break;
22289 case NEGATE_EXPR:
22290 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22291 stmt_cost = ix86_cost->sse_op;
22292 else if (X87_FLOAT_MODE_P (mode))
22293 stmt_cost = ix86_cost->fchs;
22294 else if (VECTOR_MODE_P (mode))
22295 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
22296 else
22297 stmt_cost = ix86_cost->add;
22298 break;
22299 case TRUNC_DIV_EXPR:
22300 case CEIL_DIV_EXPR:
22301 case FLOOR_DIV_EXPR:
22302 case ROUND_DIV_EXPR:
22303 case TRUNC_MOD_EXPR:
22304 case CEIL_MOD_EXPR:
22305 case FLOOR_MOD_EXPR:
22306 case RDIV_EXPR:
22307 case ROUND_MOD_EXPR:
22308 case EXACT_DIV_EXPR:
22309 stmt_cost = ix86_division_cost (ix86_cost, mode);
22310 break;
22312 case RSHIFT_EXPR:
22313 case LSHIFT_EXPR:
22314 case LROTATE_EXPR:
22315 case RROTATE_EXPR:
22317 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
22318 stmt_cost = ix86_shift_rotate_cost
22319 (ix86_cost, mode,
22320 TREE_CODE (op2) == INTEGER_CST,
22321 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
22322 true, false, false, NULL, NULL);
22324 break;
22325 case NOP_EXPR:
22326 /* Only sign-conversions are free. */
22327 if (tree_nop_conversion_p
22328 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
22329 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
22330 stmt_cost = 0;
22331 break;
22333 case BIT_IOR_EXPR:
22334 case ABS_EXPR:
22335 case ABSU_EXPR:
22336 case MIN_EXPR:
22337 case MAX_EXPR:
22338 case BIT_XOR_EXPR:
22339 case BIT_AND_EXPR:
22340 case BIT_NOT_EXPR:
22341 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22342 stmt_cost = ix86_cost->sse_op;
22343 else if (VECTOR_MODE_P (mode))
22344 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
22345 else
22346 stmt_cost = ix86_cost->add;
22347 break;
22348 default:
22349 break;
22353 combined_fn cfn;
22354 if ((kind == vector_stmt || kind == scalar_stmt)
22355 && stmt_info
22356 && stmt_info->stmt
22357 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
22358 switch (cfn)
22360 case CFN_FMA:
22361 stmt_cost = ix86_vec_cost (mode,
22362 mode == SFmode ? ix86_cost->fmass
22363 : ix86_cost->fmasd);
22364 break;
22365 default:
22366 break;
22369 /* If we do elementwise loads into a vector then we are bound by
22370 latency and execution resources for the many scalar loads
22371 (AGU and load ports). Try to account for this by scaling the
22372 construction cost by the number of elements involved. */
22373 if ((kind == vec_construct || kind == vec_to_scalar)
22374 && stmt_info
22375 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
22376 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
22377 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
22378 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
22380 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22381 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
22383 if (stmt_cost == -1)
22384 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22386 /* Penalize DFmode vector operations for Bonnell. */
22387 if (TARGET_BONNELL && kind == vector_stmt
22388 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
22389 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
22391 /* Statements in an inner loop relative to the loop being
22392 vectorized are weighted more heavily. The value here is
22393 arbitrary and could potentially be improved with analysis. */
22394 if (where == vect_body && stmt_info
22395 && stmt_in_inner_loop_p (vinfo, stmt_info))
22396 count *= 50; /* FIXME. */
22398 retval = (unsigned) (count * stmt_cost);
22400 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
22401 for Silvermont, as it has an out-of-order integer pipeline and can execute
22402 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
22403 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
22404 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
22406 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
22407 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
22408 retval = (retval * 17) / 10;
22411 cost[where] += retval;
22413 return retval;
22416 /* Implement targetm.vectorize.finish_cost. */
22418 static void
22419 ix86_finish_cost (void *data, unsigned *prologue_cost,
22420 unsigned *body_cost, unsigned *epilogue_cost)
22422 unsigned *cost = (unsigned *) data;
22423 *prologue_cost = cost[vect_prologue];
22424 *body_cost = cost[vect_body];
22425 *epilogue_cost = cost[vect_epilogue];
22428 /* Implement targetm.vectorize.destroy_cost_data. */
22430 static void
22431 ix86_destroy_cost_data (void *data)
22433 free (data);
22436 /* Validate target specific memory model bits in VAL. */
22438 static unsigned HOST_WIDE_INT
22439 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
22441 enum memmodel model = memmodel_from_int (val);
22442 bool strong;
22444 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
22445 |MEMMODEL_MASK)
22446 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
22448 warning (OPT_Winvalid_memory_model,
22449 "unknown architecture specific memory model");
22450 return MEMMODEL_SEQ_CST;
22452 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
22453 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
22455 warning (OPT_Winvalid_memory_model,
22456 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22457 "memory model");
22458 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22460 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22462 warning (OPT_Winvalid_memory_model,
22463 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22464 "memory model");
22465 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22467 return val;
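/* A user-level sketch of the HLE bits validated above (the lock variable is
   hypothetical; -mhle is assumed so the __ATOMIC_HLE_* macros are defined):

     while (__atomic_exchange_n (&lockvar, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     __atomic_store_n (&lockvar, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing IX86_HLE_ACQUIRE with a model weaker than ACQUIRE, or
   IX86_HLE_RELEASE with one weaker than RELEASE, triggers the warnings
   above and falls back to MEMMODEL_SEQ_CST.  */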
22470 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22471 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22472 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
22473 or the number of vecsize_mangle variants that should be emitted. */
22475 static int
22476 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22477 struct cgraph_simd_clone *clonei,
22478 tree base_type, int num)
22480 int ret = 1;
22482 if (clonei->simdlen
22483 && (clonei->simdlen < 2
22484 || clonei->simdlen > 1024
22485 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22487 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22488 "unsupported simdlen %d", clonei->simdlen);
22489 return 0;
22492 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22493 if (TREE_CODE (ret_type) != VOID_TYPE)
22494 switch (TYPE_MODE (ret_type))
22496 case E_QImode:
22497 case E_HImode:
22498 case E_SImode:
22499 case E_DImode:
22500 case E_SFmode:
22501 case E_DFmode:
22502 /* case E_SCmode: */
22503 /* case E_DCmode: */
22504 if (!AGGREGATE_TYPE_P (ret_type))
22505 break;
22506 /* FALLTHRU */
22507 default:
22508 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22509 "unsupported return type %qT for simd", ret_type);
22510 return 0;
22513 tree t;
22514 int i;
22515 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22516 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22518 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22519 t && t != void_list_node; t = TREE_CHAIN (t), i++)
22521 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22522 switch (TYPE_MODE (arg_type))
22524 case E_QImode:
22525 case E_HImode:
22526 case E_SImode:
22527 case E_DImode:
22528 case E_SFmode:
22529 case E_DFmode:
22530 /* case E_SCmode: */
22531 /* case E_DCmode: */
22532 if (!AGGREGATE_TYPE_P (arg_type))
22533 break;
22534 /* FALLTHRU */
22535 default:
22536 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22537 break;
22538 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22539 "unsupported argument type %qT for simd", arg_type);
22540 return 0;
22544 if (!TREE_PUBLIC (node->decl))
22546 /* If the function isn't exported, we can pick up just one ISA
22547 for the clones. */
22548 if (TARGET_AVX512F)
22549 clonei->vecsize_mangle = 'e';
22550 else if (TARGET_AVX2)
22551 clonei->vecsize_mangle = 'd';
22552 else if (TARGET_AVX)
22553 clonei->vecsize_mangle = 'c';
22554 else
22555 clonei->vecsize_mangle = 'b';
22556 ret = 1;
22558 else
22560 clonei->vecsize_mangle = "bcde"[num];
22561 ret = 4;
22563 clonei->mask_mode = VOIDmode;
22564 switch (clonei->vecsize_mangle)
22566 case 'b':
22567 clonei->vecsize_int = 128;
22568 clonei->vecsize_float = 128;
22569 break;
22570 case 'c':
22571 clonei->vecsize_int = 128;
22572 clonei->vecsize_float = 256;
22573 break;
22574 case 'd':
22575 clonei->vecsize_int = 256;
22576 clonei->vecsize_float = 256;
22577 break;
22578 case 'e':
22579 clonei->vecsize_int = 512;
22580 clonei->vecsize_float = 512;
22581 if (TYPE_MODE (base_type) == QImode)
22582 clonei->mask_mode = DImode;
22583 else
22584 clonei->mask_mode = SImode;
22585 break;
22587 if (clonei->simdlen == 0)
22589 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22590 clonei->simdlen = clonei->vecsize_int;
22591 else
22592 clonei->simdlen = clonei->vecsize_float;
22593 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
22595 else if (clonei->simdlen > 16)
22597 /* For compatibility with ICC, use the same upper bounds
22598 for simdlen. In particular, for CTYPE below, use the return type,
22599 unless the function returns void, in which case use the characteristic
22600 type. If it is possible for the given SIMDLEN to pass a CTYPE value
22601 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22602 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
22603 emit the corresponding clone. */
22604 tree ctype = ret_type;
22605 if (TREE_CODE (ret_type) == VOID_TYPE)
22606 ctype = base_type;
22607 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22608 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22609 cnt /= clonei->vecsize_int;
22610 else
22611 cnt /= clonei->vecsize_float;
22612 if (cnt > (TARGET_64BIT ? 16 : 8))
22614 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22615 "unsupported simdlen %d", clonei->simdlen);
22616 return 0;
22619 return ret;
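/* A user-level sketch of the inputs to the hook above (the function is
   hypothetical; OpenMP SIMD support is assumed):

     #pragma omp declare simd notinbranch
     double scale (double x);

   For an exported declaration like this, all four mangles 'b', 'c', 'd'
   and 'e' (SSE, AVX, AVX2 and AVX-512 variants) are emitted, and with
   DFmode as the characteristic type the default simdlen works out to
   vecsize_float / 64, i.e. 2, 4, 4 and 8 lanes respectively.  */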
22622 /* If SIMD clone NODE can't be used in a vectorized loop
22623 in the current function, return -1; otherwise return the badness of using it
22624 (0 if it is most desirable from the vecsize_mangle point of view, 1
22625 slightly less desirable, etc.). */
22627 static int
22628 ix86_simd_clone_usable (struct cgraph_node *node)
22630 switch (node->simdclone->vecsize_mangle)
22632 case 'b':
22633 if (!TARGET_SSE2)
22634 return -1;
22635 if (!TARGET_AVX)
22636 return 0;
22637 return TARGET_AVX2 ? 2 : 1;
22638 case 'c':
22639 if (!TARGET_AVX)
22640 return -1;
22641 return TARGET_AVX2 ? 1 : 0;
22642 case 'd':
22643 if (!TARGET_AVX2)
22644 return -1;
22645 return 0;
22646 case 'e':
22647 if (!TARGET_AVX512F)
22648 return -1;
22649 return 0;
22650 default:
22651 gcc_unreachable ();
22655 /* This function adjusts the unroll factor based on
22656 the hardware capabilities. For example, bdver3 has
22657 a loop buffer which makes unrolling of smaller
22658 loops less important. This function decides the
22659 unroll factor using the number of memory references
22660 (the value 32 is used) as a heuristic. */
22662 static unsigned
22663 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22665 basic_block *bbs;
22666 rtx_insn *insn;
22667 unsigned i;
22668 unsigned mem_count = 0;
22670 if (!TARGET_ADJUST_UNROLL)
22671 return nunroll;
22673 /* Count the number of memory references within the loop body.
22674 This value determines the unrolling factor for bdver3 and bdver4
22675 architectures. */
22676 subrtx_iterator::array_type array;
22677 bbs = get_loop_body (loop);
22678 for (i = 0; i < loop->num_nodes; i++)
22679 FOR_BB_INSNS (bbs[i], insn)
22680 if (NONDEBUG_INSN_P (insn))
22681 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22682 if (const_rtx x = *iter)
22683 if (MEM_P (x))
22685 machine_mode mode = GET_MODE (x);
22686 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22687 if (n_words > 4)
22688 mem_count += 2;
22689 else
22690 mem_count += 1;
22692 free (bbs);
22694 if (mem_count && mem_count <= 32)
22695 return MIN (nunroll, 32 / mem_count);
22697 return nunroll;
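/* A small worked example of the heuristic above: on a bdver3-style target
   (one with TARGET_ADJUST_UNROLL set), a loop body with four word-sized
   memory references gets mem_count == 4, so the requested unroll factor
   is capped at 32 / 4 == 8; a loop with no memory references keeps the
   caller's NUNROLL unchanged.  */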
22701 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
22703 static bool
22704 ix86_float_exceptions_rounding_supported_p (void)
22706 /* For x87 floating point with standard excess precision handling,
22707 there is no adddf3 pattern (since x87 floating point only has
22708 XFmode operations) so the default hook implementation gets this
22709 wrong. */
22710 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22713 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
22715 static void
22716 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22718 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22719 return;
22720 tree exceptions_var = create_tmp_var_raw (integer_type_node);
22721 if (TARGET_80387)
22723 tree fenv_index_type = build_index_type (size_int (6));
22724 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22725 tree fenv_var = create_tmp_var_raw (fenv_type);
22726 TREE_ADDRESSABLE (fenv_var) = 1;
22727 tree fenv_ptr = build_pointer_type (fenv_type);
22728 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22729 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22730 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22731 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22732 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22733 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22734 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22735 tree hold_fnclex = build_call_expr (fnclex, 0);
22736 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22737 NULL_TREE, NULL_TREE);
22738 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22739 hold_fnclex);
22740 *clear = build_call_expr (fnclex, 0);
22741 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22742 tree fnstsw_call = build_call_expr (fnstsw, 0);
22743 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22744 fnstsw_call, NULL_TREE, NULL_TREE);
22745 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22746 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22747 exceptions_var, exceptions_x87,
22748 NULL_TREE, NULL_TREE);
22749 *update = build2 (COMPOUND_EXPR, integer_type_node,
22750 sw_mod, update_mod);
22751 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22752 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22754 if (TARGET_SSE && TARGET_SSE_MATH)
22756 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22757 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22758 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22759 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22760 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22761 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22762 mxcsr_orig_var, stmxcsr_hold_call,
22763 NULL_TREE, NULL_TREE);
22764 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22765 mxcsr_orig_var,
22766 build_int_cst (unsigned_type_node, 0x1f80));
22767 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22768 build_int_cst (unsigned_type_node, 0xffffffc0));
22769 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22770 mxcsr_mod_var, hold_mod_val,
22771 NULL_TREE, NULL_TREE);
22772 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22773 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22774 hold_assign_orig, hold_assign_mod);
22775 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22776 ldmxcsr_hold_call);
22777 if (*hold)
22778 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22779 else
22780 *hold = hold_all;
22781 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22782 if (*clear)
22783 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22784 ldmxcsr_clear_call);
22785 else
22786 *clear = ldmxcsr_clear_call;
22787 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22788 tree exceptions_sse = fold_convert (integer_type_node,
22789 stxmcsr_update_call);
22790 if (*update)
22792 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22793 exceptions_var, exceptions_sse);
22794 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22795 exceptions_var, exceptions_mod);
22796 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22797 exceptions_assign);
22799 else
22800 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22801 exceptions_sse, NULL_TREE, NULL_TREE);
22802 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22803 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22804 ldmxcsr_update_call);
22806 tree atomic_feraiseexcept
22807 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22808 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22809 1, exceptions_var);
22810 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22811 atomic_feraiseexcept_call);
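/* A sketch of where the hold/clear/update sequences built above are used
   (a hypothetical C11 fragment): a compound assignment to an atomic
   floating-point object such as

     _Atomic double total;
     void add (double x) { total += x; }

   expands to a compare-exchange loop; the *HOLD code saves and clears the
   x87/MXCSR exception state before the loop, *CLEAR resets it after an
   unsuccessful iteration, and *UPDATE re-raises the accumulated exceptions
   via __atomic_feraiseexcept once the exchange finally succeeds.  */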
22814 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22815 /* For i386, a common symbol is local only for non-PIE binaries. For
22816 x86-64, a common symbol is local only for non-PIE binaries, or if the
22817 linker supports copy relocs in PIE binaries. */
22819 static bool
22820 ix86_binds_local_p (const_tree exp)
22822 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22823 (!flag_pic
22824 || (TARGET_64BIT
22825 && HAVE_LD_PIE_COPYRELOC != 0)));
22827 #endif
22829 /* If MEM is in the form of [base+offset], extract the two parts
22830 of the address into BASE and OFFSET, otherwise return false. */
22832 static bool
22833 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22835 rtx addr;
22837 gcc_assert (MEM_P (mem));
22839 addr = XEXP (mem, 0);
22841 if (GET_CODE (addr) == CONST)
22842 addr = XEXP (addr, 0);
22844 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22846 *base = addr;
22847 *offset = const0_rtx;
22848 return true;
22851 if (GET_CODE (addr) == PLUS
22852 && (REG_P (XEXP (addr, 0))
22853 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22854 && CONST_INT_P (XEXP (addr, 1)))
22856 *base = XEXP (addr, 0);
22857 *offset = XEXP (addr, 1);
22858 return true;
22861 return false;
22864 /* Given OPERANDS of consecutive load/store instructions, check if we can
22865 merge them into a move-multiple. LOAD is true if they are load
22866 instructions. MODE is the mode of the memory operands. */
22868 bool
22869 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22870 machine_mode mode)
22872 HOST_WIDE_INT offval_1, offval_2, msize;
22873 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22875 if (load)
22877 mem_1 = operands[1];
22878 mem_2 = operands[3];
22879 reg_1 = operands[0];
22880 reg_2 = operands[2];
22882 else
22884 mem_1 = operands[0];
22885 mem_2 = operands[2];
22886 reg_1 = operands[1];
22887 reg_2 = operands[3];
22890 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22892 if (REGNO (reg_1) != REGNO (reg_2))
22893 return false;
22895 /* Check if the addresses are in the form of [base+offset]. */
22896 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22897 return false;
22898 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22899 return false;
22901 /* Check if the bases are the same. */
22902 if (!rtx_equal_p (base_1, base_2))
22903 return false;
22905 offval_1 = INTVAL (offset_1);
22906 offval_2 = INTVAL (offset_2);
22907 msize = GET_MODE_SIZE (mode);
22908 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22909 if (offval_1 + msize != offval_2)
22910 return false;
22912 return true;
22915 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22917 static bool
22918 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22919 optimization_type opt_type)
22921 switch (op)
22923 case asin_optab:
22924 case acos_optab:
22925 case log1p_optab:
22926 case exp_optab:
22927 case exp10_optab:
22928 case exp2_optab:
22929 case expm1_optab:
22930 case ldexp_optab:
22931 case scalb_optab:
22932 case round_optab:
22933 return opt_type == OPTIMIZE_FOR_SPEED;
22935 case rint_optab:
22936 if (SSE_FLOAT_MODE_P (mode1)
22937 && TARGET_SSE_MATH
22938 && !flag_trapping_math
22939 && !TARGET_SSE4_1)
22940 return opt_type == OPTIMIZE_FOR_SPEED;
22941 return true;
22943 case floor_optab:
22944 case ceil_optab:
22945 case btrunc_optab:
22946 if (SSE_FLOAT_MODE_P (mode1)
22947 && TARGET_SSE_MATH
22948 && !flag_trapping_math
22949 && TARGET_SSE4_1)
22950 return true;
22951 return opt_type == OPTIMIZE_FOR_SPEED;
22953 case rsqrt_optab:
22954 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22956 default:
22957 return true;
22961 /* Address space support.
22963 This is not "far pointers" in the 16-bit sense, but an easy way
22964 to use %fs and %gs segment prefixes. Therefore:
22966 (a) All address spaces have the same modes,
22967 (b) All address spaces have the same address forms,
22968 (c) While %fs and %gs are technically subsets of the generic
22969 address space, they are probably not subsets of each other.
22970 (d) Since we have no access to the segment base register values
22971 without resorting to a system call, we cannot convert a
22972 non-default address space to a default address space.
22973 Therefore we do not claim %fs or %gs are subsets of generic.
22975 Therefore we can (mostly) use the default hooks. */
22977 /* All use of segmentation is assumed to make address 0 valid. */
22979 static bool
22980 ix86_addr_space_zero_address_valid (addr_space_t as)
22982 return as != ADDR_SPACE_GENERIC;
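/* A user-level sketch of the %gs address space described above (the struct
   and macro names are hypothetical; setting up the %gs base itself is left
   to the OS or runtime):

     struct percpu { int counter; };
     #define THIS_CPU ((__seg_gs struct percpu *) 0)

     int
     get_counter (void)
     {
       return THIS_CPU->counter;
     }

   The load is emitted with a %gs segment prefix, and offset 0 is a
   perfectly good %gs-relative address, which is why
   ix86_addr_space_zero_address_valid returns true for the non-generic
   spaces.  */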
22985 static void
22986 ix86_init_libfuncs (void)
22988 if (TARGET_64BIT)
22990 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22991 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22993 else
22995 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22996 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22999 #if TARGET_MACHO
23000 darwin_rename_builtins ();
23001 #endif
23004 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
23005 FPU, assume that the fpcw is set to extended precision; when using
23006 only SSE, rounding is correct; when using both SSE and the FPU,
23007 the rounding precision is indeterminate, since either may be chosen
23008 apparently at random. */
23010 static enum flt_eval_method
23011 ix86_excess_precision (enum excess_precision_type type)
23013 switch (type)
23015 case EXCESS_PRECISION_TYPE_FAST:
23016 /* The fastest type to promote to will always be the native type,
23017 whether that occurs with implicit excess precision or
23018 otherwise. */
23019 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23020 case EXCESS_PRECISION_TYPE_STANDARD:
23021 case EXCESS_PRECISION_TYPE_IMPLICIT:
23022 /* Otherwise, the excess precision we want when we are
23023 in a standards compliant mode, and the implicit precision we
23024 provide would be identical were it not for the unpredictable
23025 cases. */
23026 if (!TARGET_80387)
23027 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23028 else if (!TARGET_MIX_SSE_I387)
23030 if (!(TARGET_SSE && TARGET_SSE_MATH))
23031 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
23032 else if (TARGET_SSE2)
23033 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23036 /* If we are in standards compliant mode, but we know we will
23037 calculate in unpredictable precision, return
23038 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
23039 excess precision if the target can't guarantee it will honor
23040 it. */
23041 return (type == EXCESS_PRECISION_TYPE_STANDARD
23042 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
23043 : FLT_EVAL_METHOD_UNPREDICTABLE);
23044 default:
23045 gcc_unreachable ();
23048 return FLT_EVAL_METHOD_UNPREDICTABLE;
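/* A hedged user-level note on the effect of the selection above: on ia32
   with the default -mfpmath=387 in a standards-compliant mode, <float.h>
   ends up defining FLT_EVAL_METHOD to 2 (promote to long double), while
   x86-64 or -msse2 -mfpmath=sse gives FLT_EVAL_METHOD == 0, e.g.

     #include <float.h>
     int uses_x87_excess_precision = (FLT_EVAL_METHOD == 2);
*/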
23051 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
23052 decrements by exactly 2 no matter what the position was; there is no pushb.
23054 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
23055 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
23056 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
23058 poly_int64
23059 ix86_push_rounding (poly_int64 bytes)
23061 return ROUND_UP (bytes, UNITS_PER_WORD);
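/* A worked example of the rounding above: a "pushw" of a 2-byte argument
   conceptually adjusts the stack by 2, but ix86_push_rounding reports the
   adjustment as ROUND_UP (2, UNITS_PER_WORD), i.e. 4 bytes on 32-bit
   targets and 8 bytes on 64-bit targets, which keeps stack offsets
   consistent with the CIE data alignment factors mentioned above.  */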
23064 /* Target-specific selftests. */
23066 #if CHECKING_P
23068 namespace selftest {
23070 /* Verify that hard regs are dumped as expected (in compact mode). */
23072 static void
23073 ix86_test_dumping_hard_regs ()
23075 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
23076 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
23079 /* Test dumping an insn with repeated references to the same SCRATCH,
23080 to verify the rtx_reuse code. */
23082 static void
23083 ix86_test_dumping_memory_blockage ()
23085 set_new_first_and_last_insn (NULL, NULL);
23087 rtx pat = gen_memory_blockage ();
23088 rtx_reuse_manager r;
23089 r.preprocess (pat);
23091 /* Verify that the repeated references to the SCRATCH show use of
23092 reuse IDs. The first should be prefixed with a reuse ID,
23093 and the second should be dumped as a "reuse_rtx" of that ID.
23094 The expected string assumes Pmode == DImode. */
23095 if (Pmode == DImode)
23096 ASSERT_RTL_DUMP_EQ_WITH_REUSE
23097 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
23098 " (unspec:BLK [\n"
23099 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
23100 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
23103 /* Verify loading an RTL dump; specifically a dump of copying
23104 a param on x86_64 from a hard reg into the frame.
23105 This test is target-specific since the dump contains target-specific
23106 hard reg names. */
23108 static void
23109 ix86_test_loading_dump_fragment_1 ()
23111 rtl_dump_test t (SELFTEST_LOCATION,
23112 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
23114 rtx_insn *insn = get_insn_by_uid (1);
23116 /* The block structure and indentation here is purely for
23117 readability; it mirrors the structure of the rtx. */
23118 tree mem_expr;
23120 rtx pat = PATTERN (insn);
23121 ASSERT_EQ (SET, GET_CODE (pat));
23123 rtx dest = SET_DEST (pat);
23124 ASSERT_EQ (MEM, GET_CODE (dest));
23125 /* Verify the "/c" was parsed. */
23126 ASSERT_TRUE (RTX_FLAG (dest, call));
23127 ASSERT_EQ (SImode, GET_MODE (dest));
23129 rtx addr = XEXP (dest, 0);
23130 ASSERT_EQ (PLUS, GET_CODE (addr));
23131 ASSERT_EQ (DImode, GET_MODE (addr));
23133 rtx lhs = XEXP (addr, 0);
23134 /* Verify that the "frame" REG was consolidated. */
23135 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
23138 rtx rhs = XEXP (addr, 1);
23139 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
23140 ASSERT_EQ (-4, INTVAL (rhs));
23143 /* Verify the "[1 i+0 S4 A32]" was parsed. */
23144 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
23145 /* "i" should have been handled by synthesizing a global int
23146 variable named "i". */
23147 mem_expr = MEM_EXPR (dest);
23148 ASSERT_NE (mem_expr, NULL);
23149 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
23150 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
23151 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
23152 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
23153 /* "+0". */
23154 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
23155 ASSERT_EQ (0, MEM_OFFSET (dest));
23156 /* "S4". */
23157 ASSERT_EQ (4, MEM_SIZE (dest));
23158 /* "A32. */
23159 ASSERT_EQ (32, MEM_ALIGN (dest));
23162 rtx src = SET_SRC (pat);
23163 ASSERT_EQ (REG, GET_CODE (src));
23164 ASSERT_EQ (SImode, GET_MODE (src));
23165 ASSERT_EQ (5, REGNO (src));
23166 tree reg_expr = REG_EXPR (src);
23167 /* "i" here should point to the same var as for the MEM_EXPR. */
23168 ASSERT_EQ (reg_expr, mem_expr);
23173 /* Verify that the RTL loader copes with a call_insn dump.
23174 This test is target-specific since the dump contains a target-specific
23175 hard reg name. */
23177 static void
23178 ix86_test_loading_call_insn ()
23180 /* The test dump includes register "xmm0", which requires TARGET_SSE
23181 to exist. */
23182 if (!TARGET_SSE)
23183 return;
23185 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
23187 rtx_insn *insn = get_insns ();
23188 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
23190 /* "/j". */
23191 ASSERT_TRUE (RTX_FLAG (insn, jump));
23193 rtx pat = PATTERN (insn);
23194 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
23196 /* Verify REG_NOTES. */
23198 /* "(expr_list:REG_CALL_DECL". */
23199 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
23200 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
23201 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
23203 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
23204 rtx_expr_list *note1 = note0->next ();
23205 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
23207 ASSERT_EQ (NULL, note1->next ());
23210 /* Verify CALL_INSN_FUNCTION_USAGE. */
23212 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
23213 rtx_expr_list *usage
23214 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
23215 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
23216 ASSERT_EQ (DFmode, GET_MODE (usage));
23217 ASSERT_EQ (USE, GET_CODE (usage->element ()));
23218 ASSERT_EQ (NULL, usage->next ());
23222 /* Verify that the RTL loader copes with a dump from print_rtx_function.
23223 This test is target-specific since the dump contains target-specific
23224 hard reg names. */
23226 static void
23227 ix86_test_loading_full_dump ()
23229 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
23231 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
23233 rtx_insn *insn_1 = get_insn_by_uid (1);
23234 ASSERT_EQ (NOTE, GET_CODE (insn_1));
23236 rtx_insn *insn_7 = get_insn_by_uid (7);
23237 ASSERT_EQ (INSN, GET_CODE (insn_7));
23238 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
23240 rtx_insn *insn_15 = get_insn_by_uid (15);
23241 ASSERT_EQ (INSN, GET_CODE (insn_15));
23242 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
23244 /* Verify crtl->return_rtx. */
23245 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
23246 ASSERT_EQ (0, REGNO (crtl->return_rtx));
23247 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
23250 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
23251 In particular, verify that it correctly loads the 2nd operand.
23252 This test is target-specific since these are machine-specific
23253 operands (and enums). */
23255 static void
23256 ix86_test_loading_unspec ()
23258 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
23260 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
23262 ASSERT_TRUE (cfun);
23264 /* Test of an UNSPEC. */
23265 rtx_insn *insn = get_insns ();
23266 ASSERT_EQ (INSN, GET_CODE (insn));
23267 rtx set = single_set (insn);
23268 ASSERT_NE (NULL, set);
23269 rtx dst = SET_DEST (set);
23270 ASSERT_EQ (MEM, GET_CODE (dst));
23271 rtx src = SET_SRC (set);
23272 ASSERT_EQ (UNSPEC, GET_CODE (src));
23273 ASSERT_EQ (BLKmode, GET_MODE (src));
23274 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
23276 rtx v0 = XVECEXP (src, 0, 0);
23278 /* Verify that the two uses of the first SCRATCH have pointer
23279 equality. */
23280 rtx scratch_a = XEXP (dst, 0);
23281 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
23283 rtx scratch_b = XEXP (v0, 0);
23284 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
23286 ASSERT_EQ (scratch_a, scratch_b);
23288 /* Verify that the two mems are thus treated as equal. */
23289 ASSERT_TRUE (rtx_equal_p (dst, v0));
23291 /* Verify that the insn is recognized. */
23292 ASSERT_NE(-1, recog_memoized (insn));
23294 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
23295 insn = NEXT_INSN (insn);
23296 ASSERT_EQ (INSN, GET_CODE (insn));
23298 set = single_set (insn);
23299 ASSERT_NE (NULL, set);
23301 src = SET_SRC (set);
23302 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
23303 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
23306 /* Run all target-specific selftests. */
23308 static void
23309 ix86_run_selftests (void)
23311 ix86_test_dumping_hard_regs ();
23312 ix86_test_dumping_memory_blockage ();
23314 /* Various tests of loading RTL dumps, here because they contain
23315 ix86-isms (e.g. names of hard regs). */
23316 ix86_test_loading_dump_fragment_1 ();
23317 ix86_test_loading_call_insn ();
23318 ix86_test_loading_full_dump ();
23319 ix86_test_loading_unspec ();
23322 } // namespace selftest
23324 #endif /* CHECKING_P */
23326 /* Initialize the GCC target structure. */
23327 #undef TARGET_RETURN_IN_MEMORY
23328 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
23330 #undef TARGET_LEGITIMIZE_ADDRESS
23331 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
23333 #undef TARGET_ATTRIBUTE_TABLE
23334 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23335 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
23336 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
23337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23338 # undef TARGET_MERGE_DECL_ATTRIBUTES
23339 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23340 #endif
23342 #undef TARGET_COMP_TYPE_ATTRIBUTES
23343 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23345 #undef TARGET_INIT_BUILTINS
23346 #define TARGET_INIT_BUILTINS ix86_init_builtins
23347 #undef TARGET_BUILTIN_DECL
23348 #define TARGET_BUILTIN_DECL ix86_builtin_decl
23349 #undef TARGET_EXPAND_BUILTIN
23350 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23352 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23353 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23354 ix86_builtin_vectorized_function
23356 #undef TARGET_VECTORIZE_BUILTIN_GATHER
23357 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
23359 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
23360 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
23362 #undef TARGET_BUILTIN_RECIPROCAL
23363 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23365 #undef TARGET_ASM_FUNCTION_EPILOGUE
23366 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23368 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
23369 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
23370 ix86_print_patchable_function_entry
23372 #undef TARGET_ENCODE_SECTION_INFO
23373 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23374 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23375 #else
23376 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23377 #endif
23379 #undef TARGET_ASM_OPEN_PAREN
23380 #define TARGET_ASM_OPEN_PAREN ""
23381 #undef TARGET_ASM_CLOSE_PAREN
23382 #define TARGET_ASM_CLOSE_PAREN ""
23384 #undef TARGET_ASM_BYTE_OP
23385 #define TARGET_ASM_BYTE_OP ASM_BYTE
23387 #undef TARGET_ASM_ALIGNED_HI_OP
23388 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23389 #undef TARGET_ASM_ALIGNED_SI_OP
23390 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23391 #ifdef ASM_QUAD
23392 #undef TARGET_ASM_ALIGNED_DI_OP
23393 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23394 #endif
23396 #undef TARGET_PROFILE_BEFORE_PROLOGUE
23397 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
23399 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
23400 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
23402 #undef TARGET_ASM_UNALIGNED_HI_OP
23403 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23404 #undef TARGET_ASM_UNALIGNED_SI_OP
23405 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23406 #undef TARGET_ASM_UNALIGNED_DI_OP
23407 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23409 #undef TARGET_PRINT_OPERAND
23410 #define TARGET_PRINT_OPERAND ix86_print_operand
23411 #undef TARGET_PRINT_OPERAND_ADDRESS
23412 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
23413 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
23414 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
23415 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23416 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
23418 #undef TARGET_SCHED_INIT_GLOBAL
23419 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
23420 #undef TARGET_SCHED_ADJUST_COST
23421 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23422 #undef TARGET_SCHED_ISSUE_RATE
23423 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23424 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23425 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23426 ia32_multipass_dfa_lookahead
23427 #undef TARGET_SCHED_MACRO_FUSION_P
23428 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
23429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
23432 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23433 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23435 #undef TARGET_MEMMODEL_CHECK
23436 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
23438 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23439 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
23441 #ifdef HAVE_AS_TLS
23442 #undef TARGET_HAVE_TLS
23443 #define TARGET_HAVE_TLS true
23444 #endif
23445 #undef TARGET_CANNOT_FORCE_CONST_MEM
23446 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23447 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23448 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23450 #undef TARGET_DELEGITIMIZE_ADDRESS
23451 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23453 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
23454 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
23456 #undef TARGET_MS_BITFIELD_LAYOUT_P
23457 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23459 #if TARGET_MACHO
23460 #undef TARGET_BINDS_LOCAL_P
23461 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23462 #else
23463 #undef TARGET_BINDS_LOCAL_P
23464 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23465 #endif
23466 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23467 #undef TARGET_BINDS_LOCAL_P
23468 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23469 #endif
23471 #undef TARGET_ASM_OUTPUT_MI_THUNK
23472 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23473 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23474 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23476 #undef TARGET_ASM_FILE_START
23477 #define TARGET_ASM_FILE_START x86_file_start
23479 #undef TARGET_OPTION_OVERRIDE
23480 #define TARGET_OPTION_OVERRIDE ix86_option_override
23482 #undef TARGET_REGISTER_MOVE_COST
23483 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23484 #undef TARGET_MEMORY_MOVE_COST
23485 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23486 #undef TARGET_RTX_COSTS
23487 #define TARGET_RTX_COSTS ix86_rtx_costs
23488 #undef TARGET_ADDRESS_COST
23489 #define TARGET_ADDRESS_COST ix86_address_cost
23491 #undef TARGET_FLAGS_REGNUM
23492 #define TARGET_FLAGS_REGNUM FLAGS_REG
23493 #undef TARGET_FIXED_CONDITION_CODE_REGS
23494 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23495 #undef TARGET_CC_MODES_COMPATIBLE
23496 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23498 #undef TARGET_MACHINE_DEPENDENT_REORG
23499 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23501 #undef TARGET_BUILD_BUILTIN_VA_LIST
23502 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23504 #undef TARGET_FOLD_BUILTIN
23505 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
23507 #undef TARGET_GIMPLE_FOLD_BUILTIN
23508 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23510 #undef TARGET_COMPARE_VERSION_PRIORITY
23511 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23513 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23514 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23515 ix86_generate_version_dispatcher_body
23517 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23518 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23519 ix86_get_function_versions_dispatcher
23521 #undef TARGET_ENUM_VA_LIST_P
23522 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23524 #undef TARGET_FN_ABI_VA_LIST
23525 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23527 #undef TARGET_CANONICAL_VA_LIST_TYPE
23528 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23530 #undef TARGET_EXPAND_BUILTIN_VA_START
23531 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23533 #undef TARGET_MD_ASM_ADJUST
23534 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23536 #undef TARGET_C_EXCESS_PRECISION
23537 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
23538 #undef TARGET_PROMOTE_PROTOTYPES
23539 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23540 #undef TARGET_SETUP_INCOMING_VARARGS
23541 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23542 #undef TARGET_MUST_PASS_IN_STACK
23543 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23544 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23545 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23546 #undef TARGET_FUNCTION_ARG_ADVANCE
23547 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23548 #undef TARGET_FUNCTION_ARG
23549 #define TARGET_FUNCTION_ARG ix86_function_arg
23550 #undef TARGET_INIT_PIC_REG
23551 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
23552 #undef TARGET_USE_PSEUDO_PIC_REG
23553 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23554 #undef TARGET_FUNCTION_ARG_BOUNDARY
23555 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23556 #undef TARGET_PASS_BY_REFERENCE
23557 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23558 #undef TARGET_INTERNAL_ARG_POINTER
23559 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23560 #undef TARGET_UPDATE_STACK_BOUNDARY
23561 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23562 #undef TARGET_GET_DRAP_RTX
23563 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23564 #undef TARGET_STRICT_ARGUMENT_NAMING
23565 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23566 #undef TARGET_STATIC_CHAIN
23567 #define TARGET_STATIC_CHAIN ix86_static_chain
23568 #undef TARGET_TRAMPOLINE_INIT
23569 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23570 #undef TARGET_RETURN_POPS_ARGS
23571 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23573 #undef TARGET_WARN_FUNC_RETURN
23574 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23576 #undef TARGET_LEGITIMATE_COMBINED_INSN
23577 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23579 #undef TARGET_ASAN_SHADOW_OFFSET
23580 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23582 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23583 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23585 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23586 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23588 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23589 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23591 #undef TARGET_C_MODE_FOR_SUFFIX
23592 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23594 #ifdef HAVE_AS_TLS
23595 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23596 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23597 #endif
23599 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23600 #undef TARGET_INSERT_ATTRIBUTES
23601 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23602 #endif
23604 #undef TARGET_MANGLE_TYPE
23605 #define TARGET_MANGLE_TYPE ix86_mangle_type
23607 #undef TARGET_STACK_PROTECT_GUARD
23608 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23610 #if !TARGET_MACHO
23611 #undef TARGET_STACK_PROTECT_FAIL
23612 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23613 #endif
23615 #undef TARGET_FUNCTION_VALUE
23616 #define TARGET_FUNCTION_VALUE ix86_function_value
23618 #undef TARGET_FUNCTION_VALUE_REGNO_P
23619 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23621 #undef TARGET_ZERO_CALL_USED_REGS
23622 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
23624 #undef TARGET_PROMOTE_FUNCTION_MODE
23625 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23627 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23628 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23630 #undef TARGET_MEMBER_TYPE_FORCES_BLK
23631 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23633 #undef TARGET_INSTANTIATE_DECLS
23634 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23636 #undef TARGET_SECONDARY_RELOAD
23637 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23638 #undef TARGET_SECONDARY_MEMORY_NEEDED
23639 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23640 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23641 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23643 #undef TARGET_CLASS_MAX_NREGS
23644 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23646 #undef TARGET_PREFERRED_RELOAD_CLASS
23647 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23648 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23649 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23650 #undef TARGET_CLASS_LIKELY_SPILLED_P
23651 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23653 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23654 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23655 ix86_builtin_vectorization_cost
23656 #undef TARGET_VECTORIZE_VEC_PERM_CONST
23657 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23658 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23659 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23660 ix86_preferred_simd_mode
23661 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
23662 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
23663 ix86_split_reduction
23664 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23665 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23666 ix86_autovectorize_vector_modes
23667 #undef TARGET_VECTORIZE_GET_MASK_MODE
23668 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23669 #undef TARGET_VECTORIZE_INIT_COST
23670 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23671 #undef TARGET_VECTORIZE_ADD_STMT_COST
23672 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23673 #undef TARGET_VECTORIZE_FINISH_COST
23674 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23675 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
23676 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23678 #undef TARGET_SET_CURRENT_FUNCTION
23679 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23681 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
23682 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23684 #undef TARGET_OPTION_SAVE
23685 #define TARGET_OPTION_SAVE ix86_function_specific_save
23687 #undef TARGET_OPTION_RESTORE
23688 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
23690 #undef TARGET_OPTION_POST_STREAM_IN
23691 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
23693 #undef TARGET_OPTION_PRINT
23694 #define TARGET_OPTION_PRINT ix86_function_specific_print
23696 #undef TARGET_OPTION_FUNCTION_VERSIONS
23697 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
23699 #undef TARGET_CAN_INLINE_P
23700 #define TARGET_CAN_INLINE_P ix86_can_inline_p
23702 #undef TARGET_LEGITIMATE_ADDRESS_P
23703 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
23705 #undef TARGET_REGISTER_PRIORITY
23706 #define TARGET_REGISTER_PRIORITY ix86_register_priority
23708 #undef TARGET_REGISTER_USAGE_LEVELING_P
23709 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
23711 #undef TARGET_LEGITIMATE_CONSTANT_P
23712 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
23714 #undef TARGET_COMPUTE_FRAME_LAYOUT
23715 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
23717 #undef TARGET_FRAME_POINTER_REQUIRED
23718 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
23720 #undef TARGET_CAN_ELIMINATE
23721 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
23723 #undef TARGET_EXTRA_LIVE_ON_ENTRY
23724 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
23726 #undef TARGET_ASM_CODE_END
23727 #define TARGET_ASM_CODE_END ix86_code_end
23729 #undef TARGET_CONDITIONAL_REGISTER_USAGE
23730 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
23732 #undef TARGET_CANONICALIZE_COMPARISON
23733 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
23735 #undef TARGET_LOOP_UNROLL_ADJUST
23736 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
23738 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23739 #undef TARGET_SPILL_CLASS
23740 #define TARGET_SPILL_CLASS ix86_spill_class
23742 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23743 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23744 ix86_simd_clone_compute_vecsize_and_simdlen
23746 #undef TARGET_SIMD_CLONE_ADJUST
23747 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
23749 #undef TARGET_SIMD_CLONE_USABLE
23750 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
23752 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
23753 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
23755 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23756 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23757 ix86_float_exceptions_rounding_supported_p
23759 #undef TARGET_MODE_EMIT
23760 #define TARGET_MODE_EMIT ix86_emit_mode_set
23762 #undef TARGET_MODE_NEEDED
23763 #define TARGET_MODE_NEEDED ix86_mode_needed
23765 #undef TARGET_MODE_AFTER
23766 #define TARGET_MODE_AFTER ix86_mode_after
23768 #undef TARGET_MODE_ENTRY
23769 #define TARGET_MODE_ENTRY ix86_mode_entry
23771 #undef TARGET_MODE_EXIT
23772 #define TARGET_MODE_EXIT ix86_mode_exit
23774 #undef TARGET_MODE_PRIORITY
23775 #define TARGET_MODE_PRIORITY ix86_mode_priority
23777 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23778 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23780 #undef TARGET_OFFLOAD_OPTIONS
23781 #define TARGET_OFFLOAD_OPTIONS \
23782 ix86_offload_options
23784 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23785 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23787 #undef TARGET_OPTAB_SUPPORTED_P
23788 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23790 #undef TARGET_HARD_REGNO_SCRATCH_OK
23791 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23793 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23794 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23796 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23797 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23799 #undef TARGET_INIT_LIBFUNCS
23800 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23802 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23803 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23805 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23806 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23808 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23809 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23811 #undef TARGET_HARD_REGNO_NREGS
23812 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23813 #undef TARGET_HARD_REGNO_MODE_OK
23814 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23816 #undef TARGET_MODES_TIEABLE_P
23817 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23819 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23820 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23821 ix86_hard_regno_call_part_clobbered
23823 #undef TARGET_CAN_CHANGE_MODE_CLASS
23824 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23826 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
23827 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
23829 #undef TARGET_STATIC_RTX_ALIGNMENT
23830 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23831 #undef TARGET_CONSTANT_ALIGNMENT
23832 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23834 #undef TARGET_EMPTY_RECORD_P
23835 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23837 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23838 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23840 #undef TARGET_GET_MULTILIB_ABI_NAME
23841 #define TARGET_GET_MULTILIB_ABI_NAME \
23842 ix86_get_multilib_abi_name
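/* Implement TARGET_LIBC_HAS_FAST_FUNCTION: return true when the C library
   is known to provide a fast implementation of the built-in function FCODE.
   Only glibc's mempcpy is recognized here.  */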
23844 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23845 {
23846 #ifdef OPTION_GLIBC
23847   if (OPTION_GLIBC)
23848     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23849   else
23850     return false;
23851 #else
23852   return false;
23853 #endif
23854 }
23856 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23857 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
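/* When GCC is configured with checking enabled, register the i386-specific
   selftests so they run as part of -fself-test.  */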
23859 #if CHECKING_P
23860 #undef TARGET_RUN_TARGET_SELFTESTS
23861 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23862 #endif /* #if CHECKING_P */
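/* Assemble the complete target vector from the TARGET_* hook macros
   defined above.  */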
23864 struct gcc_target targetm = TARGET_INITIALIZER;
23866 #include "gt-i386.h"