[i386] Fold __builtin_ia32_shufpd to VEC_PERM_EXPR
[official-gcc.git] / gcc / config / i386 / i386.c
blob 54607748b0baac01b200eb66a9f2a689d2cc1e25
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "dojump.h"
79 #include "fold-const-call.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "selftest.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
85 #include "intl.h"
86 #include "ifcvt.h"
87 #include "symbol-summary.h"
88 #include "ipa-prop.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
92 #include "debug.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
112 /* Return the index of the given mode in the mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
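/* As a worked example (not part of the original source), the macro
   evaluates as follows:

     MODE_INDEX (QImode) == 0
     MODE_INDEX (HImode) == 1
     MODE_INDEX (SImode) == 2
     MODE_INDEX (DImode) == 3

   and any other mode (e.g. TImode) falls through to index 4, so the
   multiply and division cost arrays indexed by it need five entries.  */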
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
127 /* If the average insn count for a single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
132 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers[6] =
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
323 CX_REG, DX_REG, R8_REG, R9_REG
326 static int const x86_64_int_return_registers[4] =
328 AX_REG, DX_REG, DI_REG, SI_REG
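/* A brief illustration (not from the original source) of what the tables
   above mean for a SysV x86-64 call such as

     extern long f (long a, long b, long c, long d, long e, long g);

   the six integer arguments are passed in %rdi, %rsi, %rdx, %rcx, %r8 and
   %r9 in that order, while an ms_abi function only uses %rcx, %rdx, %r8
   and %r9; integer results come back in %rax (with %rdx for a second
   word).  */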
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
352 rtx (*ix86_gen_leave) (void);
353 rtx (*ix86_gen_add3) (rtx, rtx, rtx);
354 rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
355 rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
356 rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
357 rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
358 rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
359 rtx (*ix86_gen_clzero) (rtx);
360 rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
361 rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
362 rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
363 rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
364 rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
365 rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
367 /* Preferred alignment for stack boundary in bits. */
368 unsigned int ix86_preferred_stack_boundary;
370 /* Alignment for incoming stack boundary in bits specified at
371 command line. */
372 unsigned int ix86_user_incoming_stack_boundary;
374 /* Default alignment for incoming stack boundary in bits. */
375 unsigned int ix86_default_incoming_stack_boundary;
377 /* Alignment for incoming stack boundary in bits. */
378 unsigned int ix86_incoming_stack_boundary;
380 /* Calling abi specific va_list type nodes. */
381 tree sysv_va_list_type_node;
382 tree ms_va_list_type_node;
384 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
385 char internal_label_prefix[16];
386 int internal_label_prefix_len;
388 /* Fence to use after loop using movnt. */
389 tree x86_mfence;
391 /* Register class used for passing a given 64-bit part of the argument.
392 These represent classes as documented by the PS ABI, with the exception
393 of the SSESF and SSEDF classes, which are basically the SSE class; GCC will
394 use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
396 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
397 whenever possible (the upper half does contain padding). */
398 enum x86_64_reg_class
400 X86_64_NO_CLASS,
401 X86_64_INTEGER_CLASS,
402 X86_64_INTEGERSI_CLASS,
403 X86_64_SSE_CLASS,
404 X86_64_SSESF_CLASS,
405 X86_64_SSEDF_CLASS,
406 X86_64_SSEUP_CLASS,
407 X86_64_X87_CLASS,
408 X86_64_X87UP_CLASS,
409 X86_64_COMPLEX_X87_CLASS,
410 X86_64_MEMORY_CLASS
413 #define MAX_CLASSES 8
415 /* Table of constants used by fldpi, fldln2, etc.... */
416 static REAL_VALUE_TYPE ext_80387_constants_table [5];
417 static bool ext_80387_constants_init;
420 static rtx ix86_function_value (const_tree, const_tree, bool);
421 static bool ix86_function_value_regno_p (const unsigned int);
422 static unsigned int ix86_function_arg_boundary (machine_mode,
423 const_tree);
424 static rtx ix86_static_chain (const_tree, bool);
425 static int ix86_function_regparm (const_tree, const_tree);
426 static void ix86_compute_frame_layout (void);
427 static tree ix86_canonical_va_list_type (tree);
428 static unsigned int split_stack_prologue_scratch_regno (void);
429 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
431 static bool ix86_can_inline_p (tree, tree);
432 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
435 /* Whether -mtune= or -march= were specified */
436 int ix86_tune_defaulted;
437 int ix86_arch_specified;
439 /* Return true if a red-zone is in use. We can't use the red-zone when
440 there are local indirect jumps, like "indirect_jump" or "tablejump",
441 which jump to another place in the function, since "call" in the
442 indirect thunk pushes the return address onto the stack, destroying
443 the red-zone.
445 TODO: If we can reserve the first 2 WORDs of the red-zone, one for PUSH
446 and another for CALL, we can allow local indirect jumps with the
447 indirect thunk. */
449 bool
450 ix86_using_red_zone (void)
452 return (TARGET_RED_ZONE
453 && !TARGET_64BIT_MS_ABI
454 && (!cfun->machine->has_local_indirect_jump
455 || cfun->machine->indirect_branch_type == indirect_branch_keep));
458 /* Return true if profiling code should be emitted before the
459 prologue; otherwise return false.
460 Note: for x86 with "hotfix" it is sorried, i.e. not supported. */
461 static bool
462 ix86_profile_before_prologue (void)
464 return flag_fentry != 0;
467 /* Update register usage after having seen the compiler flags. */
469 static void
470 ix86_conditional_register_usage (void)
472 int i, c_mask;
474 /* If there are no caller-saved registers, preserve all registers,
475 except fixed_regs and registers used for the function return value,
476 since aggregate_value_p checks call_used_regs[regno] on the return
477 value. */
478 if (cfun && cfun->machine->no_caller_saved_registers)
479 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
480 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
481 call_used_regs[i] = 0;
483 /* For 32-bit targets, disable the REX registers. */
484 if (! TARGET_64BIT)
486 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
487 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
488 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
489 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
490 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
491 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
494 /* See the definition of CALL_USED_REGISTERS in i386.h. */
495 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
497 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
499 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
501 /* Set/reset conditionally defined registers from
502 CALL_USED_REGISTERS initializer. */
503 if (call_used_regs[i] > 1)
504 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
506 /* Calculate registers of CLOBBERED_REGS register set
507 as call used registers from GENERAL_REGS register set. */
508 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
509 && call_used_regs[i])
510 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
513 /* If MMX is disabled, disable the registers. */
514 if (! TARGET_MMX)
515 AND_COMPL_HARD_REG_SET (accessible_reg_set,
516 reg_class_contents[(int) MMX_REGS]);
518 /* If SSE is disabled, disable the registers. */
519 if (! TARGET_SSE)
520 AND_COMPL_HARD_REG_SET (accessible_reg_set,
521 reg_class_contents[(int) ALL_SSE_REGS]);
523 /* If the FPU is disabled, disable the registers. */
524 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
525 AND_COMPL_HARD_REG_SET (accessible_reg_set,
526 reg_class_contents[(int) FLOAT_REGS]);
528 /* If AVX512F is disabled, disable the registers. */
529 if (! TARGET_AVX512F)
531 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
532 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
534 AND_COMPL_HARD_REG_SET (accessible_reg_set,
535 reg_class_contents[(int) ALL_MASK_REGS]);
539 /* Canonicalize a comparison from one we don't have to one we do have. */
541 static void
542 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
543 bool op0_preserve_value)
545 /* The order of operands in x87 ficom compare is forced by combine in
546 simplify_comparison () function. Float operator is treated as RTX_OBJ
547 with a precedence over other operators and is always put in the first
548 place. Swap condition and operands to match ficom instruction. */
549 if (!op0_preserve_value
550 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
552 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
554 /* We are called only for compares that are split to SAHF instruction.
555 Ensure that we have setcc/jcc insn for the swapped condition. */
556 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
558 std::swap (*op0, *op1);
559 *code = (int) scode;
565 /* Hook to determine if one function can safely inline another. */
567 static bool
568 ix86_can_inline_p (tree caller, tree callee)
570 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
571 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
573 /* Changes to these flags can be tolerated for always_inline functions.
574 Let's hope the user knows what they are doing. */
575 const unsigned HOST_WIDE_INT always_inline_safe_mask
576 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
577 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
578 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
579 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
580 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
581 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
582 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
585 if (!callee_tree)
586 callee_tree = target_option_default_node;
587 if (!caller_tree)
588 caller_tree = target_option_default_node;
589 if (callee_tree == caller_tree)
590 return true;
592 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
593 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
594 bool ret = false;
595 bool always_inline
596 = (DECL_DISREGARD_INLINE_LIMITS (callee)
597 && lookup_attribute ("always_inline",
598 DECL_ATTRIBUTES (callee)));
600 cgraph_node *callee_node = cgraph_node::get (callee);
601 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
602 function can inline an SSE2 function but an SSE2 function can't inline
603 an SSE4 function. */
604 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
605 != callee_opts->x_ix86_isa_flags)
606 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
607 != callee_opts->x_ix86_isa_flags2))
608 ret = false;
610 /* See if we have the same non-isa options. */
611 else if ((!always_inline
612 && caller_opts->x_target_flags != callee_opts->x_target_flags)
613 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
614 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
615 ret = false;
617 /* See if arch, tune, etc. are the same. */
618 else if (caller_opts->arch != callee_opts->arch)
619 ret = false;
621 else if (!always_inline && caller_opts->tune != callee_opts->tune)
622 ret = false;
624 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
625 /* If the callee doesn't use FP expressions, differences in
626 ix86_fpmath can be ignored. We are called from FEs
627 for multi-versioning call optimization, so beware of
628 ipa_fn_summaries not being available. */
629 && (! ipa_fn_summaries
630 || ipa_fn_summaries->get (callee_node) == NULL
631 || ipa_fn_summaries->get (callee_node)->fp_expressions))
632 ret = false;
634 else if (!always_inline
635 && caller_opts->branch_cost != callee_opts->branch_cost)
636 ret = false;
638 else
639 ret = true;
641 return ret;
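/* A user-level sketch of the ISA-subset rule above (illustrative only,
   not part of the original file):

     __attribute__ ((target ("sse4.2"))) int callee (int x) { ... }
     __attribute__ ((target ("avx2")))   int caller (int x)
     { return callee (x); }     /- may be inlined: avx2 implies sse4.2

   whereas inlining an "avx2" callee into an "sse4.2" caller is refused,
   because the callee's ISA flags would not be a subset of the caller's.  */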
644 /* Return true if this goes in large data/bss. */
646 static bool
647 ix86_in_large_data_p (tree exp)
649 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
650 return false;
652 if (exp == NULL_TREE)
653 return false;
655 /* Functions are never large data. */
656 if (TREE_CODE (exp) == FUNCTION_DECL)
657 return false;
659 /* Automatic variables are never large data. */
660 if (VAR_P (exp) && !is_global_var (exp))
661 return false;
663 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
665 const char *section = DECL_SECTION_NAME (exp);
666 if (strcmp (section, ".ldata") == 0
667 || strcmp (section, ".lbss") == 0)
668 return true;
669 return false;
671 else
673 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
675 /* If this is an incomplete type with size 0, then we can't put it
676 in data because it might be too big when completed. Also,
677 int_size_in_bytes returns -1 if the size can vary or is larger than
678 an integer, in which case it is also safer to assume that it goes in
679 large data. */
680 if (size <= 0 || size > ix86_section_threshold)
681 return true;
684 return false;
687 /* i386-specific section flag to mark large sections. */
688 #define SECTION_LARGE SECTION_MACH_DEP
690 /* Switch to the appropriate section for output of DECL.
691 DECL is either a `VAR_DECL' node or a constant of some sort.
692 RELOC indicates whether forming the initial value of DECL requires
693 link-time relocations. */
695 ATTRIBUTE_UNUSED static section *
696 x86_64_elf_select_section (tree decl, int reloc,
697 unsigned HOST_WIDE_INT align)
699 if (ix86_in_large_data_p (decl))
701 const char *sname = NULL;
702 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
703 switch (categorize_decl_for_section (decl, reloc))
705 case SECCAT_DATA:
706 sname = ".ldata";
707 break;
708 case SECCAT_DATA_REL:
709 sname = ".ldata.rel";
710 break;
711 case SECCAT_DATA_REL_LOCAL:
712 sname = ".ldata.rel.local";
713 break;
714 case SECCAT_DATA_REL_RO:
715 sname = ".ldata.rel.ro";
716 break;
717 case SECCAT_DATA_REL_RO_LOCAL:
718 sname = ".ldata.rel.ro.local";
719 break;
720 case SECCAT_BSS:
721 sname = ".lbss";
722 flags |= SECTION_BSS;
723 break;
724 case SECCAT_RODATA:
725 case SECCAT_RODATA_MERGE_STR:
726 case SECCAT_RODATA_MERGE_STR_INIT:
727 case SECCAT_RODATA_MERGE_CONST:
728 sname = ".lrodata";
729 flags &= ~SECTION_WRITE;
730 break;
731 case SECCAT_SRODATA:
732 case SECCAT_SDATA:
733 case SECCAT_SBSS:
734 gcc_unreachable ();
735 case SECCAT_TEXT:
736 case SECCAT_TDATA:
737 case SECCAT_TBSS:
738 /* We don't split these for the medium model. Place them into
739 default sections and hope for the best. */
740 break;
742 if (sname)
744 /* We might get called with string constants, but get_named_section
745 doesn't like them as they are not DECLs. Also, we need to set
746 flags in that case. */
747 if (!DECL_P (decl))
748 return get_section (sname, flags, NULL);
749 return get_named_section (decl, sname, reloc);
752 return default_elf_select_section (decl, reloc, align);
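/* For example (an illustrative sketch, not part of the original source),
   with -mcmodel=medium a variable larger than the -mlarge-data-threshold
   such as

     static char big_buf[1 << 20];

   is categorized as SECCAT_BSS by the code above and placed in ".lbss"
   instead of ".bss", while a large read-only table would go to
   ".lrodata".  */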
755 /* Select a set of attributes for section NAME based on the properties
756 of DECL and whether or not RELOC indicates that DECL's initializer
757 might contain runtime relocations. */
759 static unsigned int ATTRIBUTE_UNUSED
760 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
762 unsigned int flags = default_section_type_flags (decl, name, reloc);
764 if (ix86_in_large_data_p (decl))
765 flags |= SECTION_LARGE;
767 if (decl == NULL_TREE
768 && (strcmp (name, ".ldata.rel.ro") == 0
769 || strcmp (name, ".ldata.rel.ro.local") == 0))
770 flags |= SECTION_RELRO;
772 if (strcmp (name, ".lbss") == 0
773 || strncmp (name, ".lbss.", 5) == 0
774 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
775 flags |= SECTION_BSS;
777 return flags;
780 /* Build up a unique section name, expressed as a
781 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
782 RELOC indicates whether the initial value of EXP requires
783 link-time relocations. */
785 static void ATTRIBUTE_UNUSED
786 x86_64_elf_unique_section (tree decl, int reloc)
788 if (ix86_in_large_data_p (decl))
790 const char *prefix = NULL;
791 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
792 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
794 switch (categorize_decl_for_section (decl, reloc))
796 case SECCAT_DATA:
797 case SECCAT_DATA_REL:
798 case SECCAT_DATA_REL_LOCAL:
799 case SECCAT_DATA_REL_RO:
800 case SECCAT_DATA_REL_RO_LOCAL:
801 prefix = one_only ? ".ld" : ".ldata";
802 break;
803 case SECCAT_BSS:
804 prefix = one_only ? ".lb" : ".lbss";
805 break;
806 case SECCAT_RODATA:
807 case SECCAT_RODATA_MERGE_STR:
808 case SECCAT_RODATA_MERGE_STR_INIT:
809 case SECCAT_RODATA_MERGE_CONST:
810 prefix = one_only ? ".lr" : ".lrodata";
811 break;
812 case SECCAT_SRODATA:
813 case SECCAT_SDATA:
814 case SECCAT_SBSS:
815 gcc_unreachable ();
816 case SECCAT_TEXT:
817 case SECCAT_TDATA:
818 case SECCAT_TBSS:
819 /* We don't split these for the medium model. Place them into
820 default sections and hope for the best. */
821 break;
823 if (prefix)
825 const char *name, *linkonce;
826 char *string;
828 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
829 name = targetm.strip_name_encoding (name);
831 /* If we're using one_only, then there needs to be a .gnu.linkonce
832 prefix to the section name. */
833 linkonce = one_only ? ".gnu.linkonce" : "";
835 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
837 set_decl_section_name (decl, string);
838 return;
841 default_unique_section (decl, reloc);
844 #ifdef COMMON_ASM_OP
846 #ifndef LARGECOMM_SECTION_ASM_OP
847 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
848 #endif
850 /* This says how to output assembler code to declare an
851 uninitialized external-linkage data object.
853 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
854 directive for large objects. */
855 void
856 x86_elf_aligned_decl_common (FILE *file, tree decl,
857 const char *name, unsigned HOST_WIDE_INT size,
858 int align)
860 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
861 && size > (unsigned int)ix86_section_threshold)
863 switch_to_section (get_named_section (decl, ".lbss", 0));
864 fputs (LARGECOMM_SECTION_ASM_OP, file);
866 else
867 fputs (COMMON_ASM_OP, file);
868 assemble_name (file, name);
869 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
870 size, align / BITS_PER_UNIT);
872 #endif
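/* A hedged example of the assembly this emits (assuming a 4096-byte,
   32-byte-aligned common symbol "buf"):

     .comm	buf,4096,32

   or, for a medium-model object above the section threshold, the same
   line with ".largecomm" after switching to the ".lbss" section.  */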
874 /* Utility function for targets to use in implementing
875 ASM_OUTPUT_ALIGNED_BSS. */
877 void
878 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
879 unsigned HOST_WIDE_INT size, int align)
881 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
882 && size > (unsigned int)ix86_section_threshold)
883 switch_to_section (get_named_section (decl, ".lbss", 0));
884 else
885 switch_to_section (bss_section);
886 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
887 #ifdef ASM_DECLARE_OBJECT_NAME
888 last_assemble_variable_decl = decl;
889 ASM_DECLARE_OBJECT_NAME (file, name, decl);
890 #else
891 /* The standard thing is to just output a label for the object. */
892 ASM_OUTPUT_LABEL (file, name);
893 #endif /* ASM_DECLARE_OBJECT_NAME */
894 ASM_OUTPUT_SKIP (file, size ? size : 1);
897 /* Decide whether we must probe the stack before any space allocation
898 on this target. It's essentially TARGET_STACK_PROBE except when
899 -fstack-check causes the stack to be already probed differently. */
901 bool
902 ix86_target_stack_probe (void)
904 /* Do not probe the stack twice if static stack checking is enabled. */
905 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
906 return false;
908 return TARGET_STACK_PROBE;
911 /* Decide whether we can make a sibling call to a function. DECL is the
912 declaration of the function being targeted by the call and EXP is the
913 CALL_EXPR representing the call. */
915 static bool
916 ix86_function_ok_for_sibcall (tree decl, tree exp)
918 tree type, decl_or_type;
919 rtx a, b;
920 bool bind_global = decl && !targetm.binds_local_p (decl);
922 if (ix86_function_naked (current_function_decl))
923 return false;
925 /* Sibling call isn't OK if there are no caller-saved registers
926 since all registers must be preserved before return. */
927 if (cfun->machine->no_caller_saved_registers)
928 return false;
930 /* If we are generating position-independent code, we cannot sibcall
931 optimize direct calls to global functions, as the PLT requires
932 %ebx be live. (Darwin does not have a PLT.) */
933 if (!TARGET_MACHO
934 && !TARGET_64BIT
935 && flag_pic
936 && flag_plt
937 && bind_global)
938 return false;
940 /* If we need to align the outgoing stack, then sibcalling would
941 unalign the stack, which may break the called function. */
942 if (ix86_minimum_incoming_stack_boundary (true)
943 < PREFERRED_STACK_BOUNDARY)
944 return false;
946 if (decl)
948 decl_or_type = decl;
949 type = TREE_TYPE (decl);
951 else
953 /* We're looking at the CALL_EXPR, we need the type of the function. */
954 type = CALL_EXPR_FN (exp); /* pointer expression */
955 type = TREE_TYPE (type); /* pointer type */
956 type = TREE_TYPE (type); /* function type */
957 decl_or_type = type;
960 /* Check that the return value locations are the same. For example,
961 if we are returning floats on the 80387 register stack, we cannot
962 make a sibcall from a function that doesn't return a float to a
963 function that does or, conversely, from a function that does return
964 a float to a function that doesn't; the necessary stack adjustment
965 would not be executed. This is also the place we notice
966 differences in the return value ABI. Note that it is ok for one
967 of the functions to have void return type as long as the return
968 value of the other is passed in a register. */
969 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
970 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
971 cfun->decl, false);
972 if (STACK_REG_P (a) || STACK_REG_P (b))
974 if (!rtx_equal_p (a, b))
975 return false;
977 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
979 else if (!rtx_equal_p (a, b))
980 return false;
982 if (TARGET_64BIT)
984 /* The SYSV ABI has more call-clobbered registers;
985 disallow sibcalls from MS to SYSV. */
986 if (cfun->machine->call_abi == MS_ABI
987 && ix86_function_type_abi (type) == SYSV_ABI)
988 return false;
990 else
992 /* If this call is indirect, we'll need to be able to use a
993 call-clobbered register for the address of the target function.
994 Make sure that all such registers are not used for passing
995 parameters. Note that DLLIMPORT functions and call to global
996 function via GOT slot are indirect. */
997 if (!decl
998 || (bind_global && flag_pic && !flag_plt)
999 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1000 || flag_force_indirect_call)
1002 /* Check if regparm >= 3 since arg_reg_available is set to
1003 false if regparm == 0. If regparm is 1 or 2, there is
1004 always a call-clobbered register available.
1006 ??? The symbol indirect call doesn't need a call-clobbered
1007 register. But we don't know if this is a symbol indirect
1008 call or not here. */
1009 if (ix86_function_regparm (type, decl) >= 3
1010 && !cfun->machine->arg_reg_available)
1011 return false;
1015 /* Otherwise okay. That also includes certain types of indirect calls. */
1016 return true;
1019 /* This function determines from TYPE the calling-convention. */
1021 unsigned int
1022 ix86_get_callcvt (const_tree type)
1024 unsigned int ret = 0;
1025 bool is_stdarg;
1026 tree attrs;
1028 if (TARGET_64BIT)
1029 return IX86_CALLCVT_CDECL;
1031 attrs = TYPE_ATTRIBUTES (type);
1032 if (attrs != NULL_TREE)
1034 if (lookup_attribute ("cdecl", attrs))
1035 ret |= IX86_CALLCVT_CDECL;
1036 else if (lookup_attribute ("stdcall", attrs))
1037 ret |= IX86_CALLCVT_STDCALL;
1038 else if (lookup_attribute ("fastcall", attrs))
1039 ret |= IX86_CALLCVT_FASTCALL;
1040 else if (lookup_attribute ("thiscall", attrs))
1041 ret |= IX86_CALLCVT_THISCALL;
1043 /* Regparm isn't allowed for thiscall and fastcall. */
1044 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1046 if (lookup_attribute ("regparm", attrs))
1047 ret |= IX86_CALLCVT_REGPARM;
1048 if (lookup_attribute ("sseregparm", attrs))
1049 ret |= IX86_CALLCVT_SSEREGPARM;
1052 if (IX86_BASE_CALLCVT(ret) != 0)
1053 return ret;
1056 is_stdarg = stdarg_p (type);
1057 if (TARGET_RTD && !is_stdarg)
1058 return IX86_CALLCVT_STDCALL | ret;
1060 if (ret != 0
1061 || is_stdarg
1062 || TREE_CODE (type) != METHOD_TYPE
1063 || ix86_function_type_abi (type) != MS_ABI)
1064 return IX86_CALLCVT_CDECL | ret;
1066 return IX86_CALLCVT_THISCALL;
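/* Illustrative examples (not from the original source) of the mapping:

     void __attribute__ ((fastcall)) f (int, int);
       -> IX86_CALLCVT_FASTCALL
     void __attribute__ ((regparm (3))) g (int, int, int);
       -> IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM
     void h (int, ...);
       -> IX86_CALLCVT_CDECL (stdarg functions never become stdcall,
          even with -mrtd).  */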
1069 /* Return 0 if the attributes for two types are incompatible, 1 if they
1070 are compatible, and 2 if they are nearly compatible (which causes a
1071 warning to be generated). */
1073 static int
1074 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1076 unsigned int ccvt1, ccvt2;
1078 if (TREE_CODE (type1) != FUNCTION_TYPE
1079 && TREE_CODE (type1) != METHOD_TYPE)
1080 return 1;
1082 ccvt1 = ix86_get_callcvt (type1);
1083 ccvt2 = ix86_get_callcvt (type2);
1084 if (ccvt1 != ccvt2)
1085 return 0;
1086 if (ix86_function_regparm (type1, NULL)
1087 != ix86_function_regparm (type2, NULL))
1088 return 0;
1090 return 1;
1093 /* Return the regparm value for a function with the indicated TYPE and DECL.
1094 DECL may be NULL when calling function indirectly
1095 or considering a libcall. */
1097 static int
1098 ix86_function_regparm (const_tree type, const_tree decl)
1100 tree attr;
1101 int regparm;
1102 unsigned int ccvt;
1104 if (TARGET_64BIT)
1105 return (ix86_function_type_abi (type) == SYSV_ABI
1106 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1107 ccvt = ix86_get_callcvt (type);
1108 regparm = ix86_regparm;
1110 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1112 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1113 if (attr)
1115 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1116 return regparm;
1119 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1120 return 2;
1121 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1122 return 1;
1124 /* Use register calling convention for local functions when possible. */
1125 if (decl
1126 && TREE_CODE (decl) == FUNCTION_DECL)
1128 cgraph_node *target = cgraph_node::get (decl);
1129 if (target)
1130 target = target->function_symbol ();
1132 /* Caller and callee must agree on the calling convention, so
1133 checking just `optimize' here would mean that with
1134 __attribute__((optimize (...))) the caller could use the regparm
1135 convention and the callee not, or vice versa. Instead look at
1136 whether the callee is optimized or not. */
1137 if (target && opt_for_fn (target->decl, optimize)
1138 && !(profile_flag && !flag_fentry))
1140 cgraph_local_info *i = &target->local;
1141 if (i && i->local && i->can_change_signature)
1143 int local_regparm, globals = 0, regno;
1145 /* Make sure no regparm register is taken by a
1146 fixed register variable. */
1147 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1148 local_regparm++)
1149 if (fixed_regs[local_regparm])
1150 break;
1152 /* We don't want to use regparm(3) for nested functions as
1153 these use a static chain pointer in the third argument. */
1154 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1155 local_regparm = 2;
1157 /* Save a register for the split stack. */
1158 if (flag_split_stack)
1160 if (local_regparm == 3)
1161 local_regparm = 2;
1162 else if (local_regparm == 2
1163 && DECL_STATIC_CHAIN (target->decl))
1164 local_regparm = 1;
1167 /* Each fixed register usage increases register pressure,
1168 so fewer registers should be used for argument passing.
1169 This functionality can be overridden by an explicit
1170 regparm value. */
1171 for (regno = AX_REG; regno <= DI_REG; regno++)
1172 if (fixed_regs[regno])
1173 globals++;
1175 local_regparm
1176 = globals < local_regparm ? local_regparm - globals : 0;
1178 if (local_regparm > regparm)
1179 regparm = local_regparm;
1184 return regparm;
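/* For instance (a sketch, not part of the original source), with

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   ix86_function_regparm returns 3 and the first three integer arguments
   are passed in %eax, %edx and %ecx; fastcall yields 2 (%ecx, %edx) and
   thiscall 1 (%ecx).  */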
1187 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1188 DFmode (2) arguments in SSE registers for a function with the
1189 indicated TYPE and DECL. DECL may be NULL when calling the function
1190 indirectly or considering a libcall. Return -1 if any FP parameter
1191 should be rejected by error. This is used in situations where we imply
1192 the SSE calling convention but the function is called from another
1193 function with SSE disabled. Otherwise return 0. */
1195 static int
1196 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1198 gcc_assert (!TARGET_64BIT);
1200 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1201 by the sseregparm attribute. */
1202 if (TARGET_SSEREGPARM
1203 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1205 if (!TARGET_SSE)
1207 if (warn)
1209 if (decl)
1210 error ("calling %qD with attribute sseregparm without "
1211 "SSE/SSE2 enabled", decl);
1212 else
1213 error ("calling %qT with attribute sseregparm without "
1214 "SSE/SSE2 enabled", type);
1216 return 0;
1219 return 2;
1222 if (!decl)
1223 return 0;
1225 cgraph_node *target = cgraph_node::get (decl);
1226 if (target)
1227 target = target->function_symbol ();
1229 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1230 (and DFmode for SSE2) arguments in SSE registers. */
1231 if (target
1232 /* TARGET_SSE_MATH */
1233 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1234 && opt_for_fn (target->decl, optimize)
1235 && !(profile_flag && !flag_fentry))
1237 cgraph_local_info *i = &target->local;
1238 if (i && i->local && i->can_change_signature)
1240 /* Refuse to produce wrong code when a local function with SSE enabled
1241 is called from an SSE-disabled function.
1242 FIXME: We need a way to detect these cases across ltrans partitions
1243 and avoid using SSE calling conventions on local functions called
1244 from a function with SSE disabled. For now at least delay the
1245 warning until we know we are going to produce wrong code.
1246 See PR66047. */
1247 if (!TARGET_SSE && warn)
1248 return -1;
1249 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1250 ->x_ix86_isa_flags) ? 2 : 1;
1254 return 0;
1257 /* Return true if EAX is live at the start of the function. Used by
1258 ix86_expand_prologue to determine if we need special help before
1259 calling allocate_stack_worker. */
1261 static bool
1262 ix86_eax_live_at_start_p (void)
1264 /* Cheat. Don't bother working forward from ix86_function_regparm
1265 to the function type to whether an actual argument is located in
1266 eax. Instead just look at cfg info, which is still close enough
1267 to correct at this point. This gives false positives for broken
1268 functions that might use uninitialized data that happens to be
1269 allocated in eax, but who cares? */
1270 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1273 static bool
1274 ix86_keep_aggregate_return_pointer (tree fntype)
1276 tree attr;
1278 if (!TARGET_64BIT)
1280 attr = lookup_attribute ("callee_pop_aggregate_return",
1281 TYPE_ATTRIBUTES (fntype));
1282 if (attr)
1283 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1285 /* For 32-bit MS-ABI the default is to keep aggregate
1286 return pointer. */
1287 if (ix86_function_type_abi (fntype) == MS_ABI)
1288 return true;
1290 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1293 /* Value is the number of bytes of arguments automatically
1294 popped when returning from a subroutine call.
1295 FUNDECL is the declaration node of the function (as a tree),
1296 FUNTYPE is the data type of the function (as a tree),
1297 or for a library call it is an identifier node for the subroutine name.
1298 SIZE is the number of bytes of arguments passed on the stack.
1300 On the 80386, the RTD insn may be used to pop them if the number
1301 of args is fixed, but if the number is variable then the caller
1302 must pop them all. RTD can't be used for library calls now
1303 because the library is compiled with the Unix compiler.
1304 Use of RTD is a selectable option, since it is incompatible with
1305 standard Unix calling sequences. If the option is not selected,
1306 the caller must always pop the args.
1308 The attribute stdcall is equivalent to RTD on a per module basis. */
1310 static poly_int64
1311 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1313 unsigned int ccvt;
1315 /* None of the 64-bit ABIs pop arguments. */
1316 if (TARGET_64BIT)
1317 return 0;
1319 ccvt = ix86_get_callcvt (funtype);
1321 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1322 | IX86_CALLCVT_THISCALL)) != 0
1323 && ! stdarg_p (funtype))
1324 return size;
1326 /* Lose any fake structure return argument if it is passed on the stack. */
1327 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1328 && !ix86_keep_aggregate_return_pointer (funtype))
1330 int nregs = ix86_function_regparm (funtype, fundecl);
1331 if (nregs == 0)
1332 return GET_MODE_SIZE (Pmode);
1335 return 0;
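/* A short example of the rule above (illustrative only): for

     void __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8, the function is not stdarg, and the callee pops its own
   arguments, so f returns with "ret $8"; a plain cdecl function returns
   0 here and the caller pops the 8 bytes instead.  */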
1338 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1340 static bool
1341 ix86_legitimate_combined_insn (rtx_insn *insn)
1343 int i;
1345 /* Check operand constraints in case hard registers were propagated
1346 into insn pattern. This check prevents combine pass from
1347 generating insn patterns with invalid hard register operands.
1348 These invalid insns can eventually confuse reload to error out
1349 with a spill failure. See also PRs 46829 and 46843. */
1351 gcc_assert (INSN_CODE (insn) >= 0);
1353 extract_insn (insn);
1354 preprocess_constraints (insn);
1356 int n_operands = recog_data.n_operands;
1357 int n_alternatives = recog_data.n_alternatives;
1358 for (i = 0; i < n_operands; i++)
1360 rtx op = recog_data.operand[i];
1361 machine_mode mode = GET_MODE (op);
1362 const operand_alternative *op_alt;
1363 int offset = 0;
1364 bool win;
1365 int j;
1367 /* A unary operator may be accepted by the predicate, but it
1368 is irrelevant for matching constraints. */
1369 if (UNARY_P (op))
1370 op = XEXP (op, 0);
1372 if (SUBREG_P (op))
1374 if (REG_P (SUBREG_REG (op))
1375 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1376 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1377 GET_MODE (SUBREG_REG (op)),
1378 SUBREG_BYTE (op),
1379 GET_MODE (op));
1380 op = SUBREG_REG (op);
1383 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1384 continue;
1386 op_alt = recog_op_alt;
1388 /* Operand has no constraints, anything is OK. */
1389 win = !n_alternatives;
1391 alternative_mask preferred = get_preferred_alternatives (insn);
1392 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1394 if (!TEST_BIT (preferred, j))
1395 continue;
1396 if (op_alt[i].anything_ok
1397 || (op_alt[i].matches != -1
1398 && operands_match_p
1399 (recog_data.operand[i],
1400 recog_data.operand[op_alt[i].matches]))
1401 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1403 win = true;
1404 break;
1408 if (!win)
1409 return false;
1412 return true;
1415 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1417 static unsigned HOST_WIDE_INT
1418 ix86_asan_shadow_offset (void)
1420 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
1421 : HOST_WIDE_INT_C (0x7fff8000))
1422 : (HOST_WIDE_INT_1 << 29);
1425 /* Argument support functions. */
1427 /* Return true when register may be used to pass function parameters. */
1428 bool
1429 ix86_function_arg_regno_p (int regno)
1431 int i;
1432 enum calling_abi call_abi;
1433 const int *parm_regs;
1435 if (!TARGET_64BIT)
1437 if (TARGET_MACHO)
1438 return (regno < REGPARM_MAX
1439 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1440 else
1441 return (regno < REGPARM_MAX
1442 || (TARGET_MMX && MMX_REGNO_P (regno)
1443 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1444 || (TARGET_SSE && SSE_REGNO_P (regno)
1445 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1448 if (TARGET_SSE && SSE_REGNO_P (regno)
1449 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1450 return true;
1452 /* TODO: The function should depend on current function ABI but
1453 builtins.c would need updating then. Therefore we use the
1454 default ABI. */
1455 call_abi = ix86_cfun_abi ();
1457 /* RAX is used as hidden argument to va_arg functions. */
1458 if (call_abi == SYSV_ABI && regno == AX_REG)
1459 return true;
1461 if (call_abi == MS_ABI)
1462 parm_regs = x86_64_ms_abi_int_parameter_registers;
1463 else
1464 parm_regs = x86_64_int_parameter_registers;
1466 for (i = 0; i < (call_abi == MS_ABI
1467 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1468 if (regno == parm_regs[i])
1469 return true;
1470 return false;
1473 /* Return if we do not know how to pass TYPE solely in registers. */
1475 static bool
1476 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
1478 if (must_pass_in_stack_var_size_or_pad (mode, type))
1479 return true;
1481 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1482 The layout_type routine is crafty and tries to trick us into passing
1483 currently unsupported vector types on the stack by using TImode. */
1484 return (!TARGET_64BIT && mode == TImode
1485 && type && TREE_CODE (type) != VECTOR_TYPE);
1488 /* Return the size, in bytes, of the area reserved for arguments passed
1489 in registers for the function represented by FNDECL, depending on the
1490 ABI used. */
1491 int
1492 ix86_reg_parm_stack_space (const_tree fndecl)
1494 enum calling_abi call_abi = SYSV_ABI;
1495 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1496 call_abi = ix86_function_abi (fndecl);
1497 else
1498 call_abi = ix86_function_type_abi (fndecl);
1499 if (TARGET_64BIT && call_abi == MS_ABI)
1500 return 32;
1501 return 0;
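/* Concretely (an illustrative note, not in the original file): for an
   ms_abi function on x86-64 the caller must reserve a 32-byte "home
   area" on the stack for the four register arguments, which is the 32
   returned above; SysV functions reserve nothing.  */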
1504 /* We add this as a workaround in order to use libc_has_function
1505 hook in i386.md. */
1506 bool
1507 ix86_libc_has_function (enum function_class fn_class)
1509 return targetm.libc_has_function (fn_class);
1512 /* Return SYSV_ABI or MS_ABI depending on FNTYPE,
1513 specifying the call ABI used. */
1514 enum calling_abi
1515 ix86_function_type_abi (const_tree fntype)
1517 enum calling_abi abi = ix86_abi;
1519 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1520 return abi;
1522 if (abi == SYSV_ABI
1523 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1525 static int warned;
1526 if (TARGET_X32 && !warned)
1528 error ("X32 does not support %<ms_abi%> attribute");
1529 warned = 1;
1532 abi = MS_ABI;
1534 else if (abi == MS_ABI
1535 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1536 abi = SYSV_ABI;
1538 return abi;
1541 enum calling_abi
1542 ix86_function_abi (const_tree fndecl)
1544 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1547 /* Return SYSV_ABI or MS_ABI depending on cfun,
1548 specifying the call ABI used. */
1549 enum calling_abi
1550 ix86_cfun_abi (void)
1552 return cfun ? cfun->machine->call_abi : ix86_abi;
1555 bool
1556 ix86_function_ms_hook_prologue (const_tree fn)
1558 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1560 if (decl_function_context (fn) != NULL_TREE)
1561 error_at (DECL_SOURCE_LOCATION (fn),
1562 "%<ms_hook_prologue%> attribute is not compatible "
1563 "with nested function");
1564 else
1565 return true;
1567 return false;
1570 bool
1571 ix86_function_naked (const_tree fn)
1573 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1574 return true;
1576 return false;
1579 /* Write the extra assembler code needed to declare a function properly. */
1581 void
1582 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1583 tree decl)
1585 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1587 if (is_ms_hook)
1589 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1590 unsigned int filler_cc = 0xcccccccc;
1592 for (i = 0; i < filler_count; i += 4)
1593 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1596 #ifdef SUBTARGET_ASM_UNWIND_INIT
1597 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1598 #endif
1600 ASM_OUTPUT_LABEL (asm_out_file, fname);
1602 /* Output magic byte marker, if hot-patch attribute is set. */
1603 if (is_ms_hook)
1605 if (TARGET_64BIT)
1607 /* leaq [%rsp + 0], %rsp */
1608 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1609 asm_out_file);
1611 else
1613 /* movl.s %edi, %edi
1614 push %ebp
1615 movl.s %esp, %ebp */
1616 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1621 /* Implementation of call abi switching target hook. Specific to FNDECL
1622 the specific call register sets are set. See also
1623 ix86_conditional_register_usage for more details. */
1624 void
1625 ix86_call_abi_override (const_tree fndecl)
1627 cfun->machine->call_abi = ix86_function_abi (fndecl);
1630 /* Return 1 if pseudo register should be created and used to hold
1631 GOT address for PIC code. */
1632 bool
1633 ix86_use_pseudo_pic_reg (void)
1635 if ((TARGET_64BIT
1636 && (ix86_cmodel == CM_SMALL_PIC
1637 || TARGET_PECOFF))
1638 || !flag_pic)
1639 return false;
1640 return true;
1643 /* Initialize large model PIC register. */
1645 static void
1646 ix86_init_large_pic_reg (unsigned int tmp_regno)
1648 rtx_code_label *label;
1649 rtx tmp_reg;
1651 gcc_assert (Pmode == DImode);
1652 label = gen_label_rtx ();
1653 emit_label (label);
1654 LABEL_PRESERVE_P (label) = 1;
1655 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1656 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1657 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1658 label));
1659 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1660 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
1661 pic_offset_table_rtx, tmp_reg));
1662 const char *name = LABEL_NAME (label);
1663 PUT_CODE (label, NOTE);
1664 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1665 NOTE_DELETED_LABEL_NAME (label) = name;
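/* The emitted sequence corresponds roughly to the following assembly
   (a hedged sketch; the exact operands depend on the registers chosen):

     .L1:
	leaq	.L1(%rip), %rbx			# set_rip_rex64
	movabsq	$_GLOBAL_OFFSET_TABLE_-.L1, %r11	# set_got_offset_rex64
	addq	%r11, %rbx			# PIC reg now points at the GOT

   where %rbx stands in for the pseudo PIC register and %r11 is the
   TMP_REGNO scratch passed in by the caller.  */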
1668 /* Create and initialize PIC register if required. */
1669 static void
1670 ix86_init_pic_reg (void)
1672 edge entry_edge;
1673 rtx_insn *seq;
1675 if (!ix86_use_pseudo_pic_reg ())
1676 return;
1678 start_sequence ();
1680 if (TARGET_64BIT)
1682 if (ix86_cmodel == CM_LARGE_PIC)
1683 ix86_init_large_pic_reg (R11_REG);
1684 else
1685 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1687 else
1689 /* If there is a future mcount call in the function, it is more profitable
1690 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1691 rtx reg = crtl->profile
1692 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1693 : pic_offset_table_rtx;
1694 rtx_insn *insn = emit_insn (gen_set_got (reg));
1695 RTX_FRAME_RELATED_P (insn) = 1;
1696 if (crtl->profile)
1697 emit_move_insn (pic_offset_table_rtx, reg);
1698 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1701 seq = get_insns ();
1702 end_sequence ();
1704 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1705 insert_insn_on_edge (seq, entry_edge);
1706 commit_one_edge_insertion (entry_edge);
1709 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1710 for a call to a function whose data type is FNTYPE.
1711 For a library call, FNTYPE is 0. */
1713 void
1714 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1715 tree fntype, /* tree ptr for function decl */
1716 rtx libname, /* SYMBOL_REF of library name or 0 */
1717 tree fndecl,
1718 int caller)
1720 struct cgraph_local_info *i = NULL;
1721 struct cgraph_node *target = NULL;
1723 memset (cum, 0, sizeof (*cum));
1725 if (fndecl)
1727 target = cgraph_node::get (fndecl);
1728 if (target)
1730 target = target->function_symbol ();
1731 i = cgraph_node::local_info (target->decl);
1732 cum->call_abi = ix86_function_abi (target->decl);
1734 else
1735 cum->call_abi = ix86_function_abi (fndecl);
1737 else
1738 cum->call_abi = ix86_function_type_abi (fntype);
1740 cum->caller = caller;
1742 /* Set up the number of registers to use for passing arguments. */
1743 cum->nregs = ix86_regparm;
1744 if (TARGET_64BIT)
1746 cum->nregs = (cum->call_abi == SYSV_ABI
1747 ? X86_64_REGPARM_MAX
1748 : X86_64_MS_REGPARM_MAX);
1750 if (TARGET_SSE)
1752 cum->sse_nregs = SSE_REGPARM_MAX;
1753 if (TARGET_64BIT)
1755 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1756 ? X86_64_SSE_REGPARM_MAX
1757 : X86_64_MS_SSE_REGPARM_MAX);
1760 if (TARGET_MMX)
1761 cum->mmx_nregs = MMX_REGPARM_MAX;
1762 cum->warn_avx512f = true;
1763 cum->warn_avx = true;
1764 cum->warn_sse = true;
1765 cum->warn_mmx = true;
1767 /* Because the type might mismatch between caller and callee, we need to
1768 use the actual type of the function for local calls.
1769 FIXME: cgraph_analyze can be told to actually record whether a function
1770 uses va_start, so for local functions maybe_vaarg can be made aggressive,
1771 helping K&R code.
1772 FIXME: once the type system is fixed, we won't need this code anymore. */
1773 if (i && i->local && i->can_change_signature)
1774 fntype = TREE_TYPE (target->decl);
1775 cum->stdarg = stdarg_p (fntype);
1776 cum->maybe_vaarg = (fntype
1777 ? (!prototype_p (fntype) || stdarg_p (fntype))
1778 : !libname);
1780 cum->decl = fndecl;
1782 cum->warn_empty = !warn_abi || cum->stdarg;
1783 if (!cum->warn_empty && fntype)
1785 function_args_iterator iter;
1786 tree argtype;
1787 bool seen_empty_type = false;
1788 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1790 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1791 break;
1792 if (TYPE_EMPTY_P (argtype))
1793 seen_empty_type = true;
1794 else if (seen_empty_type)
1796 cum->warn_empty = true;
1797 break;
1802 if (!TARGET_64BIT)
1804 /* If there are variable arguments, then we won't pass anything
1805 in registers in 32-bit mode. */
1806 if (stdarg_p (fntype))
1808 cum->nregs = 0;
1809 /* Since in 32-bit mode variable arguments are always passed on
1810 the stack, there is a scratch register available for an indirect
1811 sibcall. */
1812 cfun->machine->arg_reg_available = true;
1813 cum->sse_nregs = 0;
1814 cum->mmx_nregs = 0;
1815 cum->warn_avx512f = false;
1816 cum->warn_avx = false;
1817 cum->warn_sse = false;
1818 cum->warn_mmx = false;
1819 return;
1822 /* Use ecx and edx registers if function has fastcall attribute,
1823 else look for regparm information. */
1824 if (fntype)
1826 unsigned int ccvt = ix86_get_callcvt (fntype);
1827 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1829 cum->nregs = 1;
1830 cum->fastcall = 1; /* Same first register as in fastcall. */
1832 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1834 cum->nregs = 2;
1835 cum->fastcall = 1;
1837 else
1838 cum->nregs = ix86_function_regparm (fntype, fndecl);
1841 /* Set up the number of SSE registers used for passing SFmode
1842 and DFmode arguments. Warn for mismatching ABI. */
1843 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1846 cfun->machine->arg_reg_available = (cum->nregs > 0);
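/* An illustrative summary (sketch only, assuming default options) of how the
   register counts set up above typically come out:

     32-bit, cdecl:          cum->nregs = 0 (everything on the stack)
     32-bit, regparm(3):     cum->nregs = 3 (EAX, EDX, ECX)
     32-bit, fastcall:       cum->nregs = 2 (ECX, EDX)
     64-bit, SysV ABI:       cum->nregs = 6 (RDI, RSI, RDX, RCX, R8, R9),
                             cum->sse_nregs = 8 (XMM0..XMM7)
     64-bit, MS ABI:         cum->nregs = 4 (RCX, RDX, R8, R9),
                             cum->sse_nregs = 4 (XMM0..XMM3)

   Variadic 32-bit functions reset all of these counts to zero above.  */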
1849 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1850 But in the case of vector types, it is some vector mode.
1852 When we have only some of our vector isa extensions enabled, then there
1853 are some modes for which vector_mode_supported_p is false. For these
1854 modes, the generic vector support in gcc will choose some non-vector mode
1855 in order to implement the type. By computing the natural mode, we'll
1856 select the proper ABI location for the operand and not depend on whatever
1857 the middle-end decides to do with these vector types.
1859 The middle-end can't deal with vector types larger than 16 bytes. In this
1860 case, we return the original mode and warn about the ABI change if CUM isn't
1861 NULL.
1863 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1864 available for the function return value. */
1866 static machine_mode
1867 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1868 bool in_return)
1870 machine_mode mode = TYPE_MODE (type);
1872 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1874 HOST_WIDE_INT size = int_size_in_bytes (type);
1875 if ((size == 8 || size == 16 || size == 32 || size == 64)
1876 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1877 && TYPE_VECTOR_SUBPARTS (type) > 1)
1879 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1881 /* There are no XFmode vector modes. */
1882 if (innermode == XFmode)
1883 return mode;
1885 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1886 mode = MIN_MODE_VECTOR_FLOAT;
1887 else
1888 mode = MIN_MODE_VECTOR_INT;
1890 /* Get the mode which has this inner mode and number of units. */
1891 FOR_EACH_MODE_FROM (mode, mode)
1892 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1893 && GET_MODE_INNER (mode) == innermode)
1895 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1897 static bool warnedavx512f;
1898 static bool warnedavx512f_ret;
1900 if (cum && cum->warn_avx512f && !warnedavx512f)
1902 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1903 "without AVX512F enabled changes the ABI"))
1904 warnedavx512f = true;
1906 else if (in_return && !warnedavx512f_ret)
1908 if (warning (OPT_Wpsabi, "AVX512F vector return "
1909 "without AVX512F enabled changes the ABI"))
1910 warnedavx512f_ret = true;
1913 return TYPE_MODE (type);
1915 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1917 static bool warnedavx;
1918 static bool warnedavx_ret;
1920 if (cum && cum->warn_avx && !warnedavx)
1922 if (warning (OPT_Wpsabi, "AVX vector argument "
1923 "without AVX enabled changes the ABI"))
1924 warnedavx = true;
1926 else if (in_return && !warnedavx_ret)
1928 if (warning (OPT_Wpsabi, "AVX vector return "
1929 "without AVX enabled changes the ABI"))
1930 warnedavx_ret = true;
1933 return TYPE_MODE (type);
1935 else if (((size == 8 && TARGET_64BIT) || size == 16)
1936 && !TARGET_SSE
1937 && !TARGET_IAMCU)
1939 static bool warnedsse;
1940 static bool warnedsse_ret;
1942 if (cum && cum->warn_sse && !warnedsse)
1944 if (warning (OPT_Wpsabi, "SSE vector argument "
1945 "without SSE enabled changes the ABI"))
1946 warnedsse = true;
1948 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1950 if (warning (OPT_Wpsabi, "SSE vector return "
1951 "without SSE enabled changes the ABI"))
1952 warnedsse_ret = true;
1955 else if ((size == 8 && !TARGET_64BIT)
1956 && (!cfun
1957 || cfun->machine->func_type == TYPE_NORMAL)
1958 && !TARGET_MMX
1959 && !TARGET_IAMCU)
1961 static bool warnedmmx;
1962 static bool warnedmmx_ret;
1964 if (cum && cum->warn_mmx && !warnedmmx)
1966 if (warning (OPT_Wpsabi, "MMX vector argument "
1967 "without MMX enabled changes the ABI"))
1968 warnedmmx = true;
1970 else if (in_return && !warnedmmx_ret)
1972 if (warning (OPT_Wpsabi, "MMX vector return "
1973 "without MMX enabled changes the ABI"))
1974 warnedmmx_ret = true;
1977 return mode;
1980 gcc_unreachable ();
1984 return mode;
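/* As an illustrative example (assuming default options), consider a
   user-defined 256-bit vector passed by value:

     typedef int v8si __attribute__ ((vector_size (32)));
     void f (v8si x);

   With -mavx enabled the natural mode is V8SImode and X goes in %ymm0.
   Without AVX that vector mode isn't supported, so the function above
   returns the original (non-vector) mode and emits the -Wpsabi warning,
   since the argument location differs between the two cases.  */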
1987 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1988 this may not agree with the mode that the type system has chosen for the
1989 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1990 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1992 static rtx
1993 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1994 unsigned int regno)
1996 rtx tmp;
1998 if (orig_mode != BLKmode)
1999 tmp = gen_rtx_REG (orig_mode, regno);
2000 else
2002 tmp = gen_rtx_REG (mode, regno);
2003 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2004 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2007 return tmp;
2010 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2011 of this code is to classify each eightbyte of an incoming argument by register
2012 class and assign registers accordingly. */
2014 /* Return the union class of CLASS1 and CLASS2.
2015 See the x86-64 PS ABI for details. */
2017 static enum x86_64_reg_class
2018 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2020 /* Rule #1: If both classes are equal, this is the resulting class. */
2021 if (class1 == class2)
2022 return class1;
2024 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2025 the other class. */
2026 if (class1 == X86_64_NO_CLASS)
2027 return class2;
2028 if (class2 == X86_64_NO_CLASS)
2029 return class1;
2031 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2032 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2033 return X86_64_MEMORY_CLASS;
2035 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2036 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2037 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2038 return X86_64_INTEGERSI_CLASS;
2039 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2040 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2041 return X86_64_INTEGER_CLASS;
2043 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2044 MEMORY is used. */
2045 if (class1 == X86_64_X87_CLASS
2046 || class1 == X86_64_X87UP_CLASS
2047 || class1 == X86_64_COMPLEX_X87_CLASS
2048 || class2 == X86_64_X87_CLASS
2049 || class2 == X86_64_X87UP_CLASS
2050 || class2 == X86_64_COMPLEX_X87_CLASS)
2051 return X86_64_MEMORY_CLASS;
2053 /* Rule #6: Otherwise class SSE is used. */
2054 return X86_64_SSE_CLASS;
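/* A couple of illustrative merges for a single eightbyte (sketch only):

     struct { float a; float b; };   SSESF merged with SSE     -> SSE
                                     (rule #6; passed in one %xmm register)
     struct { int a; float b; };     INTEGERSI merged with SSE -> INTEGER
                                     (rule #4; passed in one integer register)

   The per-field classes come from classify_argument below; this routine
   only combines the classes that land in the same eightbyte.  */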
2057 /* Classify the argument of type TYPE and mode MODE.
2058 CLASSES will be filled by the register class used to pass each word
2059 of the operand. The number of words is returned. In case the parameter
2060 should be passed in memory, 0 is returned. As a special case for zero
2061 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2063 BIT_OFFSET is used internally for handling records and specifies the
2064 offset, in bits modulo 512, to avoid overflow cases.
2066 See the x86-64 PS ABI for details.
2069 static int
2070 classify_argument (machine_mode mode, const_tree type,
2071 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2073 HOST_WIDE_INT bytes
2074 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2075 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2077 /* Variable sized entities are always passed/returned in memory. */
2078 if (bytes < 0)
2079 return 0;
2081 if (mode != VOIDmode
2082 && targetm.calls.must_pass_in_stack (mode, type))
2083 return 0;
2085 if (type && AGGREGATE_TYPE_P (type))
2087 int i;
2088 tree field;
2089 enum x86_64_reg_class subclasses[MAX_CLASSES];
2091 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2092 if (bytes > 64)
2093 return 0;
2095 for (i = 0; i < words; i++)
2096 classes[i] = X86_64_NO_CLASS;
2098 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2099 signal the memory class, so handle this as a special case. */
2100 if (!words)
2102 classes[0] = X86_64_NO_CLASS;
2103 return 1;
2106 /* Classify each field of record and merge classes. */
2107 switch (TREE_CODE (type))
2109 case RECORD_TYPE:
2110 /* And now merge the fields of the structure. */
2111 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2113 if (TREE_CODE (field) == FIELD_DECL)
2115 int num;
2117 if (TREE_TYPE (field) == error_mark_node)
2118 continue;
2120 /* Bitfields are always classified as integer. Handle them
2121 early, since later code would consider them to be
2122 misaligned integers. */
2123 if (DECL_BIT_FIELD (field))
2125 for (i = (int_bit_position (field)
2126 + (bit_offset % 64)) / 8 / 8;
2127 i < ((int_bit_position (field) + (bit_offset % 64))
2128 + tree_to_shwi (DECL_SIZE (field))
2129 + 63) / 8 / 8; i++)
2130 classes[i]
2131 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2133 else
2135 int pos;
2137 type = TREE_TYPE (field);
2139 /* Flexible array member is ignored. */
2140 if (TYPE_MODE (type) == BLKmode
2141 && TREE_CODE (type) == ARRAY_TYPE
2142 && TYPE_SIZE (type) == NULL_TREE
2143 && TYPE_DOMAIN (type) != NULL_TREE
2144 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2145 == NULL_TREE))
2147 static bool warned;
2149 if (!warned && warn_psabi)
2151 warned = true;
2152 inform (input_location,
2153 "the ABI of passing struct with"
2154 " a flexible array member has"
2155 " changed in GCC 4.4");
2157 continue;
2159 num = classify_argument (TYPE_MODE (type), type,
2160 subclasses,
2161 (int_bit_position (field)
2162 + bit_offset) % 512);
2163 if (!num)
2164 return 0;
2165 pos = (int_bit_position (field)
2166 + (bit_offset % 64)) / 8 / 8;
2167 for (i = 0; i < num && (i + pos) < words; i++)
2168 classes[i + pos]
2169 = merge_classes (subclasses[i], classes[i + pos]);
2173 break;
2175 case ARRAY_TYPE:
2176 /* Arrays are handled as small records. */
2178 int num;
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2180 TREE_TYPE (type), subclasses, bit_offset);
2181 if (!num)
2182 return 0;
2184 /* The partial classes are now full classes. */
2185 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2186 subclasses[0] = X86_64_SSE_CLASS;
2187 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2188 && !((bit_offset % 64) == 0 && bytes == 4))
2189 subclasses[0] = X86_64_INTEGER_CLASS;
2191 for (i = 0; i < words; i++)
2192 classes[i] = subclasses[i % num];
2194 break;
2196 case UNION_TYPE:
2197 case QUAL_UNION_TYPE:
2198 /* Unions are similar to RECORD_TYPE but offset is always 0.
2200 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2202 if (TREE_CODE (field) == FIELD_DECL)
2204 int num;
2206 if (TREE_TYPE (field) == error_mark_node)
2207 continue;
2209 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2210 TREE_TYPE (field), subclasses,
2211 bit_offset);
2212 if (!num)
2213 return 0;
2214 for (i = 0; i < num && i < words; i++)
2215 classes[i] = merge_classes (subclasses[i], classes[i]);
2218 break;
2220 default:
2221 gcc_unreachable ();
2224 if (words > 2)
2226 /* When the size exceeds 16 bytes, if the first class isn't
2227 X86_64_SSE_CLASS or any of the remaining classes isn't
2228 X86_64_SSEUP_CLASS, everything should be passed in
2229 memory. */
2230 if (classes[0] != X86_64_SSE_CLASS)
2231 return 0;
2233 for (i = 1; i < words; i++)
2234 if (classes[i] != X86_64_SSEUP_CLASS)
2235 return 0;
2238 /* Final merger cleanup. */
2239 for (i = 0; i < words; i++)
2241 /* If one class is MEMORY, everything should be passed in
2242 memory. */
2243 if (classes[i] == X86_64_MEMORY_CLASS)
2244 return 0;
2246 /* X86_64_SSEUP_CLASS should always be preceded by
2247 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2248 if (classes[i] == X86_64_SSEUP_CLASS
2249 && classes[i - 1] != X86_64_SSE_CLASS
2250 && classes[i - 1] != X86_64_SSEUP_CLASS)
2252 /* The first one should never be X86_64_SSEUP_CLASS. */
2253 gcc_assert (i != 0);
2254 classes[i] = X86_64_SSE_CLASS;
2257 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2258 everything should be passed in memory. */
2259 if (classes[i] == X86_64_X87UP_CLASS
2260 && (classes[i - 1] != X86_64_X87_CLASS))
2262 static bool warned;
2264 /* The first one should never be X86_64_X87UP_CLASS. */
2265 gcc_assert (i != 0);
2266 if (!warned && warn_psabi)
2268 warned = true;
2269 inform (input_location,
2270 "the ABI of passing union with %<long double%>"
2271 " has changed in GCC 4.4");
2273 return 0;
2276 return words;
2279 /* Compute the alignment needed. We align all types to their natural boundaries,
2280 with the exception of XFmode, which is aligned to 64 bits. */
2281 if (mode != VOIDmode && mode != BLKmode)
2283 int mode_alignment = GET_MODE_BITSIZE (mode);
2285 if (mode == XFmode)
2286 mode_alignment = 128;
2287 else if (mode == XCmode)
2288 mode_alignment = 256;
2289 if (COMPLEX_MODE_P (mode))
2290 mode_alignment /= 2;
2291 /* Misaligned fields are always returned in memory. */
2292 if (bit_offset % mode_alignment)
2293 return 0;
2296 /* For V1xx modes, just use the base mode. */
2297 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2298 && GET_MODE_UNIT_SIZE (mode) == bytes)
2299 mode = GET_MODE_INNER (mode);
2301 /* Classification of atomic types. */
2302 switch (mode)
2304 case E_SDmode:
2305 case E_DDmode:
2306 classes[0] = X86_64_SSE_CLASS;
2307 return 1;
2308 case E_TDmode:
2309 classes[0] = X86_64_SSE_CLASS;
2310 classes[1] = X86_64_SSEUP_CLASS;
2311 return 2;
2312 case E_DImode:
2313 case E_SImode:
2314 case E_HImode:
2315 case E_QImode:
2316 case E_CSImode:
2317 case E_CHImode:
2318 case E_CQImode:
2320 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2322 /* Analyze last 128 bits only. */
2323 size = (size - 1) & 0x7f;
2325 if (size < 32)
2327 classes[0] = X86_64_INTEGERSI_CLASS;
2328 return 1;
2330 else if (size < 64)
2332 classes[0] = X86_64_INTEGER_CLASS;
2333 return 1;
2335 else if (size < 64+32)
2337 classes[0] = X86_64_INTEGER_CLASS;
2338 classes[1] = X86_64_INTEGERSI_CLASS;
2339 return 2;
2341 else if (size < 64+64)
2343 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2344 return 2;
2346 else
2347 gcc_unreachable ();
2349 case E_CDImode:
2350 case E_TImode:
2351 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2352 return 2;
2353 case E_COImode:
2354 case E_OImode:
2355 /* OImode shouldn't be used directly. */
2356 gcc_unreachable ();
2357 case E_CTImode:
2358 return 0;
2359 case E_SFmode:
2360 if (!(bit_offset % 64))
2361 classes[0] = X86_64_SSESF_CLASS;
2362 else
2363 classes[0] = X86_64_SSE_CLASS;
2364 return 1;
2365 case E_DFmode:
2366 classes[0] = X86_64_SSEDF_CLASS;
2367 return 1;
2368 case E_XFmode:
2369 classes[0] = X86_64_X87_CLASS;
2370 classes[1] = X86_64_X87UP_CLASS;
2371 return 2;
2372 case E_TFmode:
2373 classes[0] = X86_64_SSE_CLASS;
2374 classes[1] = X86_64_SSEUP_CLASS;
2375 return 2;
2376 case E_SCmode:
2377 classes[0] = X86_64_SSE_CLASS;
2378 if (!(bit_offset % 64))
2379 return 1;
2380 else
2382 static bool warned;
2384 if (!warned && warn_psabi)
2386 warned = true;
2387 inform (input_location,
2388 "the ABI of passing structure with %<complex float%>"
2389 " member has changed in GCC 4.4");
2391 classes[1] = X86_64_SSESF_CLASS;
2392 return 2;
2394 case E_DCmode:
2395 classes[0] = X86_64_SSEDF_CLASS;
2396 classes[1] = X86_64_SSEDF_CLASS;
2397 return 2;
2398 case E_XCmode:
2399 classes[0] = X86_64_COMPLEX_X87_CLASS;
2400 return 1;
2401 case E_TCmode:
2402 /* This mode is larger than 16 bytes. */
2403 return 0;
2404 case E_V8SFmode:
2405 case E_V8SImode:
2406 case E_V32QImode:
2407 case E_V16HImode:
2408 case E_V4DFmode:
2409 case E_V4DImode:
2410 classes[0] = X86_64_SSE_CLASS;
2411 classes[1] = X86_64_SSEUP_CLASS;
2412 classes[2] = X86_64_SSEUP_CLASS;
2413 classes[3] = X86_64_SSEUP_CLASS;
2414 return 4;
2415 case E_V8DFmode:
2416 case E_V16SFmode:
2417 case E_V8DImode:
2418 case E_V16SImode:
2419 case E_V32HImode:
2420 case E_V64QImode:
2421 classes[0] = X86_64_SSE_CLASS;
2422 classes[1] = X86_64_SSEUP_CLASS;
2423 classes[2] = X86_64_SSEUP_CLASS;
2424 classes[3] = X86_64_SSEUP_CLASS;
2425 classes[4] = X86_64_SSEUP_CLASS;
2426 classes[5] = X86_64_SSEUP_CLASS;
2427 classes[6] = X86_64_SSEUP_CLASS;
2428 classes[7] = X86_64_SSEUP_CLASS;
2429 return 8;
2430 case E_V4SFmode:
2431 case E_V4SImode:
2432 case E_V16QImode:
2433 case E_V8HImode:
2434 case E_V2DFmode:
2435 case E_V2DImode:
2436 classes[0] = X86_64_SSE_CLASS;
2437 classes[1] = X86_64_SSEUP_CLASS;
2438 return 2;
2439 case E_V1TImode:
2440 case E_V1DImode:
2441 case E_V2SFmode:
2442 case E_V2SImode:
2443 case E_V4HImode:
2444 case E_V8QImode:
2445 classes[0] = X86_64_SSE_CLASS;
2446 return 1;
2447 case E_BLKmode:
2448 case E_VOIDmode:
2449 return 0;
2450 default:
2451 gcc_assert (VECTOR_MODE_P (mode));
2453 if (bytes > 16)
2454 return 0;
2456 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2458 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2459 classes[0] = X86_64_INTEGERSI_CLASS;
2460 else
2461 classes[0] = X86_64_INTEGER_CLASS;
2462 classes[1] = X86_64_INTEGER_CLASS;
2463 return 1 + (bytes > 8);
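/* An illustrative classification (sketch, default x86-64 options):

     struct s { double d; long l; };     16 bytes, 2 eightbytes

   The first eightbyte holds only D and classifies as X86_64_SSEDF_CLASS;
   the second holds only L and classifies as X86_64_INTEGER_CLASS.  The
   function returns 2 with classes[] = { SSEDF, INTEGER }, so the struct is
   passed in one SSE register plus one integer register rather than in
   memory.  A struct larger than 16 bytes that is not a homogeneous
   SSE/SSEUP sequence returns 0 and is passed on the stack.  */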
2467 /* Examine the argument and set the number of registers required in each
2468 class. Return true iff the parameter should be passed in memory. */
2470 static bool
2471 examine_argument (machine_mode mode, const_tree type, int in_return,
2472 int *int_nregs, int *sse_nregs)
2474 enum x86_64_reg_class regclass[MAX_CLASSES];
2475 int n = classify_argument (mode, type, regclass, 0);
2477 *int_nregs = 0;
2478 *sse_nregs = 0;
2480 if (!n)
2481 return true;
2482 for (n--; n >= 0; n--)
2483 switch (regclass[n])
2485 case X86_64_INTEGER_CLASS:
2486 case X86_64_INTEGERSI_CLASS:
2487 (*int_nregs)++;
2488 break;
2489 case X86_64_SSE_CLASS:
2490 case X86_64_SSESF_CLASS:
2491 case X86_64_SSEDF_CLASS:
2492 (*sse_nregs)++;
2493 break;
2494 case X86_64_NO_CLASS:
2495 case X86_64_SSEUP_CLASS:
2496 break;
2497 case X86_64_X87_CLASS:
2498 case X86_64_X87UP_CLASS:
2499 case X86_64_COMPLEX_X87_CLASS:
2500 if (!in_return)
2501 return true;
2502 break;
2503 case X86_64_MEMORY_CLASS:
2504 gcc_unreachable ();
2507 return false;
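/* Continuing the example above: for struct s { double d; long l; } this
   sets *int_nregs = 1 and *sse_nregs = 1 and returns false (register
   passing is possible).  An argument containing a long double gets the
   x87 classes and forces a true return (memory), unless it is a return
   value, where %st(0)/%st(1) are usable.  */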
2510 /* Construct container for the argument used by GCC interface. See
2511 FUNCTION_ARG for the detailed description. */
2513 static rtx
2514 construct_container (machine_mode mode, machine_mode orig_mode,
2515 const_tree type, int in_return, int nintregs, int nsseregs,
2516 const int *intreg, int sse_regno)
2518 /* The following variables hold the static issued_error state. */
2519 static bool issued_sse_arg_error;
2520 static bool issued_sse_ret_error;
2521 static bool issued_x87_ret_error;
2523 machine_mode tmpmode;
2524 int bytes
2525 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2526 enum x86_64_reg_class regclass[MAX_CLASSES];
2527 int n;
2528 int i;
2529 int nexps = 0;
2530 int needed_sseregs, needed_intregs;
2531 rtx exp[MAX_CLASSES];
2532 rtx ret;
2534 n = classify_argument (mode, type, regclass, 0);
2535 if (!n)
2536 return NULL;
2537 if (examine_argument (mode, type, in_return, &needed_intregs,
2538 &needed_sseregs))
2539 return NULL;
2540 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2541 return NULL;
2543 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2544 some less clueful developer tries to use floating-point anyway. */
2545 if (needed_sseregs && !TARGET_SSE)
2547 if (in_return)
2549 if (!issued_sse_ret_error)
2551 error ("SSE register return with SSE disabled");
2552 issued_sse_ret_error = true;
2555 else if (!issued_sse_arg_error)
2557 error ("SSE register argument with SSE disabled");
2558 issued_sse_arg_error = true;
2560 return NULL;
2563 /* Likewise, error if the ABI requires us to return values in the
2564 x87 registers and the user specified -mno-80387. */
2565 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2566 for (i = 0; i < n; i++)
2567 if (regclass[i] == X86_64_X87_CLASS
2568 || regclass[i] == X86_64_X87UP_CLASS
2569 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2571 if (!issued_x87_ret_error)
2573 error ("x87 register return with x87 disabled");
2574 issued_x87_ret_error = true;
2576 return NULL;
2579 /* First construct the simple cases. Avoid SCmode, since we want to use
2580 single register to pass this type. */
2581 if (n == 1 && mode != SCmode)
2582 switch (regclass[0])
2584 case X86_64_INTEGER_CLASS:
2585 case X86_64_INTEGERSI_CLASS:
2586 return gen_rtx_REG (mode, intreg[0]);
2587 case X86_64_SSE_CLASS:
2588 case X86_64_SSESF_CLASS:
2589 case X86_64_SSEDF_CLASS:
2590 if (mode != BLKmode)
2591 return gen_reg_or_parallel (mode, orig_mode,
2592 GET_SSE_REGNO (sse_regno));
2593 break;
2594 case X86_64_X87_CLASS:
2595 case X86_64_COMPLEX_X87_CLASS:
2596 return gen_rtx_REG (mode, FIRST_STACK_REG);
2597 case X86_64_NO_CLASS:
2598 /* Zero sized array, struct or class. */
2599 return NULL;
2600 default:
2601 gcc_unreachable ();
2603 if (n == 2
2604 && regclass[0] == X86_64_SSE_CLASS
2605 && regclass[1] == X86_64_SSEUP_CLASS
2606 && mode != BLKmode)
2607 return gen_reg_or_parallel (mode, orig_mode,
2608 GET_SSE_REGNO (sse_regno));
2609 if (n == 4
2610 && regclass[0] == X86_64_SSE_CLASS
2611 && regclass[1] == X86_64_SSEUP_CLASS
2612 && regclass[2] == X86_64_SSEUP_CLASS
2613 && regclass[3] == X86_64_SSEUP_CLASS
2614 && mode != BLKmode)
2615 return gen_reg_or_parallel (mode, orig_mode,
2616 GET_SSE_REGNO (sse_regno));
2617 if (n == 8
2618 && regclass[0] == X86_64_SSE_CLASS
2619 && regclass[1] == X86_64_SSEUP_CLASS
2620 && regclass[2] == X86_64_SSEUP_CLASS
2621 && regclass[3] == X86_64_SSEUP_CLASS
2622 && regclass[4] == X86_64_SSEUP_CLASS
2623 && regclass[5] == X86_64_SSEUP_CLASS
2624 && regclass[6] == X86_64_SSEUP_CLASS
2625 && regclass[7] == X86_64_SSEUP_CLASS
2626 && mode != BLKmode)
2627 return gen_reg_or_parallel (mode, orig_mode,
2628 GET_SSE_REGNO (sse_regno));
2629 if (n == 2
2630 && regclass[0] == X86_64_X87_CLASS
2631 && regclass[1] == X86_64_X87UP_CLASS)
2632 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2634 if (n == 2
2635 && regclass[0] == X86_64_INTEGER_CLASS
2636 && regclass[1] == X86_64_INTEGER_CLASS
2637 && (mode == CDImode || mode == TImode || mode == BLKmode)
2638 && intreg[0] + 1 == intreg[1])
2640 if (mode == BLKmode)
2642 /* Use TImode for BLKmode values in 2 integer registers. */
2643 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2644 gen_rtx_REG (TImode, intreg[0]),
2645 GEN_INT (0));
2646 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2647 XVECEXP (ret, 0, 0) = exp[0];
2648 return ret;
2650 else
2651 return gen_rtx_REG (mode, intreg[0]);
2654 /* Otherwise figure out the entries of the PARALLEL. */
2655 for (i = 0; i < n; i++)
2657 int pos;
2659 switch (regclass[i])
2661 case X86_64_NO_CLASS:
2662 break;
2663 case X86_64_INTEGER_CLASS:
2664 case X86_64_INTEGERSI_CLASS:
2665 /* Merge TImodes on aligned occasions here too. */
2666 if (i * 8 + 8 > bytes)
2668 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2669 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2670 /* We've requested 24 bytes we
2671 don't have a mode for. Use DImode. */
2672 tmpmode = DImode;
2674 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2675 tmpmode = SImode;
2676 else
2677 tmpmode = DImode;
2678 exp [nexps++]
2679 = gen_rtx_EXPR_LIST (VOIDmode,
2680 gen_rtx_REG (tmpmode, *intreg),
2681 GEN_INT (i*8));
2682 intreg++;
2683 break;
2684 case X86_64_SSESF_CLASS:
2685 exp [nexps++]
2686 = gen_rtx_EXPR_LIST (VOIDmode,
2687 gen_rtx_REG (SFmode,
2688 GET_SSE_REGNO (sse_regno)),
2689 GEN_INT (i*8));
2690 sse_regno++;
2691 break;
2692 case X86_64_SSEDF_CLASS:
2693 exp [nexps++]
2694 = gen_rtx_EXPR_LIST (VOIDmode,
2695 gen_rtx_REG (DFmode,
2696 GET_SSE_REGNO (sse_regno)),
2697 GEN_INT (i*8));
2698 sse_regno++;
2699 break;
2700 case X86_64_SSE_CLASS:
2701 pos = i;
2702 switch (n)
2704 case 1:
2705 tmpmode = DImode;
2706 break;
2707 case 2:
2708 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2710 tmpmode = TImode;
2711 i++;
2713 else
2714 tmpmode = DImode;
2715 break;
2716 case 4:
2717 gcc_assert (i == 0
2718 && regclass[1] == X86_64_SSEUP_CLASS
2719 && regclass[2] == X86_64_SSEUP_CLASS
2720 && regclass[3] == X86_64_SSEUP_CLASS);
2721 tmpmode = OImode;
2722 i += 3;
2723 break;
2724 case 8:
2725 gcc_assert (i == 0
2726 && regclass[1] == X86_64_SSEUP_CLASS
2727 && regclass[2] == X86_64_SSEUP_CLASS
2728 && regclass[3] == X86_64_SSEUP_CLASS
2729 && regclass[4] == X86_64_SSEUP_CLASS
2730 && regclass[5] == X86_64_SSEUP_CLASS
2731 && regclass[6] == X86_64_SSEUP_CLASS
2732 && regclass[7] == X86_64_SSEUP_CLASS);
2733 tmpmode = XImode;
2734 i += 7;
2735 break;
2736 default:
2737 gcc_unreachable ();
2739 exp [nexps++]
2740 = gen_rtx_EXPR_LIST (VOIDmode,
2741 gen_rtx_REG (tmpmode,
2742 GET_SSE_REGNO (sse_regno)),
2743 GEN_INT (pos*8));
2744 sse_regno++;
2745 break;
2746 default:
2747 gcc_unreachable ();
2751 /* Empty aligned struct, union or class. */
2752 if (nexps == 0)
2753 return NULL;
2755 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2756 for (i = 0; i < nexps; i++)
2757 XVECEXP (ret, 0, i) = exp [i];
2758 return ret;
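/* For the running example struct s { double d; long l; } used as the first
   argument of a call, the container built here is roughly:

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte is loaded into %xmm0 and the second into %rdi.
   This is a sketch; the exact registers depend on how many integer and SSE
   argument slots earlier parameters have already consumed.  */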
2761 /* Update the data in CUM to advance over an argument of mode MODE
2762 and data type TYPE. (TYPE is null for libcalls where that information
2763 may not be available.)
2765 Return the number of integer registers advanced over. */
2767 static int
2768 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2769 const_tree type, HOST_WIDE_INT bytes,
2770 HOST_WIDE_INT words)
2772 int res = 0;
2773 bool error_p = false;
2775 if (TARGET_IAMCU)
2777 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2778 bytes in registers. */
2779 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2780 goto pass_in_reg;
2781 return res;
2784 switch (mode)
2786 default:
2787 break;
2789 case E_BLKmode:
2790 if (bytes < 0)
2791 break;
2792 /* FALLTHRU */
2794 case E_DImode:
2795 case E_SImode:
2796 case E_HImode:
2797 case E_QImode:
2798 pass_in_reg:
2799 cum->words += words;
2800 cum->nregs -= words;
2801 cum->regno += words;
2802 if (cum->nregs >= 0)
2803 res = words;
2804 if (cum->nregs <= 0)
2806 cum->nregs = 0;
2807 cfun->machine->arg_reg_available = false;
2808 cum->regno = 0;
2810 break;
2812 case E_OImode:
2813 /* OImode shouldn't be used directly. */
2814 gcc_unreachable ();
2816 case E_DFmode:
2817 if (cum->float_in_sse == -1)
2818 error_p = true;
2819 if (cum->float_in_sse < 2)
2820 break;
2821 /* FALLTHRU */
2822 case E_SFmode:
2823 if (cum->float_in_sse == -1)
2824 error_p = true;
2825 if (cum->float_in_sse < 1)
2826 break;
2827 /* FALLTHRU */
2829 case E_V8SFmode:
2830 case E_V8SImode:
2831 case E_V64QImode:
2832 case E_V32HImode:
2833 case E_V16SImode:
2834 case E_V8DImode:
2835 case E_V16SFmode:
2836 case E_V8DFmode:
2837 case E_V32QImode:
2838 case E_V16HImode:
2839 case E_V4DFmode:
2840 case E_V4DImode:
2841 case E_TImode:
2842 case E_V16QImode:
2843 case E_V8HImode:
2844 case E_V4SImode:
2845 case E_V2DImode:
2846 case E_V4SFmode:
2847 case E_V2DFmode:
2848 if (!type || !AGGREGATE_TYPE_P (type))
2850 cum->sse_words += words;
2851 cum->sse_nregs -= 1;
2852 cum->sse_regno += 1;
2853 if (cum->sse_nregs <= 0)
2855 cum->sse_nregs = 0;
2856 cum->sse_regno = 0;
2859 break;
2861 case E_V8QImode:
2862 case E_V4HImode:
2863 case E_V2SImode:
2864 case E_V2SFmode:
2865 case E_V1TImode:
2866 case E_V1DImode:
2867 if (!type || !AGGREGATE_TYPE_P (type))
2869 cum->mmx_words += words;
2870 cum->mmx_nregs -= 1;
2871 cum->mmx_regno += 1;
2872 if (cum->mmx_nregs <= 0)
2874 cum->mmx_nregs = 0;
2875 cum->mmx_regno = 0;
2878 break;
2880 if (error_p)
2882 cum->float_in_sse = 0;
2883 error ("calling %qD with SSE calling convention without "
2884 "SSE/SSE2 enabled", cum->decl);
2885 sorry ("this is a GCC bug that can be worked around by adding "
2886 "attribute used to function called");
2889 return res;
2892 static int
2893 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2894 const_tree type, HOST_WIDE_INT words, bool named)
2896 int int_nregs, sse_nregs;
2898 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
2899 if (!named && (VALID_AVX512F_REG_MODE (mode)
2900 || VALID_AVX256_REG_MODE (mode)))
2901 return 0;
2903 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2904 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2906 cum->nregs -= int_nregs;
2907 cum->sse_nregs -= sse_nregs;
2908 cum->regno += int_nregs;
2909 cum->sse_regno += sse_nregs;
2910 return int_nregs;
2912 else
2914 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2915 cum->words = ROUND_UP (cum->words, align);
2916 cum->words += words;
2917 return 0;
2921 static int
2922 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2923 HOST_WIDE_INT words)
2925 /* Otherwise, this should be passed indirectly. */
2926 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2928 cum->words += words;
2929 if (cum->nregs > 0)
2931 cum->nregs -= 1;
2932 cum->regno += 1;
2933 return 1;
2935 return 0;
2938 /* Update the data in CUM to advance over an argument of mode MODE and
2939 data type TYPE. (TYPE is null for libcalls where that information
2940 may not be available.) */
2942 static void
2943 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
2944 const_tree type, bool named)
2946 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2947 HOST_WIDE_INT bytes, words;
2948 int nregs;
2950 /* The argument of interrupt handler is a special case and is
2951 handled in ix86_function_arg. */
2952 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2953 return;
2955 if (mode == BLKmode)
2956 bytes = int_size_in_bytes (type);
2957 else
2958 bytes = GET_MODE_SIZE (mode);
2959 words = CEIL (bytes, UNITS_PER_WORD);
2961 if (type)
2962 mode = type_natural_mode (type, NULL, false);
2964 if (TARGET_64BIT)
2966 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2968 if (call_abi == MS_ABI)
2969 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2970 else
2971 nregs = function_arg_advance_64 (cum, mode, type, words, named);
2973 else
2974 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
2976 if (!nregs)
2978 /* Track if there are outgoing arguments on stack. */
2979 if (cum->caller)
2980 cfun->machine->outgoing_args_on_stack = true;
2984 /* Define where to put the arguments to a function.
2985 Value is zero to push the argument on the stack,
2986 or a hard register in which to store the argument.
2988 MODE is the argument's machine mode.
2989 TYPE is the data type of the argument (as a tree).
2990 This is null for libcalls where that information may
2991 not be available.
2992 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2993 the preceding args and about the function being called.
2994 NAMED is nonzero if this argument is a named parameter
2995 (otherwise it is an extra parameter matching an ellipsis). */
2997 static rtx
2998 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2999 machine_mode orig_mode, const_tree type,
3000 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3002 bool error_p = false;
3004 /* Avoid the AL settings for the Unix64 ABI. */
3005 if (mode == VOIDmode)
3006 return constm1_rtx;
3008 if (TARGET_IAMCU)
3010 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3011 bytes in registers. */
3012 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3013 goto pass_in_reg;
3014 return NULL_RTX;
3017 switch (mode)
3019 default:
3020 break;
3022 case E_BLKmode:
3023 if (bytes < 0)
3024 break;
3025 /* FALLTHRU */
3026 case E_DImode:
3027 case E_SImode:
3028 case E_HImode:
3029 case E_QImode:
3030 pass_in_reg:
3031 if (words <= cum->nregs)
3033 int regno = cum->regno;
3035 /* Fastcall allocates the first two DWORD (SImode) or
3036 smaller arguments to ECX and EDX if the argument isn't an
3037 aggregate type. */
3038 if (cum->fastcall)
3040 if (mode == BLKmode
3041 || mode == DImode
3042 || (type && AGGREGATE_TYPE_P (type)))
3043 break;
3045 /* ECX, not EAX, is the first allocated register. */
3046 if (regno == AX_REG)
3047 regno = CX_REG;
3049 return gen_rtx_REG (mode, regno);
3051 break;
3053 case E_DFmode:
3054 if (cum->float_in_sse == -1)
3055 error_p = true;
3056 if (cum->float_in_sse < 2)
3057 break;
3058 /* FALLTHRU */
3059 case E_SFmode:
3060 if (cum->float_in_sse == -1)
3061 error_p = true;
3062 if (cum->float_in_sse < 1)
3063 break;
3064 /* FALLTHRU */
3065 case E_TImode:
3066 /* In 32bit, we pass TImode in xmm registers. */
3067 case E_V16QImode:
3068 case E_V8HImode:
3069 case E_V4SImode:
3070 case E_V2DImode:
3071 case E_V4SFmode:
3072 case E_V2DFmode:
3073 if (!type || !AGGREGATE_TYPE_P (type))
3075 if (cum->sse_nregs)
3076 return gen_reg_or_parallel (mode, orig_mode,
3077 cum->sse_regno + FIRST_SSE_REG);
3079 break;
3081 case E_OImode:
3082 case E_XImode:
3083 /* OImode and XImode shouldn't be used directly. */
3084 gcc_unreachable ();
3086 case E_V64QImode:
3087 case E_V32HImode:
3088 case E_V16SImode:
3089 case E_V8DImode:
3090 case E_V16SFmode:
3091 case E_V8DFmode:
3092 case E_V8SFmode:
3093 case E_V8SImode:
3094 case E_V32QImode:
3095 case E_V16HImode:
3096 case E_V4DFmode:
3097 case E_V4DImode:
3098 if (!type || !AGGREGATE_TYPE_P (type))
3100 if (cum->sse_nregs)
3101 return gen_reg_or_parallel (mode, orig_mode,
3102 cum->sse_regno + FIRST_SSE_REG);
3104 break;
3106 case E_V8QImode:
3107 case E_V4HImode:
3108 case E_V2SImode:
3109 case E_V2SFmode:
3110 case E_V1TImode:
3111 case E_V1DImode:
3112 if (!type || !AGGREGATE_TYPE_P (type))
3114 if (cum->mmx_nregs)
3115 return gen_reg_or_parallel (mode, orig_mode,
3116 cum->mmx_regno + FIRST_MMX_REG);
3118 break;
3120 if (error_p)
3122 cum->float_in_sse = 0;
3123 error ("calling %qD with SSE calling convention without "
3124 "SSE/SSE2 enabled", cum->decl);
3125 sorry ("this is a GCC bug that can be worked around by adding "
3126 "attribute used to function called");
3129 return NULL_RTX;
3132 static rtx
3133 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3134 machine_mode orig_mode, const_tree type, bool named)
3136 /* Handle a hidden AL argument containing number of registers
3137 for varargs x86-64 functions. */
3138 if (mode == VOIDmode)
3139 return GEN_INT (cum->maybe_vaarg
3140 ? (cum->sse_nregs < 0
3141 ? X86_64_SSE_REGPARM_MAX
3142 : cum->sse_regno)
3143 : -1);
3145 switch (mode)
3147 default:
3148 break;
3150 case E_V8SFmode:
3151 case E_V8SImode:
3152 case E_V32QImode:
3153 case E_V16HImode:
3154 case E_V4DFmode:
3155 case E_V4DImode:
3156 case E_V16SFmode:
3157 case E_V16SImode:
3158 case E_V64QImode:
3159 case E_V32HImode:
3160 case E_V8DFmode:
3161 case E_V8DImode:
3162 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3163 if (!named)
3164 return NULL;
3165 break;
3168 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3169 cum->sse_nregs,
3170 &x86_64_int_parameter_registers [cum->regno],
3171 cum->sse_regno);
3174 static rtx
3175 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3176 machine_mode orig_mode, bool named,
3177 HOST_WIDE_INT bytes)
3179 unsigned int regno;
3181 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
3182 We use the value -2 to specify that the current function call is MS ABI. */
3183 if (mode == VOIDmode)
3184 return GEN_INT (-2);
3186 /* If we've run out of registers, it goes on the stack. */
3187 if (cum->nregs == 0)
3188 return NULL_RTX;
3190 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3192 /* Only floating point modes are passed in anything but integer regs. */
3193 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3195 if (named)
3196 regno = cum->regno + FIRST_SSE_REG;
3197 else
3199 rtx t1, t2;
3201 /* Unnamed floating parameters are passed in both the
3202 SSE and integer registers. */
3203 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3204 t2 = gen_rtx_REG (mode, regno);
3205 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3206 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3207 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3210 /* Handle aggregate types passed in a register. */
3211 if (orig_mode == BLKmode)
3213 if (bytes > 0 && bytes <= 8)
3214 mode = (bytes > 4 ? DImode : SImode);
3215 if (mode == BLKmode)
3216 mode = DImode;
3219 return gen_reg_or_parallel (mode, orig_mode, regno);
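/* A short MS-ABI (Windows x64) sketch of the rules implemented above:
   the first four arguments use fixed slots RCX, RDX, R8, R9 (or
   XMM0..XMM3 for named SFmode/DFmode arguments); unnamed floating-point
   arguments are described with a PARALLEL so they end up in both the SSE
   and the integer register; aggregates of size 1, 2, 4 or 8 bytes travel
   in the integer slot, and anything larger or oddly sized has already been
   forced through a pointer by ix86_pass_by_reference below.  */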
3222 /* Return where to put the arguments to a function.
3223 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3225 MODE is the argument's machine mode. TYPE is the data type of the
3226 argument. It is null for libcalls where that information may not be
3227 available. CUM gives information about the preceding args and about
3228 the function being called. NAMED is nonzero if this argument is a
3229 named parameter (otherwise it is an extra parameter matching an
3230 ellipsis). */
3232 static rtx
3233 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
3234 const_tree type, bool named)
3236 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3237 machine_mode mode = omode;
3238 HOST_WIDE_INT bytes, words;
3239 rtx arg;
3241 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3243 gcc_assert (type != NULL_TREE);
3244 if (POINTER_TYPE_P (type))
3246 /* This is the pointer argument. */
3247 gcc_assert (TYPE_MODE (type) == Pmode);
3248 /* It is at -WORD(AP) in the current frame in interrupt and
3249 exception handlers. */
3250 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3252 else
3254 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3255 && TREE_CODE (type) == INTEGER_TYPE
3256 && TYPE_MODE (type) == word_mode);
3257 /* The error code is the word-mode integer argument at
3258 -2 * WORD(AP) in the current frame of the exception
3259 handler. */
3260 arg = gen_rtx_MEM (word_mode,
3261 plus_constant (Pmode,
3262 arg_pointer_rtx,
3263 -2 * UNITS_PER_WORD));
3265 return arg;
3268 if (mode == BLKmode)
3269 bytes = int_size_in_bytes (type);
3270 else
3271 bytes = GET_MODE_SIZE (mode);
3272 words = CEIL (bytes, UNITS_PER_WORD);
3274 /* To simplify the code below, represent vector types with a vector mode
3275 even if MMX/SSE are not active. */
3276 if (type && TREE_CODE (type) == VECTOR_TYPE)
3277 mode = type_natural_mode (type, cum, false);
3279 if (TARGET_64BIT)
3281 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3283 if (call_abi == MS_ABI)
3284 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
3285 else
3286 arg = function_arg_64 (cum, mode, omode, type, named);
3288 else
3289 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
3291 /* Track if there are outgoing arguments on stack. */
3292 if (arg == NULL_RTX && cum->caller)
3293 cfun->machine->outgoing_args_on_stack = true;
3295 return arg;
3298 /* A C expression that indicates when an argument must be passed by
3299 reference. If nonzero for an argument, a copy of that argument is
3300 made in memory and a pointer to the argument is passed instead of
3301 the argument itself. The pointer is passed in whatever way is
3302 appropriate for passing a pointer to that type. */
3304 static bool
3305 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
3306 const_tree type, bool)
3308 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3310 if (TARGET_64BIT)
3312 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3314 /* See Windows x64 Software Convention. */
3315 if (call_abi == MS_ABI)
3317 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
3319 if (type)
3321 /* Arrays are passed by reference. */
3322 if (TREE_CODE (type) == ARRAY_TYPE)
3323 return true;
3325 if (RECORD_OR_UNION_TYPE_P (type))
3327 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3328 are passed by reference. */
3329 msize = int_size_in_bytes (type);
3333 /* __m128 is passed by reference. */
3334 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3336 else if (type && int_size_in_bytes (type) == -1)
3337 return true;
3340 return false;
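/* Illustrative outcomes under the Windows x64 convention (sketch only):

     struct { char c[3]; }    size 3  -> by reference (not 1/2/4/8 bytes)
     struct { long long x; }  size 8  -> by value, in an integer slot
     __m128                   size 16 -> by reference
     array type (from a language that passes arrays by value) -> by reference

   For the 64-bit SysV ABI this hook returns false except for variably
   sized types; oversized or awkward aggregates are instead forced to
   memory by the classification code.  */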
3343 /* Return true when TYPE should be 128bit aligned for 32bit argument
3344 passing ABI. XXX: This function is obsolete and is only used for
3345 checking psABI compatibility with previous versions of GCC. */
3347 static bool
3348 ix86_compat_aligned_value_p (const_tree type)
3350 machine_mode mode = TYPE_MODE (type);
3351 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3352 || mode == TDmode
3353 || mode == TFmode
3354 || mode == TCmode)
3355 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3356 return true;
3357 if (TYPE_ALIGN (type) < 128)
3358 return false;
3360 if (AGGREGATE_TYPE_P (type))
3362 /* Walk the aggregates recursively. */
3363 switch (TREE_CODE (type))
3365 case RECORD_TYPE:
3366 case UNION_TYPE:
3367 case QUAL_UNION_TYPE:
3369 tree field;
3371 /* Walk all the structure fields. */
3372 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3374 if (TREE_CODE (field) == FIELD_DECL
3375 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3376 return true;
3378 break;
3381 case ARRAY_TYPE:
3382 /* Just for use if some languages pass arrays by value. */
3383 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3384 return true;
3385 break;
3387 default:
3388 gcc_unreachable ();
3391 return false;
3394 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3395 XXX: This function is obsolete and is only used for checking psABI
3396 compatibility with previous versions of GCC. */
3398 static unsigned int
3399 ix86_compat_function_arg_boundary (machine_mode mode,
3400 const_tree type, unsigned int align)
3402 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3403 natural boundaries. */
3404 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3406 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3407 make an exception for SSE modes since these require 128bit
3408 alignment.
3410 The handling here differs from field_alignment. ICC aligns MMX
3411 arguments to 4 byte boundaries, while structure fields are aligned
3412 to 8 byte boundaries. */
3413 if (!type)
3415 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3416 align = PARM_BOUNDARY;
3418 else
3420 if (!ix86_compat_aligned_value_p (type))
3421 align = PARM_BOUNDARY;
3424 if (align > BIGGEST_ALIGNMENT)
3425 align = BIGGEST_ALIGNMENT;
3426 return align;
3429 /* Return true when TYPE should be 128bit aligned for 32bit argument
3430 passing ABI. */
3432 static bool
3433 ix86_contains_aligned_value_p (const_tree type)
3435 machine_mode mode = TYPE_MODE (type);
3437 if (mode == XFmode || mode == XCmode)
3438 return false;
3440 if (TYPE_ALIGN (type) < 128)
3441 return false;
3443 if (AGGREGATE_TYPE_P (type))
3445 /* Walk the aggregates recursively. */
3446 switch (TREE_CODE (type))
3448 case RECORD_TYPE:
3449 case UNION_TYPE:
3450 case QUAL_UNION_TYPE:
3452 tree field;
3454 /* Walk all the structure fields. */
3455 for (field = TYPE_FIELDS (type);
3456 field;
3457 field = DECL_CHAIN (field))
3459 if (TREE_CODE (field) == FIELD_DECL
3460 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3461 return true;
3463 break;
3466 case ARRAY_TYPE:
3467 /* Just for use if some languages pass arrays by value. */
3468 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3469 return true;
3470 break;
3472 default:
3473 gcc_unreachable ();
3476 else
3477 return TYPE_ALIGN (type) >= 128;
3479 return false;
3482 /* Gives the alignment boundary, in bits, of an argument with the
3483 specified mode and type. */
3485 static unsigned int
3486 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3488 unsigned int align;
3489 if (type)
3491 /* Since the main variant type is used for the call, convert the type
3492 to its main variant. */
3493 type = TYPE_MAIN_VARIANT (type);
3494 align = TYPE_ALIGN (type);
3495 if (TYPE_EMPTY_P (type))
3496 return PARM_BOUNDARY;
3498 else
3499 align = GET_MODE_ALIGNMENT (mode);
3500 if (align < PARM_BOUNDARY)
3501 align = PARM_BOUNDARY;
3502 else
3504 static bool warned;
3505 unsigned int saved_align = align;
3507 if (!TARGET_64BIT)
3509 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3510 if (!type)
3512 if (mode == XFmode || mode == XCmode)
3513 align = PARM_BOUNDARY;
3515 else if (!ix86_contains_aligned_value_p (type))
3516 align = PARM_BOUNDARY;
3518 if (align < 128)
3519 align = PARM_BOUNDARY;
3522 if (warn_psabi
3523 && !warned
3524 && align != ix86_compat_function_arg_boundary (mode, type,
3525 saved_align))
3527 warned = true;
3528 inform (input_location,
3529 "the ABI for passing parameters with %d-byte"
3530 " alignment has changed in GCC 4.6",
3531 align / BITS_PER_UNIT);
3535 return align;
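/* Typical results of the alignment computation above (sketch, in bits,
   default options):

     32-bit: double                                   ->  32 (PARM_BOUNDARY)
     32-bit: __m128, or an aggregate containing one   -> 128
     64-bit: __m256                                   -> 256 (natural alignment)

   The -Wpsabi note fires when the result differs from what the pre-GCC 4.6
   logic in ix86_compat_function_arg_boundary would have produced.  */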
3538 /* Return true if N is a possible register number of function value. */
3540 static bool
3541 ix86_function_value_regno_p (const unsigned int regno)
3543 switch (regno)
3545 case AX_REG:
3546 return true;
3547 case DX_REG:
3548 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3549 case DI_REG:
3550 case SI_REG:
3551 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3553 /* Complex values are returned in %st(0)/%st(1) pair. */
3554 case ST0_REG:
3555 case ST1_REG:
3556 /* TODO: The function should depend on current function ABI but
3557 builtins.c would need updating then. Therefore we use the
3558 default ABI. */
3559 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3560 return false;
3561 return TARGET_FLOAT_RETURNS_IN_80387;
3563 /* Complex values are returned in %xmm0/%xmm1 pair. */
3564 case XMM0_REG:
3565 case XMM1_REG:
3566 return TARGET_SSE;
3568 case MM0_REG:
3569 if (TARGET_MACHO || TARGET_64BIT)
3570 return false;
3571 return TARGET_MMX;
3574 return false;
3577 /* Define how to find the value returned by a function.
3578 VALTYPE is the data type of the value (as a tree).
3579 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3580 otherwise, FUNC is 0. */
3582 static rtx
3583 function_value_32 (machine_mode orig_mode, machine_mode mode,
3584 const_tree fntype, const_tree fn)
3586 unsigned int regno;
3588 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3589 we normally prevent this case when mmx is not available. However
3590 some ABIs may require the result to be returned like DImode. */
3591 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3592 regno = FIRST_MMX_REG;
3594 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3595 we prevent this case when sse is not available. However some ABIs
3596 may require the result to be returned like integer TImode. */
3597 else if (mode == TImode
3598 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3599 regno = FIRST_SSE_REG;
3601 /* 32-byte vector modes in %ymm0. */
3602 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3603 regno = FIRST_SSE_REG;
3605 /* 64-byte vector modes in %zmm0. */
3606 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3607 regno = FIRST_SSE_REG;
3609 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3610 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3611 regno = FIRST_FLOAT_REG;
3612 else
3613 /* Most things go in %eax. */
3614 regno = AX_REG;
3616 /* Override FP return register with %xmm0 for local functions when
3617 SSE math is enabled or for functions with sseregparm attribute. */
3618 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3620 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3621 if (sse_level == -1)
3623 error ("calling %qD with SSE calling convention without "
3624 "SSE/SSE2 enabled", fn);
3625 sorry ("this is a GCC bug that can be worked around by adding "
3626 "attribute used to function called");
3628 else if ((sse_level >= 1 && mode == SFmode)
3629 || (sse_level == 2 && mode == DFmode))
3630 regno = FIRST_SSE_REG;
3633 /* OImode shouldn't be used directly. */
3634 gcc_assert (mode != OImode);
3636 return gen_rtx_REG (orig_mode, regno);
3639 static rtx
3640 function_value_64 (machine_mode orig_mode, machine_mode mode,
3641 const_tree valtype)
3643 rtx ret;
3645 /* Handle libcalls, which don't provide a type node. */
3646 if (valtype == NULL)
3648 unsigned int regno;
3650 switch (mode)
3652 case E_SFmode:
3653 case E_SCmode:
3654 case E_DFmode:
3655 case E_DCmode:
3656 case E_TFmode:
3657 case E_SDmode:
3658 case E_DDmode:
3659 case E_TDmode:
3660 regno = FIRST_SSE_REG;
3661 break;
3662 case E_XFmode:
3663 case E_XCmode:
3664 regno = FIRST_FLOAT_REG;
3665 break;
3666 case E_TCmode:
3667 return NULL;
3668 default:
3669 regno = AX_REG;
3672 return gen_rtx_REG (mode, regno);
3674 else if (POINTER_TYPE_P (valtype))
3676 /* Pointers are always returned in word_mode. */
3677 mode = word_mode;
3680 ret = construct_container (mode, orig_mode, valtype, 1,
3681 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3682 x86_64_int_return_registers, 0);
3684 /* For zero sized structures, construct_container returns NULL, but we need
3685 to keep the rest of the compiler happy by returning a meaningful value. */
3686 if (!ret)
3687 ret = gen_rtx_REG (orig_mode, AX_REG);
3689 return ret;
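/* Illustrative return-value locations produced by the SysV 64-bit logic
   above (sketch, default options):

     long / pointers               -> %rax
     __int128                      -> %rax:%rdx
     double                        -> %xmm0
     long double                   -> %st(0)
     struct { double d; long l; }  -> %xmm0 + %rax (a PARALLEL container)
     zero-sized struct             -> dummy %rax register, to keep callers happy  */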
3692 static rtx
3693 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3694 const_tree fntype, const_tree fn, const_tree valtype)
3696 unsigned int regno;
3698 /* Floating point return values in %st(0)
3699 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3700 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3701 && (GET_MODE_SIZE (mode) > 8
3702 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3704 regno = FIRST_FLOAT_REG;
3705 return gen_rtx_REG (orig_mode, regno);
3707 else
3708 return function_value_32 (orig_mode, mode, fntype, fn);
3711 static rtx
3712 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3713 const_tree valtype)
3715 unsigned int regno = AX_REG;
3717 if (TARGET_SSE)
3719 switch (GET_MODE_SIZE (mode))
3721 case 16:
3722 if (valtype != NULL_TREE
3723 && !VECTOR_INTEGER_TYPE_P (valtype)
3724 && !VECTOR_INTEGER_TYPE_P (valtype)
3725 && !INTEGRAL_TYPE_P (valtype)
3726 && !VECTOR_FLOAT_TYPE_P (valtype))
3727 break;
3728 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3729 && !COMPLEX_MODE_P (mode))
3730 regno = FIRST_SSE_REG;
3731 break;
3732 case 8:
3733 case 4:
3734 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3735 break;
3736 if (mode == SFmode || mode == DFmode)
3737 regno = FIRST_SSE_REG;
3738 break;
3739 default:
3740 break;
3743 return gen_rtx_REG (orig_mode, regno);
3746 static rtx
3747 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3748 machine_mode orig_mode, machine_mode mode)
3750 const_tree fn, fntype;
3752 fn = NULL_TREE;
3753 if (fntype_or_decl && DECL_P (fntype_or_decl))
3754 fn = fntype_or_decl;
3755 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3757 if (ix86_function_type_abi (fntype) == MS_ABI)
3759 if (TARGET_64BIT)
3760 return function_value_ms_64 (orig_mode, mode, valtype);
3761 else
3762 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3764 else if (TARGET_64BIT)
3765 return function_value_64 (orig_mode, mode, valtype);
3766 else
3767 return function_value_32 (orig_mode, mode, fntype, fn);
3770 static rtx
3771 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3773 machine_mode mode, orig_mode;
3775 orig_mode = TYPE_MODE (valtype);
3776 mode = type_natural_mode (valtype, NULL, true);
3777 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3780 /* Pointer function arguments and return values are promoted to
3781 word_mode for normal functions. */
3783 static machine_mode
3784 ix86_promote_function_mode (const_tree type, machine_mode mode,
3785 int *punsignedp, const_tree fntype,
3786 int for_return)
3788 if (cfun->machine->func_type == TYPE_NORMAL
3789 && type != NULL_TREE
3790 && POINTER_TYPE_P (type))
3792 *punsignedp = POINTERS_EXTEND_UNSIGNED;
3793 return word_mode;
3795 return default_promote_function_mode (type, mode, punsignedp, fntype,
3796 for_return);
3799 /* Return true if a structure, union or array with MODE containing FIELD
3800 should be accessed using BLKmode. */
3802 static bool
3803 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3805 /* Union with XFmode must be in BLKmode. */
3806 return (mode == XFmode
3807 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3808 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3812 ix86_libcall_value (machine_mode mode)
3814 return ix86_function_value_1 (NULL, NULL, mode, mode);
3817 /* Return true iff type is returned in memory. */
3819 static bool
3820 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3822 #ifdef SUBTARGET_RETURN_IN_MEMORY
3823 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3824 #else
3825 const machine_mode mode = type_natural_mode (type, NULL, true);
3826 HOST_WIDE_INT size;
3828 if (TARGET_64BIT)
3830 if (ix86_function_type_abi (fntype) == MS_ABI)
3832 size = int_size_in_bytes (type);
3834 /* __m128 is returned in xmm0. */
3835 if ((!type || VECTOR_INTEGER_TYPE_P (type)
3836 || INTEGRAL_TYPE_P (type)
3837 || VECTOR_FLOAT_TYPE_P (type))
3838 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3839 && !COMPLEX_MODE_P (mode)
3840 && (GET_MODE_SIZE (mode) == 16 || size == 16))
3841 return false;
3843 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
3844 return size != 1 && size != 2 && size != 4 && size != 8;
3846 else
3848 int needed_intregs, needed_sseregs;
3850 return examine_argument (mode, type, 1,
3851 &needed_intregs, &needed_sseregs);
3854 else
3856 size = int_size_in_bytes (type);
3858 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3859 bytes in registers. */
3860 if (TARGET_IAMCU)
3861 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3863 if (mode == BLKmode)
3864 return true;
3866 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3867 return false;
3869 if (VECTOR_MODE_P (mode) || mode == TImode)
3871 /* User-created vectors small enough to fit in EAX. */
3872 if (size < 8)
3873 return false;
3875 /* Unless the ABI prescribes otherwise,
3876 MMX/3dNow values are returned in MM0 if available. */
3878 if (size == 8)
3879 return TARGET_VECT8_RETURNS || !TARGET_MMX;
3881 /* SSE values are returned in XMM0 if available. */
3882 if (size == 16)
3883 return !TARGET_SSE;
3885 /* AVX values are returned in YMM0 if available. */
3886 if (size == 32)
3887 return !TARGET_AVX;
3889 /* AVX512F values are returned in ZMM0 if available. */
3890 if (size == 64)
3891 return !TARGET_AVX512F;
3894 if (mode == XFmode)
3895 return false;
3897 if (size > 12)
3898 return true;
3900 /* OImode shouldn't be used directly. */
3901 gcc_assert (mode != OImode);
3903 return false;
3905 #endif
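/* A few illustrative decisions made by the default (non-SUBTARGET) logic
   above, assuming a SysV target with default options:

     32-bit: struct { int a, b, c; }  (12 bytes, BLKmode)  -> memory
     32-bit: long long                                     -> EAX:EDX
     32-bit: long double              (XFmode)             -> %st(0)
     64-bit: struct { double d; long l; }                  -> %xmm0 + %rax
     64-bit: struct larger than 16 bytes of mixed fields   -> memory

   The 64-bit SysV cases simply reuse examine_argument on the return value.  */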
3909 /* Create the va_list data type. */
3911 static tree
3912 ix86_build_builtin_va_list_64 (void)
3914 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3916 record = lang_hooks.types.make_type (RECORD_TYPE);
3917 type_decl = build_decl (BUILTINS_LOCATION,
3918 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3920 f_gpr = build_decl (BUILTINS_LOCATION,
3921 FIELD_DECL, get_identifier ("gp_offset"),
3922 unsigned_type_node);
3923 f_fpr = build_decl (BUILTINS_LOCATION,
3924 FIELD_DECL, get_identifier ("fp_offset"),
3925 unsigned_type_node);
3926 f_ovf = build_decl (BUILTINS_LOCATION,
3927 FIELD_DECL, get_identifier ("overflow_arg_area"),
3928 ptr_type_node);
3929 f_sav = build_decl (BUILTINS_LOCATION,
3930 FIELD_DECL, get_identifier ("reg_save_area"),
3931 ptr_type_node);
3933 va_list_gpr_counter_field = f_gpr;
3934 va_list_fpr_counter_field = f_fpr;
3936 DECL_FIELD_CONTEXT (f_gpr) = record;
3937 DECL_FIELD_CONTEXT (f_fpr) = record;
3938 DECL_FIELD_CONTEXT (f_ovf) = record;
3939 DECL_FIELD_CONTEXT (f_sav) = record;
3941 TYPE_STUB_DECL (record) = type_decl;
3942 TYPE_NAME (record) = type_decl;
3943 TYPE_FIELDS (record) = f_gpr;
3944 DECL_CHAIN (f_gpr) = f_fpr;
3945 DECL_CHAIN (f_fpr) = f_ovf;
3946 DECL_CHAIN (f_ovf) = f_sav;
3948 layout_type (record);
3950 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3951 NULL_TREE, TYPE_ATTRIBUTES (record));
3953 /* The correct type is an array type of one element. */
3954 return build_array_type (record, build_index_type (size_zero_node));
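/* The record built above corresponds to the layout mandated by the SysV
   x86-64 psABI; in C terms it is roughly (illustrative only):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];

   gp_offset advances in 8-byte slots up to 48 as integer registers are
   consumed, fp_offset advances in 16-byte slots from 48 up to 176 for the
   SSE registers, overflow_arg_area points at the next stack-passed
   argument, and reg_save_area points at the block saved by
   setup_incoming_varargs_64 below.  */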
3957 /* Setup the builtin va_list data type and for 64-bit the additional
3958 calling convention specific va_list data types. */
3960 static tree
3961 ix86_build_builtin_va_list (void)
3963 if (TARGET_64BIT)
3965 /* Initialize ABI specific va_list builtin types.
3967 In lto1, we can encounter two va_list types:
3968 - one as a result of the type-merge across TUs, and
3969 - the one constructed here.
3970 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3971 a type identity check in canonical_va_list_type based on
3972 TYPE_MAIN_VARIANT (which we used to have) will not work.
3973 Instead, we tag each va_list_type_node with its unique attribute, and
3974 look for the attribute in the type identity check in
3975 canonical_va_list_type.
3977 Tagging sysv_va_list_type_node directly with the attribute is
3978 problematic since it's an array of one record, which will degrade into a
3979 pointer to the record when used as a parameter (see build_va_arg comments for
3980 an example), dropping the attribute in the process. So we tag the
3981 record instead. */
3983 /* For SYSV_ABI we use an array of one record. */
3984 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3986 /* For MS_ABI we use plain pointer to argument area. */
3987 tree char_ptr_type = build_pointer_type (char_type_node);
3988 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3989 TYPE_ATTRIBUTES (char_ptr_type));
3990 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3992 return ((ix86_abi == MS_ABI)
3993 ? ms_va_list_type_node
3994 : sysv_va_list_type_node);
3996 else
3998 /* For i386 we use plain pointer to argument area. */
3999 return build_pointer_type (char_type_node);
4003 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4005 static void
4006 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4008 rtx save_area, mem;
4009 alias_set_type set;
4010 int i, max;
4012 /* GPR size of varargs save area. */
4013 if (cfun->va_list_gpr_size)
4014 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4015 else
4016 ix86_varargs_gpr_size = 0;
4018 /* FPR size of varargs save area. We don't need it if we don't pass
4019 anything in SSE registers. */
4020 if (TARGET_SSE && cfun->va_list_fpr_size)
4021 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4022 else
4023 ix86_varargs_fpr_size = 0;
4025 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4026 return;
4028 save_area = frame_pointer_rtx;
4029 set = get_varargs_alias_set ();
4031 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4032 if (max > X86_64_REGPARM_MAX)
4033 max = X86_64_REGPARM_MAX;
4035 for (i = cum->regno; i < max; i++)
4037 mem = gen_rtx_MEM (word_mode,
4038 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4039 MEM_NOTRAP_P (mem) = 1;
4040 set_mem_alias_set (mem, set);
4041 emit_move_insn (mem,
4042 gen_rtx_REG (word_mode,
4043 x86_64_int_parameter_registers[i]));
4046 if (ix86_varargs_fpr_size)
4048 machine_mode smode;
4049 rtx_code_label *label;
4050 rtx test;
4052 /* Now emit code to save SSE registers. The AX parameter contains the number
4053 of SSE parameter registers used to call this function, though all we
4054 actually check here is the zero/non-zero status. */
4056 label = gen_label_rtx ();
4057 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4058 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4059 label));
4061 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4062 we used movdqa (i.e. TImode) instead? Perhaps even better would
4063 be if we could determine the real mode of the data, via a hook
4064 into pass_stdarg. Ignore all that for now. */
4065 smode = V4SFmode;
4066 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4067 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4069 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4070 if (max > X86_64_SSE_REGPARM_MAX)
4071 max = X86_64_SSE_REGPARM_MAX;
4073 for (i = cum->sse_regno; i < max; ++i)
4075 mem = plus_constant (Pmode, save_area,
4076 i * 16 + ix86_varargs_gpr_size);
4077 mem = gen_rtx_MEM (smode, mem);
4078 MEM_NOTRAP_P (mem) = 1;
4079 set_mem_alias_set (mem, set);
4080 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4082 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4085 emit_label (label);
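/* A rough sketch of the sequence emitted above for a SysV varargs
   function (illustrative assembly; the actual registers and offsets
   depend on how many named arguments precede the ellipsis):

       movq   %rdi, 0(%save_area)   ...   movq %r9, 40(%save_area)
       testb  %al, %al              # AL = number of SSE regs used
       je     1f
       movaps %xmm0, 48(%save_area) ...   movaps %xmm7, 160(%save_area)
     1:  */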
4089 static void
4090 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4092 alias_set_type set = get_varargs_alias_set ();
4093 int i;
4095 /* Reset to zero, as there might be a sysv va_arg used
4096 before. */
4097 ix86_varargs_gpr_size = 0;
4098 ix86_varargs_fpr_size = 0;
4100 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4102 rtx reg, mem;
4104 mem = gen_rtx_MEM (Pmode,
4105 plus_constant (Pmode, virtual_incoming_args_rtx,
4106 i * UNITS_PER_WORD));
4107 MEM_NOTRAP_P (mem) = 1;
4108 set_mem_alias_set (mem, set);
4110 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4111 emit_move_insn (mem, reg);
4115 static void
4116 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
4117 tree type, int *, int no_rtl)
4119 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4120 CUMULATIVE_ARGS next_cum;
4121 tree fntype;
4123 /* This argument doesn't appear to be used anymore. Which is good,
4124 because the old code here didn't suppress rtl generation. */
4125 gcc_assert (!no_rtl);
4127 if (!TARGET_64BIT)
4128 return;
4130 fntype = TREE_TYPE (current_function_decl);
4132 /* For varargs, we do not want to skip the dummy va_dcl argument.
4133 For stdargs, we do want to skip the last named argument. */
4134 next_cum = *cum;
4135 if (stdarg_p (fntype))
4136 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
4137 true);
4139 if (cum->call_abi == MS_ABI)
4140 setup_incoming_varargs_ms_64 (&next_cum);
4141 else
4142 setup_incoming_varargs_64 (&next_cum);
4145 static void
4146 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
4147 machine_mode mode,
4148 tree type,
4149 int *pretend_size ATTRIBUTE_UNUSED,
4150 int no_rtl)
4152 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4153 CUMULATIVE_ARGS next_cum;
4154 tree fntype;
4155 int max;
4157 gcc_assert (!no_rtl);
4159 /* Do nothing if we use plain pointer to argument area. */
4160 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
4161 return;
4163 fntype = TREE_TYPE (current_function_decl);
4165 /* For varargs, we do not want to skip the dummy va_dcl argument.
4166 For stdargs, we do want to skip the last named argument. */
4167 next_cum = *cum;
4168 if (stdarg_p (fntype))
4169 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
4170 true);
4172 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4173 if (max > X86_64_REGPARM_MAX)
4174 max = X86_64_REGPARM_MAX;
4178 /* Checks if TYPE is of kind va_list char *. */
4180 static bool
4181 is_va_list_char_pointer (tree type)
4183 tree canonic;
4185 /* For 32-bit it is always true. */
4186 if (!TARGET_64BIT)
4187 return true;
4188 canonic = ix86_canonical_va_list_type (type);
4189 return (canonic == ms_va_list_type_node
4190 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4193 /* Implement va_start. */
4195 static void
4196 ix86_va_start (tree valist, rtx nextarg)
4198 HOST_WIDE_INT words, n_gpr, n_fpr;
4199 tree f_gpr, f_fpr, f_ovf, f_sav;
4200 tree gpr, fpr, ovf, sav, t;
4201 tree type;
4202 rtx ovf_rtx;
4204 if (flag_split_stack
4205 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4207 unsigned int scratch_regno;
4209 /* When we are splitting the stack, we can't refer to the stack
4210 arguments using internal_arg_pointer, because they may be on
4211 the old stack. The split stack prologue will arrange to
4212 leave a pointer to the old stack arguments in a scratch
4213 register, which we here copy to a pseudo-register. The split
4214 stack prologue can't set the pseudo-register directly because
4215 it (the prologue) runs before any registers have been saved. */
4217 scratch_regno = split_stack_prologue_scratch_regno ();
4218 if (scratch_regno != INVALID_REGNUM)
4220 rtx reg;
4221 rtx_insn *seq;
4223 reg = gen_reg_rtx (Pmode);
4224 cfun->machine->split_stack_varargs_pointer = reg;
4226 start_sequence ();
4227 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4228 seq = get_insns ();
4229 end_sequence ();
4231 push_topmost_sequence ();
4232 emit_insn_after (seq, entry_of_function ());
4233 pop_topmost_sequence ();
4237 /* Only 64-bit targets need something special. */
4238 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4240 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4241 std_expand_builtin_va_start (valist, nextarg);
4242 else
4244 rtx va_r, next;
4246 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4247 next = expand_binop (ptr_mode, add_optab,
4248 cfun->machine->split_stack_varargs_pointer,
4249 crtl->args.arg_offset_rtx,
4250 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4251 convert_move (va_r, next, 0);
4253 return;
4256 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4257 f_fpr = DECL_CHAIN (f_gpr);
4258 f_ovf = DECL_CHAIN (f_fpr);
4259 f_sav = DECL_CHAIN (f_ovf);
4261 valist = build_simple_mem_ref (valist);
4262 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4263 /* The following should be folded into the MEM_REF offset. */
4264 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4265 f_gpr, NULL_TREE);
4266 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4267 f_fpr, NULL_TREE);
4268 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4269 f_ovf, NULL_TREE);
4270 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4271 f_sav, NULL_TREE);
4273 /* Count number of gp and fp argument registers used. */
4274 words = crtl->args.info.words;
4275 n_gpr = crtl->args.info.regno;
4276 n_fpr = crtl->args.info.sse_regno;
4278 if (cfun->va_list_gpr_size)
4280 type = TREE_TYPE (gpr);
4281 t = build2 (MODIFY_EXPR, type,
4282 gpr, build_int_cst (type, n_gpr * 8));
4283 TREE_SIDE_EFFECTS (t) = 1;
4284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4287 if (TARGET_SSE && cfun->va_list_fpr_size)
4289 type = TREE_TYPE (fpr);
4290 t = build2 (MODIFY_EXPR, type, fpr,
4291 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4292 TREE_SIDE_EFFECTS (t) = 1;
4293 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
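/* Register save area layout implied by the offsets just stored
   (per the SysV psABI; shown here only as a reminder):

       reg_save_area +   0 ..  47 : rdi, rsi, rdx, rcx, r8, r9 (8 bytes each)
       reg_save_area +  48 .. 175 : xmm0 .. xmm7 (16 bytes each)

   hence gp_offset = n_gpr * 8 and fp_offset = 48 + n_fpr * 16 above.  */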
4296 /* Find the overflow area. */
4297 type = TREE_TYPE (ovf);
4298 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4299 ovf_rtx = crtl->args.internal_arg_pointer;
4300 else
4301 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4302 t = make_tree (type, ovf_rtx);
4303 if (words != 0)
4304 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4306 t = build2 (MODIFY_EXPR, type, ovf, t);
4307 TREE_SIDE_EFFECTS (t) = 1;
4308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4310 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4312 /* Find the register save area.
4313 The function prologue saves it right above the stack frame. */
4314 type = TREE_TYPE (sav);
4315 t = make_tree (type, frame_pointer_rtx);
4316 if (!ix86_varargs_gpr_size)
4317 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4319 t = build2 (MODIFY_EXPR, type, sav, t);
4320 TREE_SIDE_EFFECTS (t) = 1;
4321 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4325 /* Implement va_arg. */
4327 static tree
4328 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4329 gimple_seq *post_p)
4331 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4332 tree f_gpr, f_fpr, f_ovf, f_sav;
4333 tree gpr, fpr, ovf, sav, t;
4334 int size, rsize;
4335 tree lab_false, lab_over = NULL_TREE;
4336 tree addr, t2;
4337 rtx container;
4338 int indirect_p = 0;
4339 tree ptrtype;
4340 machine_mode nat_mode;
4341 unsigned int arg_boundary;
4343 /* Only 64-bit targets need something special. */
4344 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4345 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4347 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4348 f_fpr = DECL_CHAIN (f_gpr);
4349 f_ovf = DECL_CHAIN (f_fpr);
4350 f_sav = DECL_CHAIN (f_ovf);
4352 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4353 valist, f_gpr, NULL_TREE);
4355 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4356 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4357 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4359 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4360 if (indirect_p)
4361 type = build_pointer_type (type);
4362 size = arg_int_size_in_bytes (type);
4363 rsize = CEIL (size, UNITS_PER_WORD);
4365 nat_mode = type_natural_mode (type, NULL, false);
4366 switch (nat_mode)
4368 case E_V8SFmode:
4369 case E_V8SImode:
4370 case E_V32QImode:
4371 case E_V16HImode:
4372 case E_V4DFmode:
4373 case E_V4DImode:
4374 case E_V16SFmode:
4375 case E_V16SImode:
4376 case E_V64QImode:
4377 case E_V32HImode:
4378 case E_V8DFmode:
4379 case E_V8DImode:
4380 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4381 if (!TARGET_64BIT_MS_ABI)
4383 container = NULL;
4384 break;
4386 /* FALLTHRU */
4388 default:
4389 container = construct_container (nat_mode, TYPE_MODE (type),
4390 type, 0, X86_64_REGPARM_MAX,
4391 X86_64_SSE_REGPARM_MAX, intreg,
4393 break;
4396 /* Pull the value out of the saved registers. */
4398 addr = create_tmp_var (ptr_type_node, "addr");
4400 if (container)
4402 int needed_intregs, needed_sseregs;
4403 bool need_temp;
4404 tree int_addr, sse_addr;
4406 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4407 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4409 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4411 need_temp = (!REG_P (container)
4412 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4413 || TYPE_ALIGN (type) > 128));
4415 /* In case we are passing a structure, verify that it occupies a consecutive
4416 block of the register save area. If not, we need to do moves. */
4417 if (!need_temp && !REG_P (container))
4419 /* Verify that all registers are strictly consecutive */
4420 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4422 int i;
4424 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4426 rtx slot = XVECEXP (container, 0, i);
4427 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4428 || INTVAL (XEXP (slot, 1)) != i * 16)
4429 need_temp = true;
4432 else
4434 int i;
4436 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4438 rtx slot = XVECEXP (container, 0, i);
4439 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4440 || INTVAL (XEXP (slot, 1)) != i * 8)
4441 need_temp = true;
4445 if (!need_temp)
4447 int_addr = addr;
4448 sse_addr = addr;
4450 else
4452 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4453 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4456 /* First ensure that we fit completely in registers. */
4457 if (needed_intregs)
4459 t = build_int_cst (TREE_TYPE (gpr),
4460 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4461 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4462 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4463 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4464 gimplify_and_add (t, pre_p);
4466 if (needed_sseregs)
4468 t = build_int_cst (TREE_TYPE (fpr),
4469 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4470 + X86_64_REGPARM_MAX * 8);
4471 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4472 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4473 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4474 gimplify_and_add (t, pre_p);
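/* The two conditional jumps emitted above are equivalent to this sketch
   (using the psABI limits of 6 GPRs and 8 SSE registers):

       if (gp_offset >= (6 - needed_intregs + 1) * 8)       goto lab_false;
       if (fp_offset >= 48 + (8 - needed_sseregs + 1) * 16)  goto lab_false;

   i.e. fall through only if the whole value still fits in the remaining
   registers; otherwise it is fetched from the overflow area below.  */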
4477 /* Compute index to start of area used for integer regs. */
4478 if (needed_intregs)
4480 /* int_addr = gpr + sav; */
4481 t = fold_build_pointer_plus (sav, gpr);
4482 gimplify_assign (int_addr, t, pre_p);
4484 if (needed_sseregs)
4486 /* sse_addr = fpr + sav; */
4487 t = fold_build_pointer_plus (sav, fpr);
4488 gimplify_assign (sse_addr, t, pre_p);
4490 if (need_temp)
4492 int i, prev_size = 0;
4493 tree temp = create_tmp_var (type, "va_arg_tmp");
4495 /* addr = &temp; */
4496 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4497 gimplify_assign (addr, t, pre_p);
4499 for (i = 0; i < XVECLEN (container, 0); i++)
4501 rtx slot = XVECEXP (container, 0, i);
4502 rtx reg = XEXP (slot, 0);
4503 machine_mode mode = GET_MODE (reg);
4504 tree piece_type;
4505 tree addr_type;
4506 tree daddr_type;
4507 tree src_addr, src;
4508 int src_offset;
4509 tree dest_addr, dest;
4510 int cur_size = GET_MODE_SIZE (mode);
4512 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4513 prev_size = INTVAL (XEXP (slot, 1));
4514 if (prev_size + cur_size > size)
4516 cur_size = size - prev_size;
4517 unsigned int nbits = cur_size * BITS_PER_UNIT;
4518 if (!int_mode_for_size (nbits, 1).exists (&mode))
4519 mode = QImode;
4521 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4522 if (mode == GET_MODE (reg))
4523 addr_type = build_pointer_type (piece_type);
4524 else
4525 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4526 true);
4527 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4528 true);
4530 if (SSE_REGNO_P (REGNO (reg)))
4532 src_addr = sse_addr;
4533 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4535 else
4537 src_addr = int_addr;
4538 src_offset = REGNO (reg) * 8;
4540 src_addr = fold_convert (addr_type, src_addr);
4541 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4543 dest_addr = fold_convert (daddr_type, addr);
4544 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4545 if (cur_size == GET_MODE_SIZE (mode))
4547 src = build_va_arg_indirect_ref (src_addr);
4548 dest = build_va_arg_indirect_ref (dest_addr);
4550 gimplify_assign (dest, src, pre_p);
4552 else
4554 tree copy
4555 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4556 3, dest_addr, src_addr,
4557 size_int (cur_size));
4558 gimplify_and_add (copy, pre_p);
4560 prev_size += cur_size;
4564 if (needed_intregs)
4566 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4567 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4568 gimplify_assign (gpr, t, pre_p);
4571 if (needed_sseregs)
4573 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4574 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4575 gimplify_assign (unshare_expr (fpr), t, pre_p);
4578 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4580 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4583 /* ... otherwise out of the overflow area. */
4585 /* When we align a parameter on the stack for the caller, if the
4586 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4587 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
4588 here with the caller. */
4589 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4590 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4591 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4593 /* Care for on-stack alignment if needed. */
4594 if (arg_boundary <= 64 || size == 0)
4595 t = ovf;
4596 else
4598 HOST_WIDE_INT align = arg_boundary / 8;
4599 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4600 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4601 build_int_cst (TREE_TYPE (t), -align));
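/* i.e. the usual round-up-to-alignment idiom:
       t = (ovf + align - 1) & -align;  */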
4604 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4605 gimplify_assign (addr, t, pre_p);
4607 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4608 gimplify_assign (unshare_expr (ovf), t, pre_p);
4610 if (container)
4611 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4613 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4614 addr = fold_convert (ptrtype, addr);
4616 if (indirect_p)
4617 addr = build_va_arg_indirect_ref (addr);
4618 return build_va_arg_indirect_ref (addr);
4621 /* Return true if OPNUM's MEM should be matched
4622 in movabs* patterns. */
4624 bool
4625 ix86_check_movabs (rtx insn, int opnum)
4627 rtx set, mem;
4629 set = PATTERN (insn);
4630 if (GET_CODE (set) == PARALLEL)
4631 set = XVECEXP (set, 0, 0);
4632 gcc_assert (GET_CODE (set) == SET);
4633 mem = XEXP (set, opnum);
4634 while (SUBREG_P (mem))
4635 mem = SUBREG_REG (mem);
4636 gcc_assert (MEM_P (mem));
4637 return volatile_ok || !MEM_VOLATILE_P (mem);
4640 /* Return false if INSN contains a MEM with a non-default address space. */
4641 bool
4642 ix86_check_no_addr_space (rtx insn)
4644 subrtx_var_iterator::array_type array;
4645 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4647 rtx x = *iter;
4648 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4649 return false;
4651 return true;
4654 /* Initialize the table of extra 80387 mathematical constants. */
4656 static void
4657 init_ext_80387_constants (void)
4659 static const char * cst[5] =
4661 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4662 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4663 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4664 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4665 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4667 int i;
4669 for (i = 0; i < 5; i++)
4671 real_from_string (&ext_80387_constants_table[i], cst[i]);
4672 /* Ensure each constant is rounded to XFmode precision. */
4673 real_convert (&ext_80387_constants_table[i],
4674 XFmode, &ext_80387_constants_table[i]);
4677 ext_80387_constants_init = 1;
4680 /* Return non-zero if the constant is something that
4681 can be loaded with a special instruction. */
4684 standard_80387_constant_p (rtx x)
4686 machine_mode mode = GET_MODE (x);
4688 const REAL_VALUE_TYPE *r;
4690 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4691 return -1;
4693 if (x == CONST0_RTX (mode))
4694 return 1;
4695 if (x == CONST1_RTX (mode))
4696 return 2;
4698 r = CONST_DOUBLE_REAL_VALUE (x);
4700 /* For XFmode constants, try to find a special 80387 instruction when
4701 optimizing for size or on those CPUs that benefit from them. */
4702 if (mode == XFmode
4703 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
4705 int i;
4707 if (! ext_80387_constants_init)
4708 init_ext_80387_constants ();
4710 for (i = 0; i < 5; i++)
4711 if (real_identical (r, &ext_80387_constants_table[i]))
4712 return i + 3;
4715 /* Load of the constant -0.0 or -1.0 will be split as
4716 fldz;fchs or fld1;fchs sequence. */
4717 if (real_isnegzero (r))
4718 return 8;
4719 if (real_identical (r, &dconstm1))
4720 return 9;
4722 return 0;
4725 /* Return the opcode of the special instruction to be used to load
4726 the constant X. */
4728 const char *
4729 standard_80387_constant_opcode (rtx x)
4731 switch (standard_80387_constant_p (x))
4733 case 1:
4734 return "fldz";
4735 case 2:
4736 return "fld1";
4737 case 3:
4738 return "fldlg2";
4739 case 4:
4740 return "fldln2";
4741 case 5:
4742 return "fldl2e";
4743 case 6:
4744 return "fldl2t";
4745 case 7:
4746 return "fldpi";
4747 case 8:
4748 case 9:
4749 return "#";
4750 default:
4751 gcc_unreachable ();
4755 /* Return the CONST_DOUBLE representing the 80387 constant that is
4756 loaded by the specified special instruction. The argument IDX
4757 matches the return value from standard_80387_constant_p. */
4760 standard_80387_constant_rtx (int idx)
4762 int i;
4764 if (! ext_80387_constants_init)
4765 init_ext_80387_constants ();
4767 switch (idx)
4769 case 3:
4770 case 4:
4771 case 5:
4772 case 6:
4773 case 7:
4774 i = idx - 3;
4775 break;
4777 default:
4778 gcc_unreachable ();
4781 return const_double_from_real_value (ext_80387_constants_table[i],
4782 XFmode);
4785 /* Return 1 if X is all bits 0, and 2 if X is all bits 1,
4786 in a supported SSE/AVX vector mode. */
4789 standard_sse_constant_p (rtx x, machine_mode pred_mode)
4791 machine_mode mode;
4793 if (!TARGET_SSE)
4794 return 0;
4796 mode = GET_MODE (x);
4798 if (x == const0_rtx || const0_operand (x, mode))
4799 return 1;
4801 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4803 /* VOIDmode integer constant, get mode from the predicate. */
4804 if (mode == VOIDmode)
4805 mode = pred_mode;
4807 switch (GET_MODE_SIZE (mode))
4809 case 64:
4810 if (TARGET_AVX512F)
4811 return 2;
4812 break;
4813 case 32:
4814 if (TARGET_AVX2)
4815 return 2;
4816 break;
4817 case 16:
4818 if (TARGET_SSE2)
4819 return 2;
4820 break;
4821 case 0:
4822 /* VOIDmode */
4823 gcc_unreachable ();
4824 default:
4825 break;
4829 return 0;
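/* Illustrative use of the tri-state result (a sketch):

       standard_sse_constant_p (CONST0_RTX (V4SFmode), V4SFmode)   => 1
       standard_sse_constant_p (CONSTM1_RTX (V4SImode), V4SImode)  => 2
						       (with TARGET_SSE2)

   Anything else yields 0 and the constant has to be loaded by other
   means.  */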
4832 /* Return the opcode of the special instruction to be used to load
4833 the constant operands[1] into operands[0]. */
4835 const char *
4836 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4838 machine_mode mode;
4839 rtx x = operands[1];
4841 gcc_assert (TARGET_SSE);
4843 mode = GET_MODE (x);
4845 if (x == const0_rtx || const0_operand (x, mode))
4847 switch (get_attr_mode (insn))
4849 case MODE_TI:
4850 if (!EXT_REX_SSE_REG_P (operands[0]))
4851 return "%vpxor\t%0, %d0";
4852 /* FALLTHRU */
4853 case MODE_XI:
4854 case MODE_OI:
4855 if (EXT_REX_SSE_REG_P (operands[0]))
4856 return (TARGET_AVX512VL
4857 ? "vpxord\t%x0, %x0, %x0"
4858 : "vpxord\t%g0, %g0, %g0");
4859 return "vpxor\t%x0, %x0, %x0";
4861 case MODE_V2DF:
4862 if (!EXT_REX_SSE_REG_P (operands[0]))
4863 return "%vxorpd\t%0, %d0";
4864 /* FALLTHRU */
4865 case MODE_V8DF:
4866 case MODE_V4DF:
4867 if (!EXT_REX_SSE_REG_P (operands[0]))
4868 return "vxorpd\t%x0, %x0, %x0";
4869 else if (TARGET_AVX512DQ)
4870 return (TARGET_AVX512VL
4871 ? "vxorpd\t%x0, %x0, %x0"
4872 : "vxorpd\t%g0, %g0, %g0");
4873 else
4874 return (TARGET_AVX512VL
4875 ? "vpxorq\t%x0, %x0, %x0"
4876 : "vpxorq\t%g0, %g0, %g0");
4878 case MODE_V4SF:
4879 if (!EXT_REX_SSE_REG_P (operands[0]))
4880 return "%vxorps\t%0, %d0";
4881 /* FALLTHRU */
4882 case MODE_V16SF:
4883 case MODE_V8SF:
4884 if (!EXT_REX_SSE_REG_P (operands[0]))
4885 return "vxorps\t%x0, %x0, %x0";
4886 else if (TARGET_AVX512DQ)
4887 return (TARGET_AVX512VL
4888 ? "vxorps\t%x0, %x0, %x0"
4889 : "vxorps\t%g0, %g0, %g0");
4890 else
4891 return (TARGET_AVX512VL
4892 ? "vpxord\t%x0, %x0, %x0"
4893 : "vpxord\t%g0, %g0, %g0");
4895 default:
4896 gcc_unreachable ();
4899 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4901 enum attr_mode insn_mode = get_attr_mode (insn);
4903 switch (insn_mode)
4905 case MODE_XI:
4906 case MODE_V8DF:
4907 case MODE_V16SF:
4908 gcc_assert (TARGET_AVX512F);
4909 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4911 case MODE_OI:
4912 case MODE_V4DF:
4913 case MODE_V8SF:
4914 gcc_assert (TARGET_AVX2);
4915 /* FALLTHRU */
4916 case MODE_TI:
4917 case MODE_V2DF:
4918 case MODE_V4SF:
4919 gcc_assert (TARGET_SSE2);
4920 if (!EXT_REX_SSE_REG_P (operands[0]))
4921 return (TARGET_AVX
4922 ? "vpcmpeqd\t%0, %0, %0"
4923 : "pcmpeqd\t%0, %0");
4924 else if (TARGET_AVX512VL)
4925 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4926 else
4927 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4929 default:
4930 gcc_unreachable ();
4934 gcc_unreachable ();
4937 /* Returns true if INSN can be transformed from a memory load
4938 to a supported FP constant load. */
4940 bool
4941 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4943 rtx src = find_constant_src (insn);
4945 gcc_assert (REG_P (dst));
4947 if (src == NULL
4948 || (SSE_REGNO_P (REGNO (dst))
4949 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4950 || (STACK_REGNO_P (REGNO (dst))
4951 && standard_80387_constant_p (src) < 1))
4952 return false;
4954 return true;
4957 /* Returns true if OP contains a symbol reference */
4959 bool
4960 symbolic_reference_mentioned_p (rtx op)
4962 const char *fmt;
4963 int i;
4965 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4966 return true;
4968 fmt = GET_RTX_FORMAT (GET_CODE (op));
4969 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4971 if (fmt[i] == 'E')
4973 int j;
4975 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4976 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4977 return true;
4980 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4981 return true;
4984 return false;
4987 /* Return true if it is appropriate to emit `ret' instructions in the
4988 body of a function. Do this only if the epilogue is simple, needing a
4989 couple of insns. Prior to reloading, we can't tell how many registers
4990 must be saved, so return false then. Return false if there is no frame
4991 marker to de-allocate. */
4993 bool
4994 ix86_can_use_return_insn_p (void)
4996 if (ix86_function_naked (current_function_decl))
4997 return false;
4999 /* Don't use `ret' instruction in interrupt handler. */
5000 if (! reload_completed
5001 || frame_pointer_needed
5002 || cfun->machine->func_type != TYPE_NORMAL)
5003 return 0;
5005 /* Don't allow more than 32k pop, since that's all we can do
5006 with one instruction. */
5007 if (crtl->args.pops_args && crtl->args.size >= 32768)
5008 return 0;
5010 struct ix86_frame &frame = cfun->machine->frame;
5011 return (frame.stack_pointer_offset == UNITS_PER_WORD
5012 && (frame.nregs + frame.nsseregs) == 0);
5015 /* Value should be nonzero if functions must have frame pointers.
5016 Zero means the frame pointer need not be set up (and parms may
5017 be accessed via the stack pointer) in functions that seem suitable. */
5019 static bool
5020 ix86_frame_pointer_required (void)
5022 /* If we accessed previous frames, then the generated code expects
5023 to be able to access the saved ebp value in our frame. */
5024 if (cfun->machine->accesses_prev_frame)
5025 return true;
5027 /* Several x86 OSes need a frame pointer for other reasons,
5028 usually pertaining to setjmp. */
5029 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5030 return true;
5032 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
5033 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5034 return true;
5036 /* For Win64 SEH, very large frames need a frame pointer as the maximum
5037 stack allocation is 4GB. */
5038 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
5039 return true;
5041 /* SSE saves require a frame pointer when the stack is misaligned. */
5042 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5043 return true;
5045 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5046 turns off the frame pointer by default. Turn it back on now if
5047 we've not got a leaf function. */
5048 if (TARGET_OMIT_LEAF_FRAME_POINTER
5049 && (!crtl->is_leaf
5050 || ix86_current_function_calls_tls_descriptor))
5051 return true;
5053 if (crtl->profile && !flag_fentry)
5054 return true;
5056 return false;
5059 /* Record that the current function accesses previous call frames. */
5061 void
5062 ix86_setup_frame_addresses (void)
5064 cfun->machine->accesses_prev_frame = 1;
5067 #ifndef USE_HIDDEN_LINKONCE
5068 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5069 # define USE_HIDDEN_LINKONCE 1
5070 # else
5071 # define USE_HIDDEN_LINKONCE 0
5072 # endif
5073 #endif
5075 /* Label count for call and return thunks. It is used to make unique
5076 labels in call and return thunks. */
5077 static int indirectlabelno;
5079 /* True if call thunk function is needed. */
5080 static bool indirect_thunk_needed = false;
5082 /* Bit masks of integer registers, which contain branch target, used
5083 by call thunk functions. */
5084 static int indirect_thunks_used;
5086 /* True if return thunk function is needed. */
5087 static bool indirect_return_needed = false;
5089 /* True if return thunk function via CX is needed. */
5090 static bool indirect_return_via_cx;
5092 #ifndef INDIRECT_LABEL
5093 # define INDIRECT_LABEL "LIND"
5094 #endif
5096 /* Indicate what prefix is needed for an indirect branch. */
5097 enum indirect_thunk_prefix
5099 indirect_thunk_prefix_none,
5100 indirect_thunk_prefix_nt
5103 /* Return the prefix needed for an indirect branch INSN. */
5105 enum indirect_thunk_prefix
5106 indirect_thunk_need_prefix (rtx_insn *insn)
5108 enum indirect_thunk_prefix need_prefix;
5109 if ((cfun->machine->indirect_branch_type
5110 == indirect_branch_thunk_extern)
5111 && ix86_notrack_prefixed_insn_p (insn))
5113 /* NOTRACK prefix is only used with external thunk so that it
5114 can be properly updated to support CET at run-time. */
5115 need_prefix = indirect_thunk_prefix_nt;
5117 else
5118 need_prefix = indirect_thunk_prefix_none;
5119 return need_prefix;
5122 /* Fills in the label name that should be used for the indirect thunk. */
5124 static void
5125 indirect_thunk_name (char name[32], unsigned int regno,
5126 enum indirect_thunk_prefix need_prefix,
5127 bool ret_p)
5129 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5130 gcc_unreachable ();
5132 if (USE_HIDDEN_LINKONCE)
5134 const char *prefix;
5136 if (need_prefix == indirect_thunk_prefix_nt
5137 && regno != INVALID_REGNUM)
5139 /* NOTRACK prefix is only used with external thunk via
5140 register so that NOTRACK prefix can be added to indirect
5141 branch via register to support CET at run-time. */
5142 prefix = "_nt";
5144 else
5145 prefix = "";
5147 const char *ret = ret_p ? "return" : "indirect";
5149 if (regno != INVALID_REGNUM)
5151 const char *reg_prefix;
5152 if (LEGACY_INT_REGNO_P (regno))
5153 reg_prefix = TARGET_64BIT ? "r" : "e";
5154 else
5155 reg_prefix = "";
5156 sprintf (name, "__x86_%s_thunk%s_%s%s",
5157 ret, prefix, reg_prefix, reg_names[regno]);
5159 else
5160 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5162 else
5164 if (regno != INVALID_REGNUM)
5165 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5166 else
5168 if (ret_p)
5169 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5170 else
5171 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
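/* Example names produced above when USE_HIDDEN_LINKONCE (a sketch):

       __x86_indirect_thunk         function address on the stack
       __x86_indirect_thunk_rax     function address in %rax
       __x86_return_thunk           thunk used for function return
       __x86_indirect_thunk_nt_rax  external thunk with NOTRACK prefix  */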
5176 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5177 the function address is in REGNO and the call and return thunk looks like:
5179 call L2
5180 L1:
5181 pause
5182 lfence
5183 jmp L1
5184 L2:
5185 mov %REG, (%sp)
5186 ret
5188 Otherwise, the function address is on the top of stack and the
5189 call and return thunk looks like:
5191 call L2
5192 L1:
5193 pause
5194 lfence
5195 jmp L1
5196 L2:
5197 lea WORD_SIZE(%sp), %sp
5198 ret
5199 */
5201 static void
5202 output_indirect_thunk (unsigned int regno)
5204 char indirectlabel1[32];
5205 char indirectlabel2[32];
5207 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5208 indirectlabelno++);
5209 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5210 indirectlabelno++);
5212 /* Call */
5213 fputs ("\tcall\t", asm_out_file);
5214 assemble_name_raw (asm_out_file, indirectlabel2);
5215 fputc ('\n', asm_out_file);
5217 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5219 /* AMD and Intel CPUs each prefer a different instruction as a loop filler.
5220 Using both pause + lfence is a compromise solution. */
5221 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5223 /* Jump. */
5224 fputs ("\tjmp\t", asm_out_file);
5225 assemble_name_raw (asm_out_file, indirectlabel1);
5226 fputc ('\n', asm_out_file);
5228 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5230 /* The above call insn pushed a word to stack. Adjust CFI info. */
5231 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5233 if (! dwarf2out_do_cfi_asm ())
5235 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5236 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5237 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5238 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5240 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5241 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5242 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5243 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5244 dwarf2out_emit_cfi (xcfi);
5247 if (regno != INVALID_REGNUM)
5249 /* MOV. */
5250 rtx xops[2];
5251 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5252 xops[1] = gen_rtx_REG (word_mode, regno);
5253 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5255 else
5257 /* LEA. */
5258 rtx xops[2];
5259 xops[0] = stack_pointer_rtx;
5260 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5261 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5264 fputs ("\tret\n", asm_out_file);
5267 /* Output a function with a call and return thunk for indirect branch.
5268 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5269 Otherwise, the function address is on the top of stack. Thunk is
5270 used for function return if RET_P is true. */
5272 static void
5273 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5274 unsigned int regno, bool ret_p)
5276 char name[32];
5277 tree decl;
5279 /* Create __x86_indirect_thunk. */
5280 indirect_thunk_name (name, regno, need_prefix, ret_p);
5281 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5282 get_identifier (name),
5283 build_function_type_list (void_type_node, NULL_TREE));
5284 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5285 NULL_TREE, void_type_node);
5286 TREE_PUBLIC (decl) = 1;
5287 TREE_STATIC (decl) = 1;
5288 DECL_IGNORED_P (decl) = 1;
5290 #if TARGET_MACHO
5291 if (TARGET_MACHO)
5293 switch_to_section (darwin_sections[picbase_thunk_section]);
5294 fputs ("\t.weak_definition\t", asm_out_file);
5295 assemble_name (asm_out_file, name);
5296 fputs ("\n\t.private_extern\t", asm_out_file);
5297 assemble_name (asm_out_file, name);
5298 putc ('\n', asm_out_file);
5299 ASM_OUTPUT_LABEL (asm_out_file, name);
5300 DECL_WEAK (decl) = 1;
5302 else
5303 #endif
5304 if (USE_HIDDEN_LINKONCE)
5306 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5308 targetm.asm_out.unique_section (decl, 0);
5309 switch_to_section (get_named_section (decl, NULL, 0));
5311 targetm.asm_out.globalize_label (asm_out_file, name);
5312 fputs ("\t.hidden\t", asm_out_file);
5313 assemble_name (asm_out_file, name);
5314 putc ('\n', asm_out_file);
5315 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5317 else
5319 switch_to_section (text_section);
5320 ASM_OUTPUT_LABEL (asm_out_file, name);
5323 DECL_INITIAL (decl) = make_node (BLOCK);
5324 current_function_decl = decl;
5325 allocate_struct_function (decl, false);
5326 init_function_start (decl);
5327 /* We're about to hide the function body from callees of final_* by
5328 emitting it directly; tell them we're a thunk, if they care. */
5329 cfun->is_thunk = true;
5330 first_function_block_is_cold = false;
5331 /* Make sure unwind info is emitted for the thunk if needed. */
5332 final_start_function (emit_barrier (), asm_out_file, 1);
5334 output_indirect_thunk (regno);
5336 final_end_function ();
5337 init_insn_lengths ();
5338 free_after_compilation (cfun);
5339 set_cfun (NULL);
5340 current_function_decl = NULL;
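/* Bit mask of integer registers for which a __x86.get_pc_thunk.* helper
   must be emitted by ix86_code_end; bits are set in output_set_got.  */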
5343 static int pic_labels_used;
5345 /* Fills in the label name that should be used for a pc thunk for
5346 the given register. */
5348 static void
5349 get_pc_thunk_name (char name[32], unsigned int regno)
5351 gcc_assert (!TARGET_64BIT);
5353 if (USE_HIDDEN_LINKONCE)
5354 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5355 else
5356 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5360 /* This function generates code for -fpic that loads %ebx with
5361 the return address of the caller and then returns. */
5363 static void
5364 ix86_code_end (void)
5366 rtx xops[2];
5367 unsigned int regno;
5369 if (indirect_return_needed)
5370 output_indirect_thunk_function (indirect_thunk_prefix_none,
5371 INVALID_REGNUM, true);
5372 if (indirect_return_via_cx)
5373 output_indirect_thunk_function (indirect_thunk_prefix_none,
5374 CX_REG, true);
5375 if (indirect_thunk_needed)
5376 output_indirect_thunk_function (indirect_thunk_prefix_none,
5377 INVALID_REGNUM, false);
5379 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5381 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5382 if ((indirect_thunks_used & (1 << i)))
5383 output_indirect_thunk_function (indirect_thunk_prefix_none,
5384 regno, false);
5387 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5389 char name[32];
5390 tree decl;
5392 if ((indirect_thunks_used & (1 << regno)))
5393 output_indirect_thunk_function (indirect_thunk_prefix_none,
5394 regno, false);
5396 if (!(pic_labels_used & (1 << regno)))
5397 continue;
5399 get_pc_thunk_name (name, regno);
5401 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5402 get_identifier (name),
5403 build_function_type_list (void_type_node, NULL_TREE));
5404 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5405 NULL_TREE, void_type_node);
5406 TREE_PUBLIC (decl) = 1;
5407 TREE_STATIC (decl) = 1;
5408 DECL_IGNORED_P (decl) = 1;
5410 #if TARGET_MACHO
5411 if (TARGET_MACHO)
5413 switch_to_section (darwin_sections[picbase_thunk_section]);
5414 fputs ("\t.weak_definition\t", asm_out_file);
5415 assemble_name (asm_out_file, name);
5416 fputs ("\n\t.private_extern\t", asm_out_file);
5417 assemble_name (asm_out_file, name);
5418 putc ('\n', asm_out_file);
5419 ASM_OUTPUT_LABEL (asm_out_file, name);
5420 DECL_WEAK (decl) = 1;
5422 else
5423 #endif
5424 if (USE_HIDDEN_LINKONCE)
5426 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5428 targetm.asm_out.unique_section (decl, 0);
5429 switch_to_section (get_named_section (decl, NULL, 0));
5431 targetm.asm_out.globalize_label (asm_out_file, name);
5432 fputs ("\t.hidden\t", asm_out_file);
5433 assemble_name (asm_out_file, name);
5434 putc ('\n', asm_out_file);
5435 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5437 else
5439 switch_to_section (text_section);
5440 ASM_OUTPUT_LABEL (asm_out_file, name);
5443 DECL_INITIAL (decl) = make_node (BLOCK);
5444 current_function_decl = decl;
5445 allocate_struct_function (decl, false);
5446 init_function_start (decl);
5447 /* We're about to hide the function body from callees of final_* by
5448 emitting it directly; tell them we're a thunk, if they care. */
5449 cfun->is_thunk = true;
5450 first_function_block_is_cold = false;
5451 /* Make sure unwind info is emitted for the thunk if needed. */
5452 final_start_function (emit_barrier (), asm_out_file, 1);
5454 /* Pad stack IP move with 4 instructions (two NOPs count
5455 as one instruction). */
5456 if (TARGET_PAD_SHORT_FUNCTION)
5458 int i = 8;
5460 while (i--)
5461 fputs ("\tnop\n", asm_out_file);
5464 xops[0] = gen_rtx_REG (Pmode, regno);
5465 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5466 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5467 output_asm_insn ("%!ret", NULL);
5468 final_end_function ();
5469 init_insn_lengths ();
5470 free_after_compilation (cfun);
5471 set_cfun (NULL);
5472 current_function_decl = NULL;
5475 if (flag_split_stack)
5476 file_end_indicate_split_stack ();
5479 /* Emit code for the SET_GOT patterns. */
5481 const char *
5482 output_set_got (rtx dest, rtx label)
5484 rtx xops[3];
5486 xops[0] = dest;
5488 if (TARGET_VXWORKS_RTP && flag_pic)
5490 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5491 xops[2] = gen_rtx_MEM (Pmode,
5492 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5493 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5495 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5496 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5497 an unadorned address. */
5498 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5499 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5500 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5501 return "";
5504 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5506 if (flag_pic)
5508 char name[32];
5509 get_pc_thunk_name (name, REGNO (dest));
5510 pic_labels_used |= 1 << REGNO (dest);
5512 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5513 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5514 output_asm_insn ("%!call\t%X2", xops);
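/* Together with the "add" emitted at the end of this function, the
   non-Darwin -fpic sequence is roughly (sketch):

       call  __x86.get_pc_thunk.reg      thunk body: mov (%esp), %reg; ret
       add   $_GLOBAL_OFFSET_TABLE_, %reg  */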
5516 #if TARGET_MACHO
5517 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5518 This is what will be referenced by the Mach-O PIC subsystem. */
5519 if (machopic_should_output_picbase_label () || !label)
5520 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5522 /* When we are restoring the pic base at the site of a nonlocal label,
5523 and we decided to emit the pic base above, we will still output a
5524 local label used for calculating the correction offset (even though
5525 the offset will be 0 in that case). */
5526 if (label)
5527 targetm.asm_out.internal_label (asm_out_file, "L",
5528 CODE_LABEL_NUMBER (label));
5529 #endif
5531 else
5533 if (TARGET_MACHO)
5534 /* We don't need a pic base, we're not producing pic. */
5535 gcc_unreachable ();
5537 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5538 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5539 targetm.asm_out.internal_label (asm_out_file, "L",
5540 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5543 if (!TARGET_MACHO)
5544 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5546 return "";
5549 /* Generate an "push" pattern for input ARG. */
5552 gen_push (rtx arg)
5554 struct machine_function *m = cfun->machine;
5556 if (m->fs.cfa_reg == stack_pointer_rtx)
5557 m->fs.cfa_offset += UNITS_PER_WORD;
5558 m->fs.sp_offset += UNITS_PER_WORD;
5560 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5561 arg = gen_rtx_REG (word_mode, REGNO (arg));
5563 return gen_rtx_SET (gen_rtx_MEM (word_mode,
5564 gen_rtx_PRE_DEC (Pmode,
5565 stack_pointer_rtx)),
5566 arg);
5569 /* Generate an "pop" pattern for input ARG. */
5572 gen_pop (rtx arg)
5574 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5575 arg = gen_rtx_REG (word_mode, REGNO (arg));
5577 return gen_rtx_SET (arg,
5578 gen_rtx_MEM (word_mode,
5579 gen_rtx_POST_INC (Pmode,
5580 stack_pointer_rtx)));
5583 /* Return >= 0 if there is an unused call-clobbered register available
5584 for the entire function. */
5586 static unsigned int
5587 ix86_select_alt_pic_regnum (void)
5589 if (ix86_use_pseudo_pic_reg ())
5590 return INVALID_REGNUM;
5592 if (crtl->is_leaf
5593 && !crtl->profile
5594 && !ix86_current_function_calls_tls_descriptor)
5596 int i, drap;
5597 /* Can't use the same register for both PIC and DRAP. */
5598 if (crtl->drap_reg)
5599 drap = REGNO (crtl->drap_reg);
5600 else
5601 drap = -1;
5602 for (i = 2; i >= 0; --i)
5603 if (i != drap && !df_regs_ever_live_p (i))
5604 return i;
5607 return INVALID_REGNUM;
5610 /* Return true if REGNO is used by the epilogue. */
5612 bool
5613 ix86_epilogue_uses (int regno)
5615 /* If there are no caller-saved registers, we preserve all registers,
5616 except for MMX and x87 registers which aren't supported when saving
5617 and restoring registers. Don't explicitly save SP register since
5618 it is always preserved. */
5619 return (epilogue_completed
5620 && cfun->machine->no_caller_saved_registers
5621 && !fixed_regs[regno]
5622 && !STACK_REGNO_P (regno)
5623 && !MMX_REGNO_P (regno));
5626 /* Return nonzero if register REGNO can be used as a scratch register
5627 in peephole2. */
5629 static bool
5630 ix86_hard_regno_scratch_ok (unsigned int regno)
5632 /* If there are no caller-saved registers, we can't use any register
5633 as a scratch register after epilogue and use REGNO as scratch
5634 register only if it has been used before to avoid saving and
5635 restoring it. */
5636 return (!cfun->machine->no_caller_saved_registers
5637 || (!epilogue_completed
5638 && df_regs_ever_live_p (regno)));
5641 /* Return TRUE if we need to save REGNO. */
5643 bool
5644 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5646 /* If there are no caller-saved registers, we preserve all registers,
5647 except for MMX and x87 registers which aren't supported when saving
5648 and restoring registers. Don't explicitly save SP register since
5649 it is always preserved. */
5650 if (cfun->machine->no_caller_saved_registers)
5652 /* Don't preserve registers used for function return value. */
5653 rtx reg = crtl->return_rtx;
5654 if (reg)
5656 unsigned int i = REGNO (reg);
5657 unsigned int nregs = REG_NREGS (reg);
5658 while (nregs-- > 0)
5659 if ((i + nregs) == regno)
5660 return false;
5663 return (df_regs_ever_live_p (regno)
5664 && !fixed_regs[regno]
5665 && !STACK_REGNO_P (regno)
5666 && !MMX_REGNO_P (regno)
5667 && (regno != HARD_FRAME_POINTER_REGNUM
5668 || !frame_pointer_needed));
5671 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5672 && pic_offset_table_rtx)
5674 if (ix86_use_pseudo_pic_reg ())
5676 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5677 _mcount in prologue. */
5678 if (!TARGET_64BIT && flag_pic && crtl->profile)
5679 return true;
5681 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5682 || crtl->profile
5683 || crtl->calls_eh_return
5684 || crtl->uses_const_pool
5685 || cfun->has_nonlocal_label)
5686 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5689 if (crtl->calls_eh_return && maybe_eh_return)
5691 unsigned i;
5692 for (i = 0; ; i++)
5694 unsigned test = EH_RETURN_DATA_REGNO (i);
5695 if (test == INVALID_REGNUM)
5696 break;
5697 if (test == regno)
5698 return true;
5702 if (ignore_outlined && cfun->machine->call_ms2sysv)
5704 unsigned count = cfun->machine->call_ms2sysv_extra_regs
5705 + xlogue_layout::MIN_REGS;
5706 if (xlogue_layout::is_stub_managed_reg (regno, count))
5707 return false;
5710 if (crtl->drap_reg
5711 && regno == REGNO (crtl->drap_reg)
5712 && !cfun->machine->no_drap_save_restore)
5713 return true;
5715 return (df_regs_ever_live_p (regno)
5716 && !call_used_regs[regno]
5717 && !fixed_regs[regno]
5718 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5721 /* Return the number of saved general purpose registers. */
5723 static int
5724 ix86_nsaved_regs (void)
5726 int nregs = 0;
5727 int regno;
5729 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5730 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5731 nregs ++;
5732 return nregs;
5735 /* Return number of saved SSE registers. */
5737 static int
5738 ix86_nsaved_sseregs (void)
5740 int nregs = 0;
5741 int regno;
5743 if (!TARGET_64BIT_MS_ABI)
5744 return 0;
5745 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5746 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5747 nregs ++;
5748 return nregs;
5751 /* Given FROM and TO register numbers, say whether this elimination is
5752 allowed. If stack alignment is needed, we can only replace argument
5753 pointer with hard frame pointer, or replace frame pointer with stack
5754 pointer. Otherwise, frame pointer elimination is automatically
5755 handled and all other eliminations are valid. */
5757 static bool
5758 ix86_can_eliminate (const int from, const int to)
5760 if (stack_realign_fp)
5761 return ((from == ARG_POINTER_REGNUM
5762 && to == HARD_FRAME_POINTER_REGNUM)
5763 || (from == FRAME_POINTER_REGNUM
5764 && to == STACK_POINTER_REGNUM));
5765 else
5766 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5769 /* Return the offset between two registers, one to be eliminated, and the other
5770 its replacement, at the start of a routine. */
5772 HOST_WIDE_INT
5773 ix86_initial_elimination_offset (int from, int to)
5775 struct ix86_frame &frame = cfun->machine->frame;
5777 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5778 return frame.hard_frame_pointer_offset;
5779 else if (from == FRAME_POINTER_REGNUM
5780 && to == HARD_FRAME_POINTER_REGNUM)
5781 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5782 else
5784 gcc_assert (to == STACK_POINTER_REGNUM);
5786 if (from == ARG_POINTER_REGNUM)
5787 return frame.stack_pointer_offset;
5789 gcc_assert (from == FRAME_POINTER_REGNUM);
5790 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5794 /* In a dynamically-aligned function, we can't know the offset from
5795 stack pointer to frame pointer, so we must ensure that setjmp
5796 eliminates fp against the hard fp (%ebp) rather than trying to
5797 index from %esp up to the top of the frame across a gap that is
5798 of unknown (at compile-time) size. */
5799 static rtx
5800 ix86_builtin_setjmp_frame_value (void)
5802 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
5805 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
5806 void warn_once_call_ms2sysv_xlogues (const char *feature)
5808 static bool warned_once = false;
5809 if (!warned_once)
5811 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
5812 feature);
5813 warned_once = true;
5817 /* Return the probing interval for -fstack-clash-protection. */
5819 static HOST_WIDE_INT
5820 get_probe_interval (void)
5822 if (flag_stack_clash_protection)
5823 return (HOST_WIDE_INT_1U
5824 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
5825 else
5826 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
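/* E.g. with the common default exponent of 12, either branch yields
   4096-byte probes (an illustration only; the real value comes from the
   param / target macro above).  */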
5829 /* When using -fsplit-stack, the allocation routines set a field in
5830 the TCB to the bottom of the stack plus this much space, measured
5831 in bytes. */
5833 #define SPLIT_STACK_AVAILABLE 256
5835 /* Fill structure ix86_frame about frame of currently computed function. */
5837 static void
5838 ix86_compute_frame_layout (void)
5840 struct ix86_frame *frame = &cfun->machine->frame;
5841 struct machine_function *m = cfun->machine;
5842 unsigned HOST_WIDE_INT stack_alignment_needed;
5843 HOST_WIDE_INT offset;
5844 unsigned HOST_WIDE_INT preferred_alignment;
5845 HOST_WIDE_INT size = get_frame_size ();
5846 HOST_WIDE_INT to_allocate;
5848 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
5849 * ms_abi functions that call a sysv function. We now need to prune away
5850 * cases where it should be disabled. */
5851 if (TARGET_64BIT && m->call_ms2sysv)
5853 gcc_assert (TARGET_64BIT_MS_ABI);
5854 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
5855 gcc_assert (!TARGET_SEH);
5856 gcc_assert (TARGET_SSE);
5857 gcc_assert (!ix86_using_red_zone ());
5859 if (crtl->calls_eh_return)
5861 gcc_assert (!reload_completed);
5862 m->call_ms2sysv = false;
5863 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
5866 else if (ix86_static_chain_on_stack)
5868 gcc_assert (!reload_completed);
5869 m->call_ms2sysv = false;
5870 warn_once_call_ms2sysv_xlogues ("static call chains");
5873 /* Finally, compute which registers the stub will manage. */
5874 else
5876 unsigned count = xlogue_layout::count_stub_managed_regs ();
5877 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
5878 m->call_ms2sysv_pad_in = 0;
5882 frame->nregs = ix86_nsaved_regs ();
5883 frame->nsseregs = ix86_nsaved_sseregs ();
5885 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
5886 except for function prologues, leaf functions and when the default
5887 incoming stack boundary is overridden at the command line or via the
5888 force_align_arg_pointer attribute.
5890 Darwin's ABI specifies 128b alignment for both 32- and 64-bit variants
5891 at call sites, including profile function calls. */
5893 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
5894 && crtl->preferred_stack_boundary < 128)
5895 && (!crtl->is_leaf || cfun->calls_alloca != 0
5896 || ix86_current_function_calls_tls_descriptor
5897 || (TARGET_MACHO && crtl->profile)
5898 || ix86_incoming_stack_boundary < 128))
5900 crtl->preferred_stack_boundary = 128;
5901 crtl->stack_alignment_needed = 128;
5904 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
5905 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
5907 gcc_assert (!size || stack_alignment_needed);
5908 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5909 gcc_assert (preferred_alignment <= stack_alignment_needed);
5911 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
5912 gcc_assert (TARGET_64BIT || !frame->nsseregs);
5913 if (TARGET_64BIT && m->call_ms2sysv)
5915 gcc_assert (stack_alignment_needed >= 16);
5916 gcc_assert (!frame->nsseregs);
5919 /* For SEH we have to limit the amount of code movement into the prologue.
5920 At present we do this via a BLOCKAGE, at which point there's very little
5921 scheduling that can be done, which means that there's very little point
5922 in doing anything except PUSHs. */
5923 if (TARGET_SEH)
5924 m->use_fast_prologue_epilogue = false;
5925 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
5927 int count = frame->nregs;
5928 struct cgraph_node *node = cgraph_node::get (current_function_decl);
5930 /* The fast prologue uses move instead of push to save registers. This
5931 is significantly longer, but also executes faster as modern hardware
5932 can execute the moves in parallel, but can't do that for push/pop.
5934 Be careful about choosing which prologue to emit: when the function takes
5935 many instructions to execute we may use the slow version, as well as when
5936 the function is known to be outside a hot spot (this is known with
5937 feedback only). Weight the size of the function by the number of registers
5938 to save, as it is cheap to use one or two push instructions but very
5939 slow to use many of them. */
5940 if (count)
5941 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5942 if (node->frequency < NODE_FREQUENCY_NORMAL
5943 || (flag_branch_probabilities
5944 && node->frequency < NODE_FREQUENCY_HOT))
5945 m->use_fast_prologue_epilogue = false;
5946 else
5947 m->use_fast_prologue_epilogue
5948 = !expensive_function_p (count);
5951 frame->save_regs_using_mov
5952 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
5953 /* If static stack checking is enabled and done with probes,
5954 the registers need to be saved before allocating the frame. */
5955 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
5957 /* Skip return address and error code in exception handler. */
5958 offset = INCOMING_FRAME_SP_OFFSET;
5960 /* Skip pushed static chain. */
5961 if (ix86_static_chain_on_stack)
5962 offset += UNITS_PER_WORD;
5964 /* Skip saved base pointer. */
5965 if (frame_pointer_needed)
5966 offset += UNITS_PER_WORD;
5967 frame->hfp_save_offset = offset;
5969 /* The traditional frame pointer location is at the top of the frame. */
5970 frame->hard_frame_pointer_offset = offset;
5972 /* Register save area */
5973 offset += frame->nregs * UNITS_PER_WORD;
5974 frame->reg_save_offset = offset;
5976 /* On SEH target, registers are pushed just before the frame pointer
5977 location. */
5978 if (TARGET_SEH)
5979 frame->hard_frame_pointer_offset = offset;
5981 /* Calculate the size of the va-arg area (not including padding, if any). */
5982 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
5984 /* Also adjust stack_realign_offset for the largest alignment of
5985 stack slot actually used. */
5986 if (stack_realign_fp
5987 || (cfun->machine->max_used_stack_alignment != 0
5988 && (offset % cfun->machine->max_used_stack_alignment) != 0))
5990 /* We may need a 16-byte aligned stack for the remainder of the
5991 register save area, but the stack frame for the local function
5992 may require a greater alignment if using AVX, AVX2 or AVX-512. In order
5993 to avoid wasting space, we first calculate the space needed for
5994 the rest of the register saves, add that to the stack pointer,
5995 and then realign the stack to the boundary of the start of the
5996 frame for the local function. */
5997 HOST_WIDE_INT space_needed = 0;
5998 HOST_WIDE_INT sse_reg_space_needed = 0;
6000 if (TARGET_64BIT)
6002 if (m->call_ms2sysv)
6004 m->call_ms2sysv_pad_in = 0;
6005 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6008 else if (frame->nsseregs)
6009 /* The only ABI that has saved SSE registers (Win64) also has a
6010 16-byte aligned default stack. However, many programs violate
6011 the ABI, and Wine64 forces stack realignment to compensate. */
6012 space_needed = frame->nsseregs * 16;
6014 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6016 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6017 we round anyway to be pedantic. */
6018 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6020 else
6021 space_needed = frame->va_arg_size;
6023 /* Record the allocation size required prior to the realignment AND. */
6024 frame->stack_realign_allocate = space_needed;
6026 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6027 before this point are not directly comparable with values below
6028 this point. Use sp_valid_at to determine if the stack pointer is
6029 valid for a given offset, fp_valid_at for the frame pointer, or
6030 choose_baseaddr to have a base register chosen for you.
6032 Note that the result of (frame->stack_realign_offset
6033 & (stack_alignment_needed - 1)) may not equal zero. */
6034 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6035 frame->stack_realign_offset = offset - space_needed;
6036 frame->sse_reg_save_offset = frame->stack_realign_offset
6037 + sse_reg_space_needed;
6039 else
6041 frame->stack_realign_offset = offset;
6043 if (TARGET_64BIT && m->call_ms2sysv)
6045 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6046 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6049 /* Align and set SSE register save area. */
6050 else if (frame->nsseregs)
6052 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6053 required and the DRAP re-alignment boundary is at least 16 bytes,
6054 then we want the SSE register save area properly aligned. */
6055 if (ix86_incoming_stack_boundary >= 128
6056 || (stack_realign_drap && stack_alignment_needed >= 16))
6057 offset = ROUND_UP (offset, 16);
6058 offset += frame->nsseregs * 16;
6060 frame->sse_reg_save_offset = offset;
6061 offset += frame->va_arg_size;
6064 /* Align the start of the frame for the local function. When a function
6065 call is removed, it may become a leaf function. But if an argument may
6066 be passed on the stack, we still need to align the stack when there is
6067 no tail call. */
6068 if (m->call_ms2sysv
6069 || frame->va_arg_size != 0
6070 || size != 0
6071 || !crtl->is_leaf
6072 || (!crtl->tail_call_emit
6073 && cfun->machine->outgoing_args_on_stack)
6074 || cfun->calls_alloca
6075 || ix86_current_function_calls_tls_descriptor)
6076 offset = ROUND_UP (offset, stack_alignment_needed);
6078 /* Frame pointer points here. */
6079 frame->frame_pointer_offset = offset;
6081 offset += size;
6083 /* Add the outgoing arguments area. It can be skipped if we eliminated
6084 all the function calls as dead code.
6085 Skipping is however impossible when the function calls alloca: the
6086 alloca expander assumes that the last crtl->outgoing_args_size bytes
6087 of the stack frame are unused. */
6088 if (ACCUMULATE_OUTGOING_ARGS
6089 && (!crtl->is_leaf || cfun->calls_alloca
6090 || ix86_current_function_calls_tls_descriptor))
6092 offset += crtl->outgoing_args_size;
6093 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6095 else
6096 frame->outgoing_arguments_size = 0;
6098 /* Align stack boundary. Only needed if we're calling another function
6099 or using alloca. */
6100 if (!crtl->is_leaf || cfun->calls_alloca
6101 || ix86_current_function_calls_tls_descriptor)
6102 offset = ROUND_UP (offset, preferred_alignment);
6104 /* We've reached end of stack frame. */
6105 frame->stack_pointer_offset = offset;
6107 /* Size prologue needs to allocate. */
6108 to_allocate = offset - frame->sse_reg_save_offset;
6110 if ((!to_allocate && frame->nregs <= 1)
6111 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6112 /* If stack clash probing needs a loop, then it needs a
6113 scratch register. But the returned register is only guaranteed
6114 to be safe to use after register saves are complete. So if
6115 stack clash protections are enabled and the allocated frame is
6116 larger than the probe interval, then use pushes to save
6117 callee saved registers. */
6118 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6119 frame->save_regs_using_mov = false;
6121 if (ix86_using_red_zone ()
6122 && crtl->sp_is_unchanging
6123 && crtl->is_leaf
6124 && !ix86_pc_thunk_call_expanded
6125 && !ix86_current_function_calls_tls_descriptor)
6127 frame->red_zone_size = to_allocate;
6128 if (frame->save_regs_using_mov)
6129 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6130 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6131 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6133 else
6134 frame->red_zone_size = 0;
6135 frame->stack_pointer_offset -= frame->red_zone_size;
6137 /* The SEH frame pointer location is near the bottom of the frame.
6138 This is enforced by the fact that the difference between the
6139 stack pointer and the frame pointer is limited to 240 bytes in
6140 the unwind data structure. */
6141 if (TARGET_SEH)
6143 HOST_WIDE_INT diff;
6145 /* If we can leave the frame pointer where it is, do so. This also returns
6146 the establisher frame for __builtin_frame_address (0). */
6147 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6148 if (diff <= SEH_MAX_FRAME_SIZE
6149 && (diff > 240 || (diff & 15) != 0)
6150 && !crtl->accesses_prior_frames)
6152 /* Ideally we'd determine what portion of the local stack frame
6153 (within the constraint of the lowest 240) is most heavily used.
6154 But without that complication, simply bias the frame pointer
6155 by 128 bytes so as to maximize the amount of the local stack
6156 frame that is addressable with 8-bit offsets. */
6157 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
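/* Illustrative sketch (not part of i386.c): how the offsets computed by
   ix86_compute_frame_layout accumulate for a hypothetical 64-bit function
   with three GP register saves, 32 bytes of locals and a 16-byte stack
   alignment.  EX_ROUND_UP mirrors GCC's ROUND_UP for power-of-two
   alignments; all numbers below are invented examples.  */
#include <stdio.h>

#define EX_ROUND_UP(x, a) (((x) + (a) - 1) & ~((long) (a) - 1))

int
main (void)
{
  long offset = 8;                   /* return address (INCOMING_FRAME_SP_OFFSET) */
  offset += 8;                       /* saved frame pointer */
  long hfp_offset = offset;          /* hard_frame_pointer_offset */
  offset += 3 * 8;                   /* register save area: three GP pushes */
  long reg_save_offset = offset;
  offset = EX_ROUND_UP (offset, 16); /* align start of frame for the locals */
  long frame_pointer_offset = offset;
  offset += 32;                      /* local variables (SIZE) */
  offset = EX_ROUND_UP (offset, 16); /* align to the preferred boundary */
  printf ("hfp=%ld reg_save=%ld frame=%ld stack_pointer=%ld\n",
          hfp_offset, reg_save_offset, frame_pointer_offset, offset);
  return 0;
}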
6162 /* This is semi-inlined memory_address_length, but simplified
6163 since we know that we're always dealing with reg+offset, and
6164 to avoid having to create and discard all that rtl. */
6166 static inline int
6167 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6169 int len = 4;
6171 if (offset == 0)
6173 /* EBP and R13 cannot be encoded without an offset. */
6174 len = (regno == BP_REG || regno == R13_REG);
6176 else if (IN_RANGE (offset, -128, 127))
6177 len = 1;
6179 /* ESP and R12 must be encoded with a SIB byte. */
6180 if (regno == SP_REG || regno == R12_REG)
6181 len++;
6183 return len;
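/* Illustrative sketch (not part of i386.c): the same encoding-length
   heuristic as choose_baseaddr_len above, restated standalone with a few
   sample operands.  EX_BX/EX_BP/EX_SP/EX_R12/EX_R13 are invented stand-ins
   for the real register numbers.  */
#include <stdio.h>

enum { EX_BX = 3, EX_BP = 6, EX_SP = 7, EX_R12 = 12, EX_R13 = 13 };

static int
ex_baseaddr_len (int regno, long offset)
{
  int len = 4;                                   /* 32-bit displacement */
  if (offset == 0)
    len = (regno == EX_BP || regno == EX_R13);   /* need an explicit disp8 of 0 */
  else if (offset >= -128 && offset <= 127)
    len = 1;                                     /* disp8 */
  if (regno == EX_SP || regno == EX_R12)
    len++;                                       /* SIB byte */
  return len;
}

int
main (void)
{
  printf ("(%%rbx)    -> %d extra bytes\n", ex_baseaddr_len (EX_BX, 0));   /* 0 */
  printf ("(%%rbp)    -> %d extra bytes\n", ex_baseaddr_len (EX_BP, 0));   /* 1 */
  printf ("8(%%rsp)   -> %d extra bytes\n", ex_baseaddr_len (EX_SP, 8));   /* 2 */
  printf ("256(%%rbx) -> %d extra bytes\n", ex_baseaddr_len (EX_BX, 256)); /* 4 */
  return 0;
}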
6186 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6187 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6189 static bool
6190 sp_valid_at (HOST_WIDE_INT cfa_offset)
6192 const struct machine_frame_state &fs = cfun->machine->fs;
6193 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6195 /* Validate that the cfa_offset isn't in a "no-man's land". */
6196 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6197 return false;
6199 return fs.sp_valid;
6202 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6203 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6205 static inline bool
6206 fp_valid_at (HOST_WIDE_INT cfa_offset)
6208 const struct machine_frame_state &fs = cfun->machine->fs;
6209 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6211 /* Validate that the cfa_offset isn't in a "no-man's land". */
6212 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6213 return false;
6215 return fs.fp_valid;
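/* Illustrative sketch (not part of i386.c): how sp_valid_at and fp_valid_at
   partition CFA offsets once the stack has been re-aligned (fs.sp_realigned),
   assuming fs.sp_valid and fs.fp_valid are both true.  The boundary values
   are invented examples.  */
#include <stdio.h>

int
main (void)
{
  const long sp_realigned_offset = 16;   /* example: at or below this, only FP works */
  const long sp_realigned_fp_last = 48;  /* example: above this, only SP works */

  for (long cfa_offset = 8; cfa_offset <= 64; cfa_offset += 8)
    {
      int sp_ok = !(cfa_offset <= sp_realigned_offset);  /* sp_valid_at */
      int fp_ok = !(cfa_offset > sp_realigned_fp_last);  /* fp_valid_at */
      printf ("cfa_offset=%2ld  sp:%s  fp:%s\n",
              cfa_offset, sp_ok ? "ok" : "--", fp_ok ? "ok" : "--");
    }
  return 0;
}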
6218 /* Choose a base register based upon alignment requested, speed and/or
6219 size. */
6221 static void
6222 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6223 HOST_WIDE_INT &base_offset,
6224 unsigned int align_requested, unsigned int *align)
6226 const struct machine_function *m = cfun->machine;
6227 unsigned int hfp_align;
6228 unsigned int drap_align;
6229 unsigned int sp_align;
6230 bool hfp_ok = fp_valid_at (cfa_offset);
6231 bool drap_ok = m->fs.drap_valid;
6232 bool sp_ok = sp_valid_at (cfa_offset);
6234 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6236 /* Filter out any registers that don't meet the requested alignment
6237 criteria. */
6238 if (align_requested)
6240 if (m->fs.realigned)
6241 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6242 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6243 notes (which we would need in order to use a realigned stack pointer),
6244 so disable on SEH targets. */
6245 else if (m->fs.sp_realigned)
6246 sp_align = crtl->stack_alignment_needed;
6248 hfp_ok = hfp_ok && hfp_align >= align_requested;
6249 drap_ok = drap_ok && drap_align >= align_requested;
6250 sp_ok = sp_ok && sp_align >= align_requested;
6253 if (m->use_fast_prologue_epilogue)
6255 /* Choose the base register most likely to allow the most scheduling
6256 opportunities. Generally FP is valid throughout the function,
6257 while DRAP must be reloaded within the epilogue. But choose either
6258 over the SP due to increased encoding size. */
6260 if (hfp_ok)
6262 base_reg = hard_frame_pointer_rtx;
6263 base_offset = m->fs.fp_offset - cfa_offset;
6265 else if (drap_ok)
6267 base_reg = crtl->drap_reg;
6268 base_offset = 0 - cfa_offset;
6270 else if (sp_ok)
6272 base_reg = stack_pointer_rtx;
6273 base_offset = m->fs.sp_offset - cfa_offset;
6276 else
6278 HOST_WIDE_INT toffset;
6279 int len = 16, tlen;
6281 /* Choose the base register with the smallest address encoding.
6282 With a tie, choose FP > DRAP > SP. */
6283 if (sp_ok)
6285 base_reg = stack_pointer_rtx;
6286 base_offset = m->fs.sp_offset - cfa_offset;
6287 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6289 if (drap_ok)
6291 toffset = 0 - cfa_offset;
6292 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6293 if (tlen <= len)
6295 base_reg = crtl->drap_reg;
6296 base_offset = toffset;
6297 len = tlen;
6300 if (hfp_ok)
6302 toffset = m->fs.fp_offset - cfa_offset;
6303 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6304 if (tlen <= len)
6306 base_reg = hard_frame_pointer_rtx;
6307 base_offset = toffset;
6308 len = tlen;
6313 /* Set the align return value. */
6314 if (align)
6316 if (base_reg == stack_pointer_rtx)
6317 *align = sp_align;
6318 else if (base_reg == crtl->drap_reg)
6319 *align = drap_align;
6320 else if (base_reg == hard_frame_pointer_rtx)
6321 *align = hfp_align;
6325 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6326 the alignment of the address. If ALIGN is non-null, it should point to
6327 an alignment value (in bits) that is preferred or zero and will
6328 receive the alignment of the base register that was selected,
6329 irrespective of whether or not CFA_OFFSET is a multiple of that
6330 alignment value. If it is possible for the base register offset to be
6331 non-immediate then SCRATCH_REGNO should specify a scratch register to
6332 use.
6334 The valid base registers are taken from CFUN->MACHINE->FS. */
6336 static rtx
6337 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6338 unsigned int scratch_regno = INVALID_REGNUM)
6340 rtx base_reg = NULL;
6341 HOST_WIDE_INT base_offset = 0;
6343 /* If a specific alignment is requested, try to get a base register
6344 with that alignment first. */
6345 if (align && *align)
6346 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6348 if (!base_reg)
6349 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6351 gcc_assert (base_reg != NULL);
6353 rtx base_offset_rtx = GEN_INT (base_offset);
6355 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6357 gcc_assert (scratch_regno != INVALID_REGNUM);
6359 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6360 emit_move_insn (scratch_reg, base_offset_rtx);
6362 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6365 return plus_constant (Pmode, base_reg, base_offset);
6368 /* Emit code to save registers in the prologue. */
6370 static void
6371 ix86_emit_save_regs (void)
6373 unsigned int regno;
6374 rtx_insn *insn;
6376 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6377 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6379 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6380 RTX_FRAME_RELATED_P (insn) = 1;
6384 /* Emit a single register save at CFA - CFA_OFFSET. */
6386 static void
6387 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6388 HOST_WIDE_INT cfa_offset)
6390 struct machine_function *m = cfun->machine;
6391 rtx reg = gen_rtx_REG (mode, regno);
6392 rtx mem, addr, base, insn;
6393 unsigned int align = GET_MODE_ALIGNMENT (mode);
6395 addr = choose_baseaddr (cfa_offset, &align);
6396 mem = gen_frame_mem (mode, addr);
6398 /* The location alignment depends upon the base register. */
6399 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6400 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6401 set_mem_align (mem, align);
6403 insn = emit_insn (gen_rtx_SET (mem, reg));
6404 RTX_FRAME_RELATED_P (insn) = 1;
6406 base = addr;
6407 if (GET_CODE (base) == PLUS)
6408 base = XEXP (base, 0);
6409 gcc_checking_assert (REG_P (base));
6411 /* When saving registers into a re-aligned local stack frame, avoid
6412 any tricky guessing by dwarf2out. */
6413 if (m->fs.realigned)
6415 gcc_checking_assert (stack_realign_drap);
6417 if (regno == REGNO (crtl->drap_reg))
6419 /* A bit of a hack. We force the DRAP register to be saved in
6420 the re-aligned stack frame, which provides us with a copy
6421 of the CFA that will last past the prologue. Install it. */
6422 gcc_checking_assert (cfun->machine->fs.fp_valid);
6423 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6424 cfun->machine->fs.fp_offset - cfa_offset);
6425 mem = gen_rtx_MEM (mode, addr);
6426 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6428 else
6430 /* The frame pointer is a stable reference within the
6431 aligned frame. Use it. */
6432 gcc_checking_assert (cfun->machine->fs.fp_valid);
6433 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6434 cfun->machine->fs.fp_offset - cfa_offset);
6435 mem = gen_rtx_MEM (mode, addr);
6436 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6440 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6441 && cfa_offset >= m->fs.sp_realigned_offset)
6443 gcc_checking_assert (stack_realign_fp);
6444 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6447 /* The memory may not be relative to the current CFA register,
6448 which means that we may need to generate a new pattern for
6449 use by the unwind info. */
6450 else if (base != m->fs.cfa_reg)
6452 addr = plus_constant (Pmode, m->fs.cfa_reg,
6453 m->fs.cfa_offset - cfa_offset);
6454 mem = gen_rtx_MEM (mode, addr);
6455 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6459 /* Emit code to save registers using MOV insns.
6460 First register is stored at CFA - CFA_OFFSET. */
6461 static void
6462 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6464 unsigned int regno;
6466 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6467 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6469 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6470 cfa_offset -= UNITS_PER_WORD;
6474 /* Emit code to save SSE registers using MOV insns.
6475 First register is stored at CFA - CFA_OFFSET. */
6476 static void
6477 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6479 unsigned int regno;
6481 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6482 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6484 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6485 cfa_offset -= GET_MODE_SIZE (V4SFmode);
6489 static GTY(()) rtx queued_cfa_restores;
6491 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
6492 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6493 Don't add the note if the previously saved value will be left untouched
6494 within the stack red zone until return, as unwinders can find the same
6495 value in the register and on the stack. */
6497 static void
6498 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6500 if (!crtl->shrink_wrapped
6501 && cfa_offset <= cfun->machine->fs.red_zone_offset)
6502 return;
6504 if (insn)
6506 add_reg_note (insn, REG_CFA_RESTORE, reg);
6507 RTX_FRAME_RELATED_P (insn) = 1;
6509 else
6510 queued_cfa_restores
6511 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6514 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6516 static void
6517 ix86_add_queued_cfa_restore_notes (rtx insn)
6519 rtx last;
6520 if (!queued_cfa_restores)
6521 return;
6522 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6524 XEXP (last, 1) = REG_NOTES (insn);
6525 REG_NOTES (insn) = queued_cfa_restores;
6526 queued_cfa_restores = NULL_RTX;
6527 RTX_FRAME_RELATED_P (insn) = 1;
6530 /* Expand a prologue or epilogue stack adjustment.
6531 The pattern exists to put a dependency on all ebp-based memory accesses.
6532 STYLE should be negative if instructions should be marked as frame related,
6533 zero if the %r11 register is live and cannot be freely used, and positive
6534 otherwise. */
6536 static rtx
6537 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6538 int style, bool set_cfa)
6540 struct machine_function *m = cfun->machine;
6541 rtx insn;
6542 bool add_frame_related_expr = false;
6544 if (Pmode == SImode)
6545 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
6546 else if (x86_64_immediate_operand (offset, DImode))
6547 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
6548 else
6550 rtx tmp;
6551 /* r11 is used by indirect sibcall return as well, set before the
6552 epilogue and used after the epilogue. */
6553 if (style)
6554 tmp = gen_rtx_REG (DImode, R11_REG);
6555 else
6557 gcc_assert (src != hard_frame_pointer_rtx
6558 && dest != hard_frame_pointer_rtx);
6559 tmp = hard_frame_pointer_rtx;
6561 insn = emit_insn (gen_rtx_SET (tmp, offset));
6562 if (style < 0)
6563 add_frame_related_expr = true;
6565 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
6568 insn = emit_insn (insn);
6569 if (style >= 0)
6570 ix86_add_queued_cfa_restore_notes (insn);
6572 if (set_cfa)
6574 rtx r;
6576 gcc_assert (m->fs.cfa_reg == src);
6577 m->fs.cfa_offset += INTVAL (offset);
6578 m->fs.cfa_reg = dest;
6580 r = gen_rtx_PLUS (Pmode, src, offset);
6581 r = gen_rtx_SET (dest, r);
6582 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6583 RTX_FRAME_RELATED_P (insn) = 1;
6585 else if (style < 0)
6587 RTX_FRAME_RELATED_P (insn) = 1;
6588 if (add_frame_related_expr)
6590 rtx r = gen_rtx_PLUS (Pmode, src, offset);
6591 r = gen_rtx_SET (dest, r);
6592 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6596 if (dest == stack_pointer_rtx)
6598 HOST_WIDE_INT ooffset = m->fs.sp_offset;
6599 bool valid = m->fs.sp_valid;
6600 bool realigned = m->fs.sp_realigned;
6602 if (src == hard_frame_pointer_rtx)
6604 valid = m->fs.fp_valid;
6605 realigned = false;
6606 ooffset = m->fs.fp_offset;
6608 else if (src == crtl->drap_reg)
6610 valid = m->fs.drap_valid;
6611 realigned = false;
6612 ooffset = 0;
6614 else
6616 /* Else there are two possibilities: SP itself, which we set
6617 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
6618 taken care of by hand along the eh_return path. */
6619 gcc_checking_assert (src == stack_pointer_rtx
6620 || offset == const0_rtx);
6623 m->fs.sp_offset = ooffset - INTVAL (offset);
6624 m->fs.sp_valid = valid;
6625 m->fs.sp_realigned = realigned;
6627 return insn;
6630 /* Find an available register to be used as the dynamic realign argument
6631 pointer register. Such a register will be written in the prologue and
6632 used at the beginning of the body, so it must not be
6633 1. a parameter passing register.
6634 2. the GOT pointer.
6635 We reuse the static-chain register if it is available. Otherwise, we
6636 use DI for i386 and R13 for x86-64. We chose R13 since it has a
6637 shorter encoding.
6639 Return: the regno of the chosen register. */
6641 static unsigned int
6642 find_drap_reg (void)
6644 tree decl = cfun->decl;
6646 /* Always use callee-saved register if there are no caller-saved
6647 registers. */
6648 if (TARGET_64BIT)
6650 /* Use R13 for a nested function or a function that needs a static
6651 chain. Since a function with a tail call may use any caller-saved
6652 register in the epilogue, DRAP must not use a caller-saved
6653 register in that case. */
6654 if (DECL_STATIC_CHAIN (decl)
6655 || cfun->machine->no_caller_saved_registers
6656 || crtl->tail_call_emit)
6657 return R13_REG;
6659 return R10_REG;
6661 else
6663 /* Use DI for a nested function or a function that needs a static
6664 chain. Since a function with a tail call may use any caller-saved
6665 register in the epilogue, DRAP must not use a caller-saved
6666 register in that case. */
6667 if (DECL_STATIC_CHAIN (decl)
6668 || cfun->machine->no_caller_saved_registers
6669 || crtl->tail_call_emit)
6670 return DI_REG;
6672 /* Reuse static chain register if it isn't used for parameter
6673 passing. */
6674 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6676 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6677 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6678 return CX_REG;
6680 return DI_REG;
6684 /* Return minimum incoming stack alignment. */
6686 static unsigned int
6687 ix86_minimum_incoming_stack_boundary (bool sibcall)
6689 unsigned int incoming_stack_boundary;
6691 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
6692 if (cfun->machine->func_type != TYPE_NORMAL)
6693 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6694 /* Prefer the one specified at command line. */
6695 else if (ix86_user_incoming_stack_boundary)
6696 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
6697 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
6698 if -mstackrealign is used, this isn't a sibcall check, and the
6699 estimated stack alignment is 128 bits. */
6700 else if (!sibcall
6701 && ix86_force_align_arg_pointer
6702 && crtl->stack_alignment_estimated == 128)
6703 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6704 else
6705 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6707 /* Incoming stack alignment can be changed on individual functions
6708 via force_align_arg_pointer attribute. We use the smallest
6709 incoming stack boundary. */
6710 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6711 && lookup_attribute ("force_align_arg_pointer",
6712 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6713 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6715 /* The incoming stack frame has to be aligned at least at
6716 parm_stack_boundary. */
6717 if (incoming_stack_boundary < crtl->parm_stack_boundary)
6718 incoming_stack_boundary = crtl->parm_stack_boundary;
6720 /* The stack at the entry of main is aligned by the runtime. We use
6721 the smallest incoming stack boundary. */
6722 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6723 && DECL_NAME (current_function_decl)
6724 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6725 && DECL_FILE_SCOPE_P (current_function_decl))
6726 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6728 return incoming_stack_boundary;
6731 /* Update incoming stack boundary and estimated stack alignment. */
6733 static void
6734 ix86_update_stack_boundary (void)
6736 ix86_incoming_stack_boundary
6737 = ix86_minimum_incoming_stack_boundary (false);
6739 /* x86_64 vararg needs 16byte stack alignment for register save area. */
6740 if (TARGET_64BIT
6741 && cfun->stdarg
6742 && crtl->stack_alignment_estimated < 128)
6743 crtl->stack_alignment_estimated = 128;
6745 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6746 if (ix86_tls_descriptor_calls_expanded_in_cfun
6747 && crtl->preferred_stack_boundary < 128)
6748 crtl->preferred_stack_boundary = 128;
6751 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6752 needed or an rtx for DRAP otherwise. */
6754 static rtx
6755 ix86_get_drap_rtx (void)
6757 /* We must use DRAP if there are outgoing arguments on stack and
6758 ACCUMULATE_OUTGOING_ARGS is false. */
6759 if (ix86_force_drap
6760 || (cfun->machine->outgoing_args_on_stack
6761 && !ACCUMULATE_OUTGOING_ARGS))
6762 crtl->need_drap = true;
6764 if (stack_realign_drap)
6766 /* Assign DRAP to vDRAP and return vDRAP. */
6767 unsigned int regno = find_drap_reg ();
6768 rtx drap_vreg;
6769 rtx arg_ptr;
6770 rtx_insn *seq, *insn;
6772 arg_ptr = gen_rtx_REG (Pmode, regno);
6773 crtl->drap_reg = arg_ptr;
6775 start_sequence ();
6776 drap_vreg = copy_to_reg (arg_ptr);
6777 seq = get_insns ();
6778 end_sequence ();
6780 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
6781 if (!optimize)
6783 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
6784 RTX_FRAME_RELATED_P (insn) = 1;
6786 return drap_vreg;
6788 else
6789 return NULL;
6792 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6794 static rtx
6795 ix86_internal_arg_pointer (void)
6797 return virtual_incoming_args_rtx;
6800 struct scratch_reg {
6801 rtx reg;
6802 bool saved;
6805 /* Return a short-lived scratch register for use on function entry.
6806 In 32-bit mode, it is valid only after the registers are saved
6807 in the prologue. This register must be released by means of
6808 release_scratch_register_on_entry once it is dead. */
6810 static void
6811 get_scratch_register_on_entry (struct scratch_reg *sr)
6813 int regno;
6815 sr->saved = false;
6817 if (TARGET_64BIT)
6819 /* We always use R11 in 64-bit mode. */
6820 regno = R11_REG;
6822 else
6824 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
6825 bool fastcall_p
6826 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
6827 bool thiscall_p
6828 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
6829 bool static_chain_p = DECL_STATIC_CHAIN (decl);
6830 int regparm = ix86_function_regparm (fntype, decl);
6831 int drap_regno
6832 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
6834 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
6835 for the static chain register. */
6836 if ((regparm < 1 || (fastcall_p && !static_chain_p))
6837 && drap_regno != AX_REG)
6838 regno = AX_REG;
6839 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
6840 for the static chain register. */
6841 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
6842 regno = AX_REG;
6843 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
6844 regno = DX_REG;
6845 /* ecx is the static chain register. */
6846 else if (regparm < 3 && !fastcall_p && !thiscall_p
6847 && !static_chain_p
6848 && drap_regno != CX_REG)
6849 regno = CX_REG;
6850 else if (ix86_save_reg (BX_REG, true, false))
6851 regno = BX_REG;
6852 /* esi is the static chain register. */
6853 else if (!(regparm == 3 && static_chain_p)
6854 && ix86_save_reg (SI_REG, true, false))
6855 regno = SI_REG;
6856 else if (ix86_save_reg (DI_REG, true, false))
6857 regno = DI_REG;
6858 else
6860 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
6861 sr->saved = true;
6865 sr->reg = gen_rtx_REG (Pmode, regno);
6866 if (sr->saved)
6868 rtx_insn *insn = emit_insn (gen_push (sr->reg));
6869 RTX_FRAME_RELATED_P (insn) = 1;
6873 /* Release a scratch register obtained from the preceding function.
6875 If RELEASE_VIA_POP is true, we just pop the register off the stack
6876 to release it. This is what non-Linux systems use with -fstack-check.
6878 Otherwise we use OFFSET to locate the saved register and the
6879 allocated stack space becomes part of the local frame and is
6880 deallocated by the epilogue. */
6882 static void
6883 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
6884 bool release_via_pop)
6886 if (sr->saved)
6888 if (release_via_pop)
6890 struct machine_function *m = cfun->machine;
6891 rtx x, insn = emit_insn (gen_pop (sr->reg));
6893 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
6894 RTX_FRAME_RELATED_P (insn) = 1;
6895 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
6896 x = gen_rtx_SET (stack_pointer_rtx, x);
6897 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
6898 m->fs.sp_offset -= UNITS_PER_WORD;
6900 else
6902 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
6903 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
6904 emit_insn (x);
6909 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
6911 This differs from the next routine in that it tries hard to prevent
6912 attacks that jump the stack guard. Thus it is never allowed to allocate
6913 more than PROBE_INTERVAL bytes of stack space without a suitable
6914 probe.
6916 INT_REGISTERS_SAVED is true if integer registers have already been
6917 pushed on the stack. */
6919 static void
6920 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
6921 const bool int_registers_saved)
6923 struct machine_function *m = cfun->machine;
6925 /* If this function does not statically allocate stack space, then
6926 no probes are needed. */
6927 if (!size)
6929 /* However, the allocation of space via pushes for register
6930 saves could be viewed as allocating space, but without the
6931 need to probe. */
6932 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
6933 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
6934 else
6935 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
6936 return;
6939 /* If we are a noreturn function, then we have to consider the
6940 possibility that we're called via a jump rather than a call.
6942 Thus we don't have the implicit probe generated by saving the
6943 return address into the stack at the call. Thus, the stack
6944 pointer could be anywhere in the guard page. The safe thing
6945 to do is emit a probe now.
6947 The probe can be avoided if we have already emitted any callee
6948 register saves into the stack or have a frame pointer (which will
6949 have been saved as well). Those saves will function as implicit
6950 probes.
6952 ?!? This should be revamped to work like aarch64 and s390 where
6953 we track the offset from the most recent probe. Normally that
6954 offset would be zero. For a noreturn function we would reset
6955 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
6956 we just probe when we cross PROBE_INTERVAL. */
6957 if (TREE_THIS_VOLATILE (cfun->decl)
6958 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
6960 /* We can safely use any register here since we're just going to push
6961 its value and immediately pop it back. But we do try and avoid
6962 argument passing registers so as not to introduce dependencies in
6963 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
6964 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
6965 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
6966 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
6967 m->fs.sp_offset -= UNITS_PER_WORD;
6968 if (m->fs.cfa_reg == stack_pointer_rtx)
6970 m->fs.cfa_offset -= UNITS_PER_WORD;
6971 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
6972 x = gen_rtx_SET (stack_pointer_rtx, x);
6973 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
6974 RTX_FRAME_RELATED_P (insn_push) = 1;
6975 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6976 x = gen_rtx_SET (stack_pointer_rtx, x);
6977 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
6978 RTX_FRAME_RELATED_P (insn_pop) = 1;
6980 emit_insn (gen_blockage ());
6983 /* If we allocate less than the size of the guard statically,
6984 then no probing is necessary, but we do need to allocate
6985 the stack. */
6986 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
6988 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6989 GEN_INT (-size), -1,
6990 m->fs.cfa_reg == stack_pointer_rtx);
6991 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
6992 return;
6995 /* We're allocating a large enough stack frame that we need to
6996 emit probes. Either emit them inline or in a loop depending
6997 on the size. */
6998 HOST_WIDE_INT probe_interval = get_probe_interval ();
6999 if (size <= 4 * probe_interval)
7001 HOST_WIDE_INT i;
7002 for (i = probe_interval; i <= size; i += probe_interval)
7004 /* Allocate PROBE_INTERVAL bytes. */
7005 rtx insn
7006 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7007 GEN_INT (-probe_interval), -1,
7008 m->fs.cfa_reg == stack_pointer_rtx);
7009 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7011 /* And probe at *sp. */
7012 emit_stack_probe (stack_pointer_rtx);
7013 emit_insn (gen_blockage ());
7016 /* We need to allocate space for the residual, but we do not need
7017 to probe the residual. */
7018 HOST_WIDE_INT residual = (i - probe_interval - size);
7019 if (residual)
7020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7021 GEN_INT (residual), -1,
7022 m->fs.cfa_reg == stack_pointer_rtx);
7023 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7025 else
7027 /* We expect the GP registers to be saved when probes are used
7028 as the probing sequences might need a scratch register and
7029 the routine to allocate one assumes the integer registers
7030 have already been saved. */
7031 gcc_assert (int_registers_saved);
7033 struct scratch_reg sr;
7034 get_scratch_register_on_entry (&sr);
7036 /* If we needed to save a register, then account for any space
7037 that was pushed (we are not going to pop the register when
7038 we do the restore). */
7039 if (sr.saved)
7040 size -= UNITS_PER_WORD;
7042 /* Step 1: round SIZE down to a multiple of the interval. */
7043 HOST_WIDE_INT rounded_size = size & -probe_interval;
7045 /* Step 2: compute final value of the loop counter. Use lea if
7046 possible. */
7047 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7048 rtx insn;
7049 if (address_no_seg_operand (addr, Pmode))
7050 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7051 else
7053 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7054 insn = emit_insn (gen_rtx_SET (sr.reg,
7055 gen_rtx_PLUS (Pmode, sr.reg,
7056 stack_pointer_rtx)));
7058 if (m->fs.cfa_reg == stack_pointer_rtx)
7060 add_reg_note (insn, REG_CFA_DEF_CFA,
7061 plus_constant (Pmode, sr.reg,
7062 m->fs.cfa_offset + rounded_size));
7063 RTX_FRAME_RELATED_P (insn) = 1;
7066 /* Step 3: the loop. */
7067 rtx size_rtx = GEN_INT (rounded_size);
7068 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
7069 size_rtx));
7070 if (m->fs.cfa_reg == stack_pointer_rtx)
7072 m->fs.cfa_offset += rounded_size;
7073 add_reg_note (insn, REG_CFA_DEF_CFA,
7074 plus_constant (Pmode, stack_pointer_rtx,
7075 m->fs.cfa_offset));
7076 RTX_FRAME_RELATED_P (insn) = 1;
7078 m->fs.sp_offset += rounded_size;
7079 emit_insn (gen_blockage ());
7081 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7082 is equal to ROUNDED_SIZE. */
7084 if (size != rounded_size)
7085 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7086 GEN_INT (rounded_size - size), -1,
7087 m->fs.cfa_reg == stack_pointer_rtx);
7088 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7090 /* This does not deallocate the space reserved for the scratch
7091 register. That will be deallocated in the epilogue. */
7092 release_scratch_register_on_entry (&sr, size, false);
7095 /* Make sure nothing is scheduled before we are done. */
7096 emit_insn (gen_blockage ());
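/* Illustrative sketch (not part of i386.c): the allocate/probe schedule
   produced by the inline (size <= 4 * probe_interval) path above, assuming
   a 4096-byte probe interval.  The frame size is an invented example.  */
#include <stdio.h>

int
main (void)
{
  const long probe_interval = 4096;   /* typical get_probe_interval () value */
  const long size = 10000;            /* example static frame size */
  long i, allocated = 0;

  for (i = probe_interval; i <= size; i += probe_interval)
    {
      allocated += probe_interval;
      printf ("allocate %ld bytes, probe at *sp (total %ld)\n",
              probe_interval, allocated);
    }
  long residual = i - probe_interval - size;   /* negative: bytes still owed */
  if (residual)
    printf ("allocate %ld residual bytes, no probe (total %ld)\n",
            -residual, allocated - residual);
  return 0;
}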
7099 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7101 INT_REGISTERS_SAVED is true if integer registers have already been
7102 pushed on the stack. */
7104 static void
7105 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7106 const bool int_registers_saved)
7108 /* We skip the probe for the first interval + a small dope of 4 words and
7109 probe that many bytes past the specified size to maintain a protection
7110 area at the bottom of the stack. */
7111 const int dope = 4 * UNITS_PER_WORD;
7112 rtx size_rtx = GEN_INT (size), last;
7114 /* See if we have a constant small number of probes to generate. If so,
7115 that's the easy case. The run-time loop is made up of 9 insns in the
7116 generic case while the compile-time loop is made up of 3+2*(n-1) insns
7117 for n # of intervals. */
7118 if (size <= 4 * get_probe_interval ())
7120 HOST_WIDE_INT i, adjust;
7121 bool first_probe = true;
7123 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7124 values of N from 1 until it exceeds SIZE. If only one probe is
7125 needed, this will not generate any code. Then adjust and probe
7126 to PROBE_INTERVAL + SIZE. */
7127 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7129 if (first_probe)
7131 adjust = 2 * get_probe_interval () + dope;
7132 first_probe = false;
7134 else
7135 adjust = get_probe_interval ();
7137 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7138 plus_constant (Pmode, stack_pointer_rtx,
7139 -adjust)));
7140 emit_stack_probe (stack_pointer_rtx);
7143 if (first_probe)
7144 adjust = size + get_probe_interval () + dope;
7145 else
7146 adjust = size + get_probe_interval () - i;
7148 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7149 plus_constant (Pmode, stack_pointer_rtx,
7150 -adjust)));
7151 emit_stack_probe (stack_pointer_rtx);
7153 /* Adjust back to account for the additional first interval. */
7154 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7155 plus_constant (Pmode, stack_pointer_rtx,
7156 (get_probe_interval ()
7157 + dope))));
7160 /* Otherwise, do the same as above, but in a loop. Note that we must be
7161 extra careful with variables wrapping around because we might be at
7162 the very top (or the very bottom) of the address space and we have
7163 to be able to handle this case properly; in particular, we use an
7164 equality test for the loop condition. */
7165 else
7167 /* We expect the GP registers to be saved when probes are used
7168 as the probing sequences might need a scratch register and
7169 the routine to allocate one assumes the integer registers
7170 have already been saved. */
7171 gcc_assert (int_registers_saved);
7173 HOST_WIDE_INT rounded_size;
7174 struct scratch_reg sr;
7176 get_scratch_register_on_entry (&sr);
7178 /* If we needed to save a register, then account for any space
7179 that was pushed (we are not going to pop the register when
7180 we do the restore). */
7181 if (sr.saved)
7182 size -= UNITS_PER_WORD;
7184 /* Step 1: round SIZE to the previous multiple of the interval. */
7186 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7189 /* Step 2: compute initial and final value of the loop counter. */
7191 /* SP = SP_0 + PROBE_INTERVAL. */
7192 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7193 plus_constant (Pmode, stack_pointer_rtx,
7194 - (get_probe_interval () + dope))));
7196 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
7197 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
7198 emit_insn (gen_rtx_SET (sr.reg,
7199 plus_constant (Pmode, stack_pointer_rtx,
7200 -rounded_size)));
7201 else
7203 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7204 emit_insn (gen_rtx_SET (sr.reg,
7205 gen_rtx_PLUS (Pmode, sr.reg,
7206 stack_pointer_rtx)));
7210 /* Step 3: the loop
7214 SP = SP + PROBE_INTERVAL
7215 probe at SP
7217 while (SP != LAST_ADDR)
7219 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7220 values of N from 1 until it is equal to ROUNDED_SIZE. */
7222 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
7225 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7226 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
7228 if (size != rounded_size)
7230 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7231 plus_constant (Pmode, stack_pointer_rtx,
7232 rounded_size - size)));
7233 emit_stack_probe (stack_pointer_rtx);
7236 /* Adjust back to account for the additional first interval. */
7237 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7238 plus_constant (Pmode, stack_pointer_rtx,
7239 (get_probe_interval ()
7240 + dope))));
7242 /* This does not deallocate the space reserved for the scratch
7243 register. That will be deallocated in the epilogue. */
7244 release_scratch_register_on_entry (&sr, size, false);
7247 /* Even if the stack pointer isn't the CFA register, we need to correctly
7248 describe the adjustments made to it, in particular differentiate the
7249 frame-related ones from the frame-unrelated ones. */
7250 if (size > 0)
7252 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
7253 XVECEXP (expr, 0, 0)
7254 = gen_rtx_SET (stack_pointer_rtx,
7255 plus_constant (Pmode, stack_pointer_rtx, -size));
7256 XVECEXP (expr, 0, 1)
7257 = gen_rtx_SET (stack_pointer_rtx,
7258 plus_constant (Pmode, stack_pointer_rtx,
7259 get_probe_interval () + dope + size));
7260 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
7261 RTX_FRAME_RELATED_P (last) = 1;
7263 cfun->machine->fs.sp_offset += size;
7266 /* Make sure nothing is scheduled before we are done. */
7267 emit_insn (gen_blockage ());
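/* Illustrative sketch (not part of i386.c): the net stack adjustment made by
   the small-size path of ix86_adjust_stack_and_probe above, verifying that
   the extra "first interval + dope" skipped up front is given back by the
   final adjustment.  4096 and 4 * 8 are example values for the probe
   interval and the word size.  */
#include <stdio.h>

int
main (void)
{
  const long interval = 4096, dope = 4 * 8;   /* 4 words of 8 bytes */
  const long size = 10000;                    /* example frame size */
  long sp = 0, i;
  int first_probe = 1;

  for (i = interval; i < size; i += interval)
    {
      sp -= first_probe ? 2 * interval + dope : interval;   /* adjust, probe */
      first_probe = 0;
    }
  sp -= first_probe ? size + interval + dope : size + interval - i;
  sp += interval + dope;                      /* give back the skipped area */
  printf ("net adjustment = %ld (expected -%ld)\n", sp, size);
  return 0;
}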
7270 /* Adjust the stack pointer up to REG while probing it. */
7272 const char *
7273 output_adjust_stack_and_probe (rtx reg)
7275 static int labelno = 0;
7276 char loop_lab[32];
7277 rtx xops[2];
7279 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7281 /* Loop. */
7282 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7284 /* SP = SP + PROBE_INTERVAL. */
7285 xops[0] = stack_pointer_rtx;
7286 xops[1] = GEN_INT (get_probe_interval ());
7287 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7289 /* Probe at SP. */
7290 xops[1] = const0_rtx;
7291 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7293 /* Test if SP == LAST_ADDR. */
7294 xops[0] = stack_pointer_rtx;
7295 xops[1] = reg;
7296 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7298 /* Branch. */
7299 fputs ("\tjne\t", asm_out_file);
7300 assemble_name_raw (asm_out_file, loop_lab);
7301 fputc ('\n', asm_out_file);
7303 return "";
7306 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7307 inclusive. These are offsets from the current stack pointer.
7309 INT_REGISTERS_SAVED is true if integer registers have already been
7310 pushed on the stack. */
7312 static void
7313 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7314 const bool int_registers_saved)
7316 /* See if we have a constant small number of probes to generate. If so,
7317 that's the easy case. The run-time loop is made up of 6 insns in the
7318 generic case while the compile-time loop is made up of n insns for n #
7319 of intervals. */
7320 if (size <= 6 * get_probe_interval ())
7322 HOST_WIDE_INT i;
7324 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7325 it exceeds SIZE. If only one probe is needed, this will not
7326 generate any code. Then probe at FIRST + SIZE. */
7327 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7328 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7329 -(first + i)));
7331 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7332 -(first + size)));
7335 /* Otherwise, do the same as above, but in a loop. Note that we must be
7336 extra careful with variables wrapping around because we might be at
7337 the very top (or the very bottom) of the address space and we have
7338 to be able to handle this case properly; in particular, we use an
7339 equality test for the loop condition. */
7340 else
7342 /* We expect the GP registers to be saved when probes are used
7343 as the probing sequences might need a scratch register and
7344 the routine to allocate one assumes the integer registers
7345 have already been saved. */
7346 gcc_assert (int_registers_saved);
7348 HOST_WIDE_INT rounded_size, last;
7349 struct scratch_reg sr;
7351 get_scratch_register_on_entry (&sr);
7354 /* Step 1: round SIZE to the previous multiple of the interval. */
7356 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7359 /* Step 2: compute initial and final value of the loop counter. */
7361 /* TEST_OFFSET = FIRST. */
7362 emit_move_insn (sr.reg, GEN_INT (-first));
7364 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7365 last = first + rounded_size;
7368 /* Step 3: the loop
7372 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7373 probe at TEST_ADDR
7375 while (TEST_ADDR != LAST_ADDR)
7377 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7378 until it is equal to ROUNDED_SIZE. */
7380 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
7383 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7384 that SIZE is equal to ROUNDED_SIZE. */
7386 if (size != rounded_size)
7387 emit_stack_probe (plus_constant (Pmode,
7388 gen_rtx_PLUS (Pmode,
7389 stack_pointer_rtx,
7390 sr.reg),
7391 rounded_size - size));
7393 release_scratch_register_on_entry (&sr, size, true);
7396 /* Make sure nothing is scheduled before we are done. */
7397 emit_insn (gen_blockage ());
7400 /* Probe a range of stack addresses from REG to END, inclusive. These are
7401 offsets from the current stack pointer. */
7403 const char *
7404 output_probe_stack_range (rtx reg, rtx end)
7406 static int labelno = 0;
7407 char loop_lab[32];
7408 rtx xops[3];
7410 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7412 /* Loop. */
7413 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7415 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7416 xops[0] = reg;
7417 xops[1] = GEN_INT (get_probe_interval ());
7418 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7420 /* Probe at TEST_ADDR. */
7421 xops[0] = stack_pointer_rtx;
7422 xops[1] = reg;
7423 xops[2] = const0_rtx;
7424 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7426 /* Test if TEST_ADDR == LAST_ADDR. */
7427 xops[0] = reg;
7428 xops[1] = end;
7429 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7431 /* Branch. */
7432 fputs ("\tjne\t", asm_out_file);
7433 assemble_name_raw (asm_out_file, loop_lab);
7434 fputc ('\n', asm_out_file);
7436 return "";
7439 /* Return true if a stack frame is required. Update STACK_ALIGNMENT
7440 to the largest alignment, in bits, of any stack slot used if a stack
7441 frame is required and CHECK_STACK_SLOT is true. */
7443 static bool
7444 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7445 bool check_stack_slot)
7447 HARD_REG_SET set_up_by_prologue, prologue_used;
7448 basic_block bb;
7450 CLEAR_HARD_REG_SET (prologue_used);
7451 CLEAR_HARD_REG_SET (set_up_by_prologue);
7452 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7453 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7454 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7455 HARD_FRAME_POINTER_REGNUM);
7457 /* The preferred stack alignment is the minimum stack alignment. */
7458 if (stack_alignment > crtl->preferred_stack_boundary)
7459 stack_alignment = crtl->preferred_stack_boundary;
7461 bool require_stack_frame = false;
7463 FOR_EACH_BB_FN (bb, cfun)
7465 rtx_insn *insn;
7466 FOR_BB_INSNS (bb, insn)
7467 if (NONDEBUG_INSN_P (insn)
7468 && requires_stack_frame_p (insn, prologue_used,
7469 set_up_by_prologue))
7471 require_stack_frame = true;
7473 if (check_stack_slot)
7475 /* Find the maximum stack alignment. */
7476 subrtx_iterator::array_type array;
7477 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7478 if (MEM_P (*iter)
7479 && (reg_mentioned_p (stack_pointer_rtx,
7480 *iter)
7481 || reg_mentioned_p (frame_pointer_rtx,
7482 *iter)))
7484 unsigned int alignment = MEM_ALIGN (*iter);
7485 if (alignment > stack_alignment)
7486 stack_alignment = alignment;
7492 return require_stack_frame;
7495 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7496 will guide prologue/epilogue to be generated in correct form. */
7498 static void
7499 ix86_finalize_stack_frame_flags (void)
7501 /* Check if stack realignment is really needed after reload, and
7502 store the result in cfun. */
7503 unsigned int incoming_stack_boundary
7504 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7505 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7506 unsigned int stack_alignment
7507 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7508 ? crtl->max_used_stack_slot_alignment
7509 : crtl->stack_alignment_needed);
7510 unsigned int stack_realign
7511 = (incoming_stack_boundary < stack_alignment);
7512 bool recompute_frame_layout_p = false;
7514 if (crtl->stack_realign_finalized)
7516 /* After stack_realign_needed is finalized, we can no longer
7517 change it. */
7518 gcc_assert (crtl->stack_realign_needed == stack_realign);
7519 return;
7522 /* If the only reason for frame_pointer_needed is that we conservatively
7523 assumed stack realignment might be needed or -fno-omit-frame-pointer
7524 is used, but in the end nothing that needed the stack alignment was
7525 spilled and there was no stack access, clear frame_pointer_needed and
7526 say we don't need stack realignment. */
7527 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7528 && frame_pointer_needed
7529 && crtl->is_leaf
7530 && crtl->sp_is_unchanging
7531 && !ix86_current_function_calls_tls_descriptor
7532 && !crtl->accesses_prior_frames
7533 && !cfun->calls_alloca
7534 && !crtl->calls_eh_return
7535 /* See ira_setup_eliminable_regset for the rationale. */
7536 && !(STACK_CHECK_MOVING_SP
7537 && flag_stack_check
7538 && flag_exceptions
7539 && cfun->can_throw_non_call_exceptions)
7540 && !ix86_frame_pointer_required ()
7541 && get_frame_size () == 0
7542 && ix86_nsaved_sseregs () == 0
7543 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7545 if (ix86_find_max_used_stack_alignment (stack_alignment,
7546 stack_realign))
7548 /* Stack frame is required. If stack alignment needed is less
7549 than incoming stack boundary, don't realign stack. */
7550 stack_realign = incoming_stack_boundary < stack_alignment;
7551 if (!stack_realign)
7553 crtl->max_used_stack_slot_alignment
7554 = incoming_stack_boundary;
7555 crtl->stack_alignment_needed
7556 = incoming_stack_boundary;
7557 /* Also update preferred_stack_boundary for leaf
7558 functions. */
7559 crtl->preferred_stack_boundary
7560 = incoming_stack_boundary;
7563 else
7565 /* If drap has been set, but it actually isn't live at the
7566 start of the function, there is no reason to set it up. */
7567 if (crtl->drap_reg)
7569 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7570 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7571 REGNO (crtl->drap_reg)))
7573 crtl->drap_reg = NULL_RTX;
7574 crtl->need_drap = false;
7577 else
7578 cfun->machine->no_drap_save_restore = true;
7580 frame_pointer_needed = false;
7581 stack_realign = false;
7582 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7583 crtl->stack_alignment_needed = incoming_stack_boundary;
7584 crtl->stack_alignment_estimated = incoming_stack_boundary;
7585 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7586 crtl->preferred_stack_boundary = incoming_stack_boundary;
7587 df_finish_pass (true);
7588 df_scan_alloc (NULL);
7589 df_scan_blocks ();
7590 df_compute_regs_ever_live (true);
7591 df_analyze ();
7593 if (flag_var_tracking)
7595 /* Since frame pointer is no longer available, replace it with
7596 stack pointer - UNITS_PER_WORD in debug insns. */
7597 df_ref ref, next;
7598 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7599 ref; ref = next)
7601 next = DF_REF_NEXT_REG (ref);
7602 if (!DF_REF_INSN_INFO (ref))
7603 continue;
7605 /* Make sure the next ref is for a different instruction,
7606 so that we're not affected by the rescan. */
7607 rtx_insn *insn = DF_REF_INSN (ref);
7608 while (next && DF_REF_INSN (next) == insn)
7609 next = DF_REF_NEXT_REG (next);
7611 if (DEBUG_INSN_P (insn))
7613 bool changed = false;
7614 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7616 rtx *loc = DF_REF_LOC (ref);
7617 if (*loc == hard_frame_pointer_rtx)
7619 *loc = plus_constant (Pmode,
7620 stack_pointer_rtx,
7621 -UNITS_PER_WORD);
7622 changed = true;
7625 if (changed)
7626 df_insn_rescan (insn);
7631 recompute_frame_layout_p = true;
7634 else if (crtl->max_used_stack_slot_alignment >= 128)
7636 /* We don't need to realign the stack. max_used_stack_alignment is
7637 used to decide how the stack frame should be aligned. This is
7638 independent of any psABI and of 32-bit vs 64-bit. It is always
7639 safe to compute max_used_stack_alignment. We compute it only
7640 if a 128-bit aligned load/store may be generated on a misaligned
7641 stack slot, which would lead to a segfault. */
7642 if (ix86_find_max_used_stack_alignment (stack_alignment, true))
7643 cfun->machine->max_used_stack_alignment
7644 = stack_alignment / BITS_PER_UNIT;
7647 if (crtl->stack_realign_needed != stack_realign)
7648 recompute_frame_layout_p = true;
7649 crtl->stack_realign_needed = stack_realign;
7650 crtl->stack_realign_finalized = true;
7651 if (recompute_frame_layout_p)
7652 ix86_compute_frame_layout ();
7655 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7657 static void
7658 ix86_elim_entry_set_got (rtx reg)
7660 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7661 rtx_insn *c_insn = BB_HEAD (bb);
7662 if (!NONDEBUG_INSN_P (c_insn))
7663 c_insn = next_nonnote_nondebug_insn (c_insn);
7664 if (c_insn && NONJUMP_INSN_P (c_insn))
7666 rtx pat = PATTERN (c_insn);
7667 if (GET_CODE (pat) == PARALLEL)
7669 rtx vec = XVECEXP (pat, 0, 0);
7670 if (GET_CODE (vec) == SET
7671 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7672 && REGNO (XEXP (vec, 0)) == REGNO (reg))
7673 delete_insn (c_insn);
7678 static rtx
7679 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7681 rtx addr, mem;
7683 if (offset)
7684 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
7685 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7686 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7689 static inline rtx
7690 gen_frame_load (rtx reg, rtx frame_reg, int offset)
7692 return gen_frame_set (reg, frame_reg, offset, false);
7695 static inline rtx
7696 gen_frame_store (rtx reg, rtx frame_reg, int offset)
7698 return gen_frame_set (reg, frame_reg, offset, true);
7701 static void
7702 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7704 struct machine_function *m = cfun->machine;
7705 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7706 + m->call_ms2sysv_extra_regs;
7707 rtvec v = rtvec_alloc (ncregs + 1);
7708 unsigned int align, i, vi = 0;
7709 rtx_insn *insn;
7710 rtx sym, addr;
7711 rtx rax = gen_rtx_REG (word_mode, AX_REG);
7712 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
7714 /* AL should only be live with sysv_abi. */
7715 gcc_assert (!ix86_eax_live_at_start_p ());
7716 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7718 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
7719 regardless of whether we've actually realigned the stack or not. */
7720 align = GET_MODE_ALIGNMENT (V4SFmode);
7721 addr = choose_baseaddr (frame.stack_realign_offset
7722 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7723 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7725 emit_insn (gen_rtx_SET (rax, addr));
7727 /* Get the stub symbol. */
7728 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7729 : XLOGUE_STUB_SAVE);
7730 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7732 for (i = 0; i < ncregs; ++i)
7734 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7735 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7736 r.regno);
7737 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7740 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7742 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7743 RTX_FRAME_RELATED_P (insn) = true;
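/* Note: the PARALLEL built above pairs a USE of the stub symbol with one
   frame store per clobbered register; the matching insn pattern emits it
   as a single call to the out-of-line save stub, which expects RAX to
   point into the save area as set up above.  */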
7746 /* Expand the prologue into a bunch of separate insns. */
7748 void
7749 ix86_expand_prologue (void)
7751 struct machine_function *m = cfun->machine;
7752 rtx insn, t;
7753 HOST_WIDE_INT allocate;
7754 bool int_registers_saved;
7755 bool sse_registers_saved;
7756 bool save_stub_call_needed;
7757 rtx static_chain = NULL_RTX;
7759 if (ix86_function_naked (current_function_decl))
7760 return;
7762 ix86_finalize_stack_frame_flags ();
7764 /* DRAP should not coexist with stack_realign_fp */
7765 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7767 memset (&m->fs, 0, sizeof (m->fs));
7769 /* Initialize CFA state for before the prologue. */
7770 m->fs.cfa_reg = stack_pointer_rtx;
7771 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
7773 /* Track SP offset to the CFA. We continue tracking this after we've
7774 swapped the CFA register away from SP. In the case of re-alignment
7775 this is fudged; we're interested in offsets within the local frame. */
7776 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
7777 m->fs.sp_valid = true;
7778 m->fs.sp_realigned = false;
7780 const struct ix86_frame &frame = cfun->machine->frame;
7782 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
7784 /* We should have already generated an error for any use of
7785 ms_hook on a nested function. */
7786 gcc_checking_assert (!ix86_static_chain_on_stack);
7788 /* Check if profiling is active and we shall use the profiling-before-
7789 prologue variant. If so, issue a sorry. */
7790 if (crtl->profile && flag_fentry != 0)
7791 sorry ("%<ms_hook_prologue%> attribute is not compatible "
7792 "with %<-mfentry%> for 32-bit");
7794 /* In ix86_asm_output_function_label we emitted:
7795 8b ff movl.s %edi,%edi
7796 55 push %ebp
7797 8b ec movl.s %esp,%ebp
7799 This matches the hookable function prologue in Win32 API
7800 functions in Microsoft Windows XP Service Pack 2 and newer.
7801 Wine uses this to enable Windows apps to hook the Win32 API
7802 functions provided by Wine.
7804 What that means is that we've already set up the frame pointer. */
7806 if (frame_pointer_needed
7807 && !(crtl->drap_reg && crtl->stack_realign_needed))
7809 rtx push, mov;
7811 /* We've decided to use the frame pointer already set up.
7812 Describe this to the unwinder by pretending that both
7813 push and mov insns happen right here.
7815 Putting the unwind info here at the end of the ms_hook
7816 is done so that we can make absolutely certain we get
7817 the required byte sequence at the start of the function,
7818 rather than relying on an assembler that can produce
7819 the exact encoding required.
7821 However it does mean (in the unpatched case) that we have
7822 a 1 insn window where the asynchronous unwind info is
7823 incorrect. However, if we placed the unwind info at
7824 its correct location we would have incorrect unwind info
7825 in the patched case. That is probably all moot, since
7826 I don't expect Wine to generate dwarf2 unwind info for the
7827 system libraries that use this feature. */
7829 insn = emit_insn (gen_blockage ());
7831 push = gen_push (hard_frame_pointer_rtx);
7832 mov = gen_rtx_SET (hard_frame_pointer_rtx,
7833 stack_pointer_rtx);
7834 RTX_FRAME_RELATED_P (push) = 1;
7835 RTX_FRAME_RELATED_P (mov) = 1;
7837 RTX_FRAME_RELATED_P (insn) = 1;
7838 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7839 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
7841 /* Note that gen_push incremented m->fs.cfa_offset, even
7842 though we didn't emit the push insn here. */
7843 m->fs.cfa_reg = hard_frame_pointer_rtx;
7844 m->fs.fp_offset = m->fs.cfa_offset;
7845 m->fs.fp_valid = true;
7847 else
7849 /* The frame pointer is not needed so pop %ebp again.
7850 This leaves us with a pristine state. */
7851 emit_insn (gen_pop (hard_frame_pointer_rtx));
7855 /* The first insn of a function that accepts its static chain on the
7856 stack is to push the register that would be filled in by a direct
7857 call. This insn will be skipped by the trampoline. */
7858 else if (ix86_static_chain_on_stack)
7860 static_chain = ix86_static_chain (cfun->decl, false);
7861 insn = emit_insn (gen_push (static_chain));
7862 emit_insn (gen_blockage ());
7864 /* We don't want to interpret this push insn as a register save,
7865 only as a stack adjustment. The real copy of the register as
7866 a save will be done later, if needed. */
7867 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7868 t = gen_rtx_SET (stack_pointer_rtx, t);
7869 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
7870 RTX_FRAME_RELATED_P (insn) = 1;
7873 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
7874 DRAP is needed and stack realignment is really needed after reload. */
7875 if (stack_realign_drap)
7877 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7879 /* Can't use DRAP in interrupt function. */
7880 if (cfun->machine->func_type != TYPE_NORMAL)
7881 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
7882 "in interrupt service routine. This may be worked "
7883 "around by avoiding functions with aggregate return.");
7885 /* Only need to push parameter pointer reg if it is caller saved. */
7886 if (!call_used_regs[REGNO (crtl->drap_reg)])
7888 /* Push arg pointer reg */
7889 insn = emit_insn (gen_push (crtl->drap_reg));
7890 RTX_FRAME_RELATED_P (insn) = 1;
7893 /* Grab the argument pointer. */
7894 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
7895 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
7896 RTX_FRAME_RELATED_P (insn) = 1;
7897 m->fs.cfa_reg = crtl->drap_reg;
7898 m->fs.cfa_offset = 0;
7900 /* Align the stack. */
7901 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
7902 stack_pointer_rtx,
7903 GEN_INT (-align_bytes)));
7904 RTX_FRAME_RELATED_P (insn) = 1;
7906 /* Replicate the return address on the stack so that return
7907 address can be reached via (argp - 1) slot. This is needed
7908 to implement macro RETURN_ADDR_RTX and intrinsic function
7909 expand_builtin_return_addr etc. */
7910 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
7911 t = gen_frame_mem (word_mode, t);
7912 insn = emit_insn (gen_push (t));
7913 RTX_FRAME_RELATED_P (insn) = 1;
7915 /* For the purposes of frame and register save area addressing,
7916 we've started over with a new frame. */
7917 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
7918 m->fs.realigned = true;
7920 if (static_chain)
7922 /* Replicate static chain on the stack so that static chain
7923 can be reached via (argp - 2) slot. This is needed for
7924 nested function with stack realignment. */
7925 insn = emit_insn (gen_push (static_chain));
7926 RTX_FRAME_RELATED_P (insn) = 1;
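/* Purely illustrative (the exact output depends on the target and the
   required alignment): on x86-64 with a 32-byte requirement the DRAP
   sequence above typically assembles to something like

	leaq	8(%rsp), %r10		# grab the argument pointer (DRAP)
	andq	$-32, %rsp		# align the stack
	pushq	-8(%r10)		# replicate the return address

   so that (argp - 1) still addresses the return address after the
   realignment.  */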
7930 int_registers_saved = (frame.nregs == 0);
7931 sse_registers_saved = (frame.nsseregs == 0);
7932 save_stub_call_needed = (m->call_ms2sysv);
7933 gcc_assert (sse_registers_saved || !save_stub_call_needed);
7935 if (frame_pointer_needed && !m->fs.fp_valid)
7937 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7938 slower on all targets. Also sdb didn't like it. */
7939 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7940 RTX_FRAME_RELATED_P (insn) = 1;
7942 /* Push registers now, before setting the frame pointer
7943 on SEH target. */
7944 if (!int_registers_saved
7945 && TARGET_SEH
7946 && !frame.save_regs_using_mov)
7948 ix86_emit_save_regs ();
7949 int_registers_saved = true;
7950 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
7953 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
7955 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7956 RTX_FRAME_RELATED_P (insn) = 1;
7958 if (m->fs.cfa_reg == stack_pointer_rtx)
7959 m->fs.cfa_reg = hard_frame_pointer_rtx;
7960 m->fs.fp_offset = m->fs.sp_offset;
7961 m->fs.fp_valid = true;
7965 if (!int_registers_saved)
7967 /* If saving registers via PUSH, do so now. */
7968 if (!frame.save_regs_using_mov)
7970 ix86_emit_save_regs ();
7971 int_registers_saved = true;
7972 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
7975 /* When using red zone we may start register saving before allocating
7976 the stack frame saving one cycle of the prologue. However, avoid
7977 doing this if we have to probe the stack; at least on x86_64 the
7978 stack probe can turn into a call that clobbers a red zone location. */
7979 else if (ix86_using_red_zone ()
7980 && (! TARGET_STACK_PROBE
7981 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
7983 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
7984 int_registers_saved = true;
7988 if (stack_realign_fp)
7990 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7991 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7993 /* Record last valid frame pointer offset. */
7994 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
7996 /* The computation of the size of the re-aligned stack frame means
7997 that we must allocate the size of the register save area before
7998 performing the actual alignment. Otherwise we cannot guarantee
7999 that there's enough storage above the realignment point. */
8000 allocate = frame.reg_save_offset - m->fs.sp_offset
8001 + frame.stack_realign_allocate;
8002 if (allocate)
8003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8004 GEN_INT (-allocate), -1, false);
8006 /* Align the stack. */
8007 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
8008 stack_pointer_rtx,
8009 GEN_INT (-align_bytes)));
8010 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8011 m->fs.sp_realigned_offset = m->fs.sp_offset
8012 - frame.stack_realign_allocate;
8013 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8014 Beyond this point, stack access should be done via choose_baseaddr or
8015 by using sp_valid_at and fp_valid_at to determine the correct base
8016 register. Henceforth, any CFA offset should be thought of as logical
8017 and not physical. */
8018 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8019 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8020 m->fs.sp_realigned = true;
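/* Illustrative only: when realigning via the frame pointer
   (stack_realign_fp) with, say, a 32-byte requirement, the code above
   amounts to roughly

	subq	$SAVE, %rsp		# reserve the register save area
	andq	$-32, %rsp		# realign the stack pointer

   where $SAVE stands for the save-area allocation computed above; after
   this, frame accesses must go through choose_baseaddr or the
   sp_valid_at/fp_valid_at helpers, as noted above.  */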
8022 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8023 is needed to describe where a register is saved using a realigned
8024 stack pointer, so we need to invalidate the stack pointer for that
8025 target. */
8026 if (TARGET_SEH)
8027 m->fs.sp_valid = false;
8029 /* If SP offset is non-immediate after allocation of the stack frame,
8030 then emit SSE saves or stub call prior to allocating the rest of the
8031 stack frame. This is less efficient for the out-of-line stub because
8032 we can't combine allocations across the call barrier, but it's better
8033 than using a scratch register. */
8034 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8035 - m->fs.sp_realigned_offset),
8036 Pmode))
8038 if (!sse_registers_saved)
8040 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8041 sse_registers_saved = true;
8043 else if (save_stub_call_needed)
8045 ix86_emit_outlined_ms2sysv_save (frame);
8046 save_stub_call_needed = false;
8051 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8053 if (flag_stack_usage_info)
8055 /* We start to count from ARG_POINTER. */
8056 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8058 /* If it was realigned, take into account the fake frame. */
8059 if (stack_realign_drap)
8061 if (ix86_static_chain_on_stack)
8062 stack_size += UNITS_PER_WORD;
8064 if (!call_used_regs[REGNO (crtl->drap_reg)])
8065 stack_size += UNITS_PER_WORD;
8067 /* This over-estimates by 1 minimal-stack-alignment-unit but
8068 mitigates that by counting in the new return address slot. */
8069 current_function_dynamic_stack_size
8070 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8073 current_function_static_stack_size = stack_size;
8076 /* On SEH target with very large frame size, allocate an area to save
8077 SSE registers (as the very large allocation won't be described). */
8078 if (TARGET_SEH
8079 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8080 && !sse_registers_saved)
8082 HOST_WIDE_INT sse_size
8083 = frame.sse_reg_save_offset - frame.reg_save_offset;
8085 gcc_assert (int_registers_saved);
8087 /* No need to do stack checking as the area will be immediately
8088 written. */
8089 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8090 GEN_INT (-sse_size), -1,
8091 m->fs.cfa_reg == stack_pointer_rtx);
8092 allocate -= sse_size;
8093 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8094 sse_registers_saved = true;
8097 /* The stack has already been decremented by the instruction calling us
8098 so probe if the size is non-negative to preserve the protection area. */
8099 if (allocate >= 0
8100 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
8101 || flag_stack_clash_protection))
8103 if (flag_stack_clash_protection)
8105 ix86_adjust_stack_and_probe_stack_clash (allocate,
8106 int_registers_saved);
8107 allocate = 0;
8109 else if (STACK_CHECK_MOVING_SP)
8111 if (!(crtl->is_leaf && !cfun->calls_alloca
8112 && allocate <= get_probe_interval ()))
8114 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
8115 allocate = 0;
8118 else
8120 HOST_WIDE_INT size = allocate;
8122 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8123 size = 0x80000000 - get_stack_check_protect () - 1;
8125 if (TARGET_STACK_PROBE)
8127 if (crtl->is_leaf && !cfun->calls_alloca)
8129 if (size > get_probe_interval ())
8130 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8132 else
8133 ix86_emit_probe_stack_range (0,
8134 size + get_stack_check_protect (),
8135 int_registers_saved);
8137 else
8139 if (crtl->is_leaf && !cfun->calls_alloca)
8141 if (size > get_probe_interval ()
8142 && size > get_stack_check_protect ())
8143 ix86_emit_probe_stack_range (get_stack_check_protect (),
8144 (size
8145 - get_stack_check_protect ()),
8146 int_registers_saved);
8148 else
8149 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8150 int_registers_saved);
8155 if (allocate == 0)
8157 else if (!ix86_target_stack_probe ()
8158 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8160 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8161 GEN_INT (-allocate), -1,
8162 m->fs.cfa_reg == stack_pointer_rtx);
8164 else
8166 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8167 rtx r10 = NULL;
8168 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
8169 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8170 bool eax_live = ix86_eax_live_at_start_p ();
8171 bool r10_live = false;
8173 if (TARGET_64BIT)
8174 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8176 if (eax_live)
8178 insn = emit_insn (gen_push (eax));
8179 allocate -= UNITS_PER_WORD;
8180 /* Note that SEH directives need to continue tracking the stack
8181 pointer even after the frame pointer has been set up. */
8182 if (sp_is_cfa_reg || TARGET_SEH)
8184 if (sp_is_cfa_reg)
8185 m->fs.cfa_offset += UNITS_PER_WORD;
8186 RTX_FRAME_RELATED_P (insn) = 1;
8187 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8188 gen_rtx_SET (stack_pointer_rtx,
8189 plus_constant (Pmode, stack_pointer_rtx,
8190 -UNITS_PER_WORD)));
8194 if (r10_live)
8196 r10 = gen_rtx_REG (Pmode, R10_REG);
8197 insn = emit_insn (gen_push (r10));
8198 allocate -= UNITS_PER_WORD;
8199 if (sp_is_cfa_reg || TARGET_SEH)
8201 if (sp_is_cfa_reg)
8202 m->fs.cfa_offset += UNITS_PER_WORD;
8203 RTX_FRAME_RELATED_P (insn) = 1;
8204 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8205 gen_rtx_SET (stack_pointer_rtx,
8206 plus_constant (Pmode, stack_pointer_rtx,
8207 -UNITS_PER_WORD)));
8211 emit_move_insn (eax, GEN_INT (allocate));
8212 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
8214 /* Use the fact that AX still contains ALLOCATE. */
8215 adjust_stack_insn = (Pmode == DImode
8216 ? gen_pro_epilogue_adjust_stack_di_sub
8217 : gen_pro_epilogue_adjust_stack_si_sub);
8219 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
8220 stack_pointer_rtx, eax));
8222 if (sp_is_cfa_reg || TARGET_SEH)
8224 if (sp_is_cfa_reg)
8225 m->fs.cfa_offset += allocate;
8226 RTX_FRAME_RELATED_P (insn) = 1;
8227 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8228 gen_rtx_SET (stack_pointer_rtx,
8229 plus_constant (Pmode, stack_pointer_rtx,
8230 -allocate)));
8232 m->fs.sp_offset += allocate;
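/* Illustration (assuming a Windows-style target where the stack must be
   probed): the worker-based allocation above typically ends up as

	movq	$ALLOCATE, %rax
	call	___chkstk_ms		# for example; the helper is target-specific
	subq	%rax, %rsp		# perform the actual allocation

   with any live EAX/R10 values pushed beforehand and reloaded
   afterwards, as handled above and below.  */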
8234 /* Use stack_pointer_rtx for relative addressing so that code works for
8235 realigned stack. But this means that we need a blockage to prevent
8236 stores based on the frame pointer from being scheduled before. */
8237 if (r10_live && eax_live)
8239 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8240 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8241 gen_frame_mem (word_mode, t));
8242 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8243 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8244 gen_frame_mem (word_mode, t));
8245 emit_insn (gen_memory_blockage ());
8247 else if (eax_live || r10_live)
8249 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8250 emit_move_insn (gen_rtx_REG (word_mode,
8251 (eax_live ? AX_REG : R10_REG)),
8252 gen_frame_mem (word_mode, t));
8253 emit_insn (gen_memory_blockage ());
8256 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8258 /* If we haven't already set up the frame pointer, do so now. */
8259 if (frame_pointer_needed && !m->fs.fp_valid)
8261 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
8262 GEN_INT (frame.stack_pointer_offset
8263 - frame.hard_frame_pointer_offset));
8264 insn = emit_insn (insn);
8265 RTX_FRAME_RELATED_P (insn) = 1;
8266 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8268 if (m->fs.cfa_reg == stack_pointer_rtx)
8269 m->fs.cfa_reg = hard_frame_pointer_rtx;
8270 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8271 m->fs.fp_valid = true;
8274 if (!int_registers_saved)
8275 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8276 if (!sse_registers_saved)
8277 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8278 else if (save_stub_call_needed)
8279 ix86_emit_outlined_ms2sysv_save (frame);
8281 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8282 in PROLOGUE. */
8283 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8285 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8286 insn = emit_insn (gen_set_got (pic));
8287 RTX_FRAME_RELATED_P (insn) = 1;
8288 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8289 emit_insn (gen_prologue_use (pic));
8290 /* Delete an already emitted SET_GOT if it exists and is allocated to
8291 REAL_PIC_OFFSET_TABLE_REGNUM. */
8292 ix86_elim_entry_set_got (pic);
8295 if (crtl->drap_reg && !crtl->stack_realign_needed)
8297 /* vDRAP is set up, but after reload it turns out stack realignment
8298 isn't necessary; here we emit prologue code to set up DRAP
8299 without the stack realignment adjustment. */
8300 t = choose_baseaddr (0, NULL);
8301 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8304 /* Prevent instructions from being scheduled into register save push
8305 sequence when access to the redzone area is done through frame pointer.
8306 The offset between the frame pointer and the stack pointer is calculated
8307 relative to the value of the stack pointer at the end of the function
8308 prologue, and moving instructions that access redzone area via frame
8309 pointer inside push sequence violates this assumption. */
8310 if (frame_pointer_needed && frame.red_zone_size)
8311 emit_insn (gen_memory_blockage ());
8313 /* SEH requires that the prologue end within 256 bytes of the start of
8314 the function. Prevent instruction schedules that would extend that.
8315 Further, prevent alloca modifications to the stack pointer from being
8316 combined with prologue modifications. */
8317 if (TARGET_SEH)
8318 emit_insn (gen_prologue_use (stack_pointer_rtx));
8321 /* Emit code to restore REG using a POP insn. */
8323 static void
8324 ix86_emit_restore_reg_using_pop (rtx reg)
8326 struct machine_function *m = cfun->machine;
8327 rtx_insn *insn = emit_insn (gen_pop (reg));
8329 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8330 m->fs.sp_offset -= UNITS_PER_WORD;
8332 if (m->fs.cfa_reg == crtl->drap_reg
8333 && REGNO (reg) == REGNO (crtl->drap_reg))
8335 /* Previously we'd represented the CFA as an expression
8336 like *(%ebp - 8). We've just popped that value from
8337 the stack, which means we need to reset the CFA to
8338 the drap register. This will remain until we restore
8339 the stack pointer. */
8340 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8341 RTX_FRAME_RELATED_P (insn) = 1;
8343 /* This means that the DRAP register is valid for addressing too. */
8344 m->fs.drap_valid = true;
8345 return;
8348 if (m->fs.cfa_reg == stack_pointer_rtx)
8350 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8351 x = gen_rtx_SET (stack_pointer_rtx, x);
8352 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8353 RTX_FRAME_RELATED_P (insn) = 1;
8355 m->fs.cfa_offset -= UNITS_PER_WORD;
8358 /* When the frame pointer is the CFA, and we pop it, we are
8359 swapping back to the stack pointer as the CFA. This happens
8360 for stack frames that don't allocate other data, so we assume
8361 the stack pointer is now pointing at the return address, i.e.
8362 the function entry state, which makes the offset be 1 word. */
8363 if (reg == hard_frame_pointer_rtx)
8365 m->fs.fp_valid = false;
8366 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8368 m->fs.cfa_reg = stack_pointer_rtx;
8369 m->fs.cfa_offset -= UNITS_PER_WORD;
8371 add_reg_note (insn, REG_CFA_DEF_CFA,
8372 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8373 GEN_INT (m->fs.cfa_offset)));
8374 RTX_FRAME_RELATED_P (insn) = 1;
8379 /* Emit code to restore saved registers using POP insns. */
8381 static void
8382 ix86_emit_restore_regs_using_pop (void)
8384 unsigned int regno;
8386 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8387 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8388 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8391 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
8392 omit the emit and only attach the notes. */
8394 static void
8395 ix86_emit_leave (rtx_insn *insn)
8397 struct machine_function *m = cfun->machine;
8398 if (!insn)
8399 insn = emit_insn (ix86_gen_leave ());
8401 ix86_add_queued_cfa_restore_notes (insn);
8403 gcc_assert (m->fs.fp_valid);
8404 m->fs.sp_valid = true;
8405 m->fs.sp_realigned = false;
8406 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8407 m->fs.fp_valid = false;
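/* LEAVE is equivalent to "movq %rbp, %rsp; popq %rbp", which is why
   sp_offset above becomes fp_offset - UNITS_PER_WORD: after the pop the
   stack pointer sits one word above the slot that held the old frame
   pointer.  */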
8409 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8411 m->fs.cfa_reg = stack_pointer_rtx;
8412 m->fs.cfa_offset = m->fs.sp_offset;
8414 add_reg_note (insn, REG_CFA_DEF_CFA,
8415 plus_constant (Pmode, stack_pointer_rtx,
8416 m->fs.sp_offset));
8417 RTX_FRAME_RELATED_P (insn) = 1;
8419 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8420 m->fs.fp_offset);
8423 /* Emit code to restore saved registers using MOV insns.
8424 First register is restored from CFA - CFA_OFFSET. */
8425 static void
8426 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8427 bool maybe_eh_return)
8429 struct machine_function *m = cfun->machine;
8430 unsigned int regno;
8432 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8433 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8435 rtx reg = gen_rtx_REG (word_mode, regno);
8436 rtx mem;
8437 rtx_insn *insn;
8439 mem = choose_baseaddr (cfa_offset, NULL);
8440 mem = gen_frame_mem (word_mode, mem);
8441 insn = emit_move_insn (reg, mem);
8443 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8445 /* Previously we'd represented the CFA as an expression
8446 like *(%ebp - 8). We've just loaded that value back from
8447 the stack, which means we need to reset the CFA to
8448 the drap register. This will remain until we restore
8449 the stack pointer. */
8450 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8451 RTX_FRAME_RELATED_P (insn) = 1;
8453 /* This means that the DRAP register is valid for addressing. */
8454 m->fs.drap_valid = true;
8456 else
8457 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8459 cfa_offset -= UNITS_PER_WORD;
8463 /* Emit code to restore saved SSE registers using MOV insns.
8464 First register is restored from CFA - CFA_OFFSET. */
8465 static void
8466 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8467 bool maybe_eh_return)
8469 unsigned int regno;
8471 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8472 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8474 rtx reg = gen_rtx_REG (V4SFmode, regno);
8475 rtx mem;
8476 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8478 mem = choose_baseaddr (cfa_offset, &align);
8479 mem = gen_rtx_MEM (V4SFmode, mem);
8481 /* The location alignment depends upon the base register. */
8482 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8483 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8484 set_mem_align (mem, align);
8485 emit_insn (gen_rtx_SET (reg, mem));
8487 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8489 cfa_offset -= GET_MODE_SIZE (V4SFmode);
8493 static void
8494 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8495 bool use_call, int style)
8497 struct machine_function *m = cfun->machine;
8498 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8499 + m->call_ms2sysv_extra_regs;
8500 rtvec v;
8501 unsigned int elems_needed, align, i, vi = 0;
8502 rtx_insn *insn;
8503 rtx sym, tmp;
8504 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8505 rtx r10 = NULL_RTX;
8506 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
8507 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8508 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8509 rtx rsi_frame_load = NULL_RTX;
8510 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8511 enum xlogue_stub stub;
8513 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8515 /* If using a realigned stack, we should never start with padding. */
8516 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8518 /* Setup RSI as the stub's base pointer. */
8519 align = GET_MODE_ALIGNMENT (V4SFmode);
8520 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8521 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8523 emit_insn (gen_rtx_SET (rsi, tmp));
8525 /* Get a symbol for the stub. */
8526 if (frame_pointer_needed)
8527 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8528 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8529 else
8530 stub = use_call ? XLOGUE_STUB_RESTORE
8531 : XLOGUE_STUB_RESTORE_TAIL;
8532 sym = xlogue.get_stub_rtx (stub);
8534 elems_needed = ncregs;
8535 if (use_call)
8536 elems_needed += 1;
8537 else
8538 elems_needed += frame_pointer_needed ? 5 : 3;
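/* The element counts above reflect the shape of the PARALLEL: a call
   form only needs the USE of the stub symbol, while a tail-call form
   needs the return rtx and the USE plus either the SP-from-RBP restore,
   the RBP reload and a memory clobber (hard frame pointer case) or just
   the SP <- R10 restore (no frame pointer).  */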
8539 v = rtvec_alloc (elems_needed);
8541 /* We call the epilogue stub when we need to pop incoming args or we are
8542 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8543 epilogue stub and it is the tail-call. */
8544 if (use_call)
8545 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8546 else
8548 RTVEC_ELT (v, vi++) = ret_rtx;
8549 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8550 if (frame_pointer_needed)
8552 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8553 gcc_assert (m->fs.fp_valid);
8554 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8556 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
8557 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8558 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8559 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8560 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8562 else
8564 /* If no hard frame pointer, we set R10 to the SP restore value. */
8565 gcc_assert (!m->fs.fp_valid);
8566 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8567 gcc_assert (m->fs.sp_valid);
8569 r10 = gen_rtx_REG (DImode, R10_REG);
8570 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
8571 emit_insn (gen_rtx_SET (r10, tmp));
8573 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8577 /* Generate frame load insns and restore notes. */
8578 for (i = 0; i < ncregs; ++i)
8580 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8581 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8582 rtx reg, frame_load;
8584 reg = gen_rtx_REG (mode, r.regno);
8585 frame_load = gen_frame_load (reg, rsi, r.offset);
8587 /* Save RSI frame load insn & note to add last. */
8588 if (r.regno == SI_REG)
8590 gcc_assert (!rsi_frame_load);
8591 rsi_frame_load = frame_load;
8592 rsi_restore_offset = r.offset;
8594 else
8596 RTVEC_ELT (v, vi++) = frame_load;
8597 ix86_add_cfa_restore_note (NULL, reg, r.offset);
8601 /* Add RSI frame load & restore note at the end. */
8602 gcc_assert (rsi_frame_load);
8603 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8604 RTVEC_ELT (v, vi++) = rsi_frame_load;
8605 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8606 rsi_restore_offset);
8608 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8609 if (!use_call && !frame_pointer_needed)
8611 gcc_assert (m->fs.sp_valid);
8612 gcc_assert (!m->fs.sp_realigned);
8614 /* At this point, R10 should point to frame.stack_realign_offset. */
8615 if (m->fs.cfa_reg == stack_pointer_rtx)
8616 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8617 m->fs.sp_offset = frame.stack_realign_offset;
8620 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8621 tmp = gen_rtx_PARALLEL (VOIDmode, v);
8622 if (use_call)
8623 insn = emit_insn (tmp);
8624 else
8626 insn = emit_jump_insn (tmp);
8627 JUMP_LABEL (insn) = ret_rtx;
8629 if (frame_pointer_needed)
8630 ix86_emit_leave (insn);
8631 else
8633 /* Need CFA adjust note. */
8634 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8635 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8639 RTX_FRAME_RELATED_P (insn) = true;
8640 ix86_add_queued_cfa_restore_notes (insn);
8642 /* If we're not doing a tail-call, we need to adjust the stack. */
8643 if (use_call && m->fs.sp_valid)
8645 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8647 GEN_INT (dealloc), style,
8648 m->fs.cfa_reg == stack_pointer_rtx);
8652 /* Restore function stack, frame, and registers. */
8654 void
8655 ix86_expand_epilogue (int style)
8657 struct machine_function *m = cfun->machine;
8658 struct machine_frame_state frame_state_save = m->fs;
8659 bool restore_regs_via_mov;
8660 bool using_drap;
8661 bool restore_stub_is_tail = false;
8663 if (ix86_function_naked (current_function_decl))
8665 /* The program should not reach this point. */
8666 emit_insn (gen_ud2 ());
8667 return;
8670 ix86_finalize_stack_frame_flags ();
8671 const struct ix86_frame &frame = cfun->machine->frame;
8673 m->fs.sp_realigned = stack_realign_fp;
8674 m->fs.sp_valid = stack_realign_fp
8675 || !frame_pointer_needed
8676 || crtl->sp_is_unchanging;
8677 gcc_assert (!m->fs.sp_valid
8678 || m->fs.sp_offset == frame.stack_pointer_offset);
8680 /* The FP must be valid if the frame pointer is present. */
8681 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8682 gcc_assert (!m->fs.fp_valid
8683 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8685 /* We must have *some* valid pointer to the stack frame. */
8686 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8688 /* The DRAP is never valid at this point. */
8689 gcc_assert (!m->fs.drap_valid);
8691 /* See the comment about red zone and frame
8692 pointer usage in ix86_expand_prologue. */
8693 if (frame_pointer_needed && frame.red_zone_size)
8694 emit_insn (gen_memory_blockage ());
8696 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8697 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8699 /* Determine the CFA offset of the end of the red-zone. */
8700 m->fs.red_zone_offset = 0;
8701 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8703 /* The red-zone begins below the return address and error code in
8704 an exception handler. */
8705 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8707 /* When the register save area is in the aligned portion of
8708 the stack, determine the maximum runtime displacement that
8709 matches up with the aligned frame. */
8710 if (stack_realign_drap)
8711 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8712 + UNITS_PER_WORD);
8715 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8717 /* Special care must be taken for the normal return case of a function
8718 using eh_return: the eax and edx registers are marked as saved, but
8719 not restored along this path. Adjust the save location to match. */
8720 if (crtl->calls_eh_return && style != 2)
8721 reg_save_offset -= 2 * UNITS_PER_WORD;
8723 /* EH_RETURN requires the use of moves to function properly. */
8724 if (crtl->calls_eh_return)
8725 restore_regs_via_mov = true;
8726 /* SEH requires the use of pops to identify the epilogue. */
8727 else if (TARGET_SEH)
8728 restore_regs_via_mov = false;
8729 /* If we're only restoring one register and sp cannot be used, then
8730 use a move instruction to restore the register, since it's
8731 less work than reloading sp and popping the register. */
8732 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8733 restore_regs_via_mov = true;
8734 else if (TARGET_EPILOGUE_USING_MOVE
8735 && cfun->machine->use_fast_prologue_epilogue
8736 && (frame.nregs > 1
8737 || m->fs.sp_offset != reg_save_offset))
8738 restore_regs_via_mov = true;
8739 else if (frame_pointer_needed
8740 && !frame.nregs
8741 && m->fs.sp_offset != reg_save_offset)
8742 restore_regs_via_mov = true;
8743 else if (frame_pointer_needed
8744 && TARGET_USE_LEAVE
8745 && cfun->machine->use_fast_prologue_epilogue
8746 && frame.nregs == 1)
8747 restore_regs_via_mov = true;
8748 else
8749 restore_regs_via_mov = false;
8751 if (restore_regs_via_mov || frame.nsseregs)
8753 /* Ensure that the entire register save area is addressable via
8754 the stack pointer, if we will restore SSE regs via sp. */
8755 if (TARGET_64BIT
8756 && m->fs.sp_offset > 0x7fffffff
8757 && sp_valid_at (frame.stack_realign_offset + 1)
8758 && (frame.nsseregs + frame.nregs) != 0)
8760 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8761 GEN_INT (m->fs.sp_offset
8762 - frame.sse_reg_save_offset),
8763 style,
8764 m->fs.cfa_reg == stack_pointer_rtx);
8768 /* If there are any SSE registers to restore, then we have to do it
8769 via moves, since there's obviously no pop for SSE regs. */
8770 if (frame.nsseregs)
8771 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
8772 style == 2);
8774 if (m->call_ms2sysv)
8776 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
8778 /* We cannot use a tail-call for the stub if:
8779 1. We have to pop incoming args,
8780 2. We have additional int regs to restore, or
8781 3. A sibling call will be the tail-call, or
8782 4. We are emitting an eh_return_internal epilogue.
8784 TODO: Item 4 has not yet been tested!
8786 If any of the above are true, we will call the stub rather than
8787 jump to it. */
8788 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
8789 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
8792 /* If using an out-of-line stub that is a tail-call, then... */
8793 if (m->call_ms2sysv && restore_stub_is_tail)
8795 /* TODO: paranoid tests. (remove eventually) */
8796 gcc_assert (m->fs.sp_valid);
8797 gcc_assert (!m->fs.sp_realigned);
8798 gcc_assert (!m->fs.fp_valid);
8799 gcc_assert (!m->fs.realigned);
8800 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
8801 gcc_assert (!crtl->drap_reg);
8802 gcc_assert (!frame.nregs);
8804 else if (restore_regs_via_mov)
8806 rtx t;
8808 if (frame.nregs)
8809 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
8811 /* eh_return epilogues need %ecx added to the stack pointer. */
8812 if (style == 2)
8814 rtx sa = EH_RETURN_STACKADJ_RTX;
8815 rtx_insn *insn;
8817 /* %ecx can't be used for both DRAP register and eh_return. */
8818 if (crtl->drap_reg)
8819 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
8821 /* regparm nested functions don't work with eh_return. */
8822 gcc_assert (!ix86_static_chain_on_stack);
8824 if (frame_pointer_needed)
8826 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8827 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
8828 emit_insn (gen_rtx_SET (sa, t));
8830 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
8831 insn = emit_move_insn (hard_frame_pointer_rtx, t);
8833 /* Note that we use SA as a temporary CFA, as the return
8834 address is at the proper place relative to it. We
8835 pretend this happens at the FP restore insn because
8836 prior to this insn the FP would be stored at the wrong
8837 offset relative to SA, and after this insn we have no
8838 other reasonable register to use for the CFA. We don't
8839 bother resetting the CFA to the SP for the duration of
8840 the return insn, unless the control flow instrumentation
8841 is done. In this case the SP is used later and we have
8842 to reset CFA to SP. */
8843 add_reg_note (insn, REG_CFA_DEF_CFA,
8844 plus_constant (Pmode, sa, UNITS_PER_WORD));
8845 ix86_add_queued_cfa_restore_notes (insn);
8846 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8847 RTX_FRAME_RELATED_P (insn) = 1;
8849 m->fs.cfa_reg = sa;
8850 m->fs.cfa_offset = UNITS_PER_WORD;
8851 m->fs.fp_valid = false;
8853 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8854 const0_rtx, style,
8855 flag_cf_protection);
8857 else
8859 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8860 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
8861 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
8862 ix86_add_queued_cfa_restore_notes (insn);
8864 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8865 if (m->fs.cfa_offset != UNITS_PER_WORD)
8867 m->fs.cfa_offset = UNITS_PER_WORD;
8868 add_reg_note (insn, REG_CFA_DEF_CFA,
8869 plus_constant (Pmode, stack_pointer_rtx,
8870 UNITS_PER_WORD));
8871 RTX_FRAME_RELATED_P (insn) = 1;
8874 m->fs.sp_offset = UNITS_PER_WORD;
8875 m->fs.sp_valid = true;
8876 m->fs.sp_realigned = false;
8879 else
8881 /* SEH requires that the function end with (1) a stack adjustment
8882 if necessary, (2) a sequence of pops, and (3) a return or
8883 jump instruction. Prevent insns from the function body from
8884 being scheduled into this sequence. */
8885 if (TARGET_SEH)
8887 /* Prevent a catch region from being adjacent to the standard
8888 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
8889 nor several other flags that would be interesting to test are
8890 set up yet. */
8891 if (flag_non_call_exceptions)
8892 emit_insn (gen_nops (const1_rtx));
8893 else
8894 emit_insn (gen_blockage ());
8897 /* First step is to deallocate the stack frame so that we can
8898 pop the registers. If the stack pointer was realigned, it needs
8899 to be restored now. Also do it on SEH target for very large
8900 frame as the emitted instructions aren't allowed by the ABI
8901 in epilogues. */
8902 if (!m->fs.sp_valid || m->fs.sp_realigned
8903 || (TARGET_SEH
8904 && (m->fs.sp_offset - reg_save_offset
8905 >= SEH_MAX_FRAME_SIZE)))
8907 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
8908 GEN_INT (m->fs.fp_offset
8909 - reg_save_offset),
8910 style, false);
8912 else if (m->fs.sp_offset != reg_save_offset)
8914 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8915 GEN_INT (m->fs.sp_offset
8916 - reg_save_offset),
8917 style,
8918 m->fs.cfa_reg == stack_pointer_rtx);
8921 ix86_emit_restore_regs_using_pop ();
8924 /* If we used a frame pointer and haven't already got rid of it,
8925 then do so now. */
8926 if (m->fs.fp_valid)
8928 /* If the stack pointer is valid and pointing at the frame
8929 pointer store address, then we only need a pop. */
8930 if (sp_valid_at (frame.hfp_save_offset)
8931 && m->fs.sp_offset == frame.hfp_save_offset)
8932 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
8933 /* Leave results in shorter dependency chains on CPUs that are
8934 able to grok it fast. */
8935 else if (TARGET_USE_LEAVE
8936 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
8937 || !cfun->machine->use_fast_prologue_epilogue)
8938 ix86_emit_leave (NULL);
8939 else
8941 pro_epilogue_adjust_stack (stack_pointer_rtx,
8942 hard_frame_pointer_rtx,
8943 const0_rtx, style, !using_drap);
8944 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
8948 if (using_drap)
8950 int param_ptr_offset = UNITS_PER_WORD;
8951 rtx_insn *insn;
8953 gcc_assert (stack_realign_drap);
8955 if (ix86_static_chain_on_stack)
8956 param_ptr_offset += UNITS_PER_WORD;
8957 if (!call_used_regs[REGNO (crtl->drap_reg)])
8958 param_ptr_offset += UNITS_PER_WORD;
8960 insn = emit_insn (gen_rtx_SET
8961 (stack_pointer_rtx,
8962 gen_rtx_PLUS (Pmode,
8963 crtl->drap_reg,
8964 GEN_INT (-param_ptr_offset))));
8965 m->fs.cfa_reg = stack_pointer_rtx;
8966 m->fs.cfa_offset = param_ptr_offset;
8967 m->fs.sp_offset = param_ptr_offset;
8968 m->fs.realigned = false;
8970 add_reg_note (insn, REG_CFA_DEF_CFA,
8971 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8972 GEN_INT (param_ptr_offset)));
8973 RTX_FRAME_RELATED_P (insn) = 1;
8975 if (!call_used_regs[REGNO (crtl->drap_reg)])
8976 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
8979 /* At this point the stack pointer must be valid, and we must have
8980 restored all of the registers. We may not have deallocated the
8981 entire stack frame. We've delayed this until now because it may
8982 be possible to merge the local stack deallocation with the
8983 deallocation forced by ix86_static_chain_on_stack. */
8984 gcc_assert (m->fs.sp_valid);
8985 gcc_assert (!m->fs.sp_realigned);
8986 gcc_assert (!m->fs.fp_valid);
8987 gcc_assert (!m->fs.realigned);
8988 if (m->fs.sp_offset != UNITS_PER_WORD)
8990 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8991 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
8992 style, true);
8994 else
8995 ix86_add_queued_cfa_restore_notes (get_last_insn ());
8997 /* Sibcall epilogues don't want a return instruction. */
8998 if (style == 0)
9000 m->fs = frame_state_save;
9001 return;
9004 if (cfun->machine->func_type != TYPE_NORMAL)
9005 emit_jump_insn (gen_interrupt_return ());
9006 else if (crtl->args.pops_args && crtl->args.size)
9008 rtx popc = GEN_INT (crtl->args.pops_args);
9010 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9011 address, do an explicit add, and jump indirectly to the caller. */
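/* For illustration, the >= 64K fallback below is roughly

	popl	%ecx			# return address
	addl	$N, %esp		# pop the arguments ($N is a placeholder)
	jmp	*%ecx			# return to the caller

   since the RET instruction only takes a 16-bit pop count.  */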
9013 if (crtl->args.pops_args >= 65536)
9015 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9016 rtx_insn *insn;
9018 /* There is no "pascal" calling convention in any 64bit ABI. */
9019 gcc_assert (!TARGET_64BIT);
9021 insn = emit_insn (gen_pop (ecx));
9022 m->fs.cfa_offset -= UNITS_PER_WORD;
9023 m->fs.sp_offset -= UNITS_PER_WORD;
9025 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9026 x = gen_rtx_SET (stack_pointer_rtx, x);
9027 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9028 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9029 RTX_FRAME_RELATED_P (insn) = 1;
9031 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9032 popc, -1, true);
9033 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9035 else
9036 emit_jump_insn (gen_simple_return_pop_internal (popc));
9038 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9040 /* In case of a return from EH, a simple return cannot be used,
9041 as the return address will be compared with a shadow stack
9042 return address. Use an indirect jump instead. */
9043 if (style == 2 && flag_cf_protection)
9045 /* Register used in indirect jump must be in word_mode. But
9046 Pmode may not be the same as word_mode for x32. */
9047 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9048 rtx_insn *insn;
9050 insn = emit_insn (gen_pop (ecx));
9051 m->fs.cfa_offset -= UNITS_PER_WORD;
9052 m->fs.sp_offset -= UNITS_PER_WORD;
9054 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9055 x = gen_rtx_SET (stack_pointer_rtx, x);
9056 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9057 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9058 RTX_FRAME_RELATED_P (insn) = 1;
9060 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9062 else
9063 emit_jump_insn (gen_simple_return_internal ());
9066 /* Restore the state back to the state from the prologue,
9067 so that it's correct for the next epilogue. */
9068 m->fs = frame_state_save;
9071 /* Reset from the function's potential modifications. */
9073 static void
9074 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9076 if (pic_offset_table_rtx
9077 && !ix86_use_pseudo_pic_reg ())
9078 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9080 if (TARGET_MACHO)
9082 rtx_insn *insn = get_last_insn ();
9083 rtx_insn *deleted_debug_label = NULL;
9085 /* Mach-O doesn't support labels at the end of objects, so if
9086 it looks like we might want one, take special action.
9087 First, collect any sequence of deleted debug labels. */
9088 while (insn
9089 && NOTE_P (insn)
9090 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9092 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9093 notes only, instead set their CODE_LABEL_NUMBER to -1,
9094 otherwise there would be code generation differences
9095 between -g and -g0. */
9096 if (NOTE_P (insn) && NOTE_KIND (insn)
9097 == NOTE_INSN_DELETED_DEBUG_LABEL)
9098 deleted_debug_label = insn;
9099 insn = PREV_INSN (insn);
9102 /* If we have:
9103 label:
9104 barrier
9105 then this needs to be detected, so skip past the barrier. */
9107 if (insn && BARRIER_P (insn))
9108 insn = PREV_INSN (insn);
9110 /* Up to now we've only seen notes or barriers. */
9111 if (insn)
9113 if (LABEL_P (insn)
9114 || (NOTE_P (insn)
9115 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9116 /* Trailing label. */
9117 fputs ("\tnop\n", file);
9118 else if (cfun && ! cfun->is_thunk)
9120 /* See if we have a completely empty function body, skipping
9121 the special case of the picbase thunk emitted as asm. */
9122 while (insn && ! INSN_P (insn))
9123 insn = PREV_INSN (insn);
9124 /* If we don't find any insns, we've got an empty function body;
9125 i.e. completely empty - without a return or branch. This is
9126 taken as the case where a function body has been removed
9127 because it contains an inline __builtin_unreachable(). GCC
9128 declares that reaching __builtin_unreachable() means UB so
9129 we're not obliged to do anything special; however, we want
9130 non-zero-sized function bodies. To meet this, and help the
9131 user out, let's trap the case. */
9132 if (insn == NULL)
9133 fputs ("\tud2\n", file);
9136 else if (deleted_debug_label)
9137 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9138 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9139 CODE_LABEL_NUMBER (insn) = -1;
9143 /* Return a scratch register to use in the split stack prologue. The
9144 split stack prologue is used for -fsplit-stack. It is the first
9145 instructions in the function, even before the regular prologue.
9146 The scratch register can be any caller-saved register which is not
9147 used for parameters or for the static chain. */
9149 static unsigned int
9150 split_stack_prologue_scratch_regno (void)
9152 if (TARGET_64BIT)
9153 return R11_REG;
9154 else
9156 bool is_fastcall, is_thiscall;
9157 int regparm;
9159 is_fastcall = (lookup_attribute ("fastcall",
9160 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9161 != NULL);
9162 is_thiscall = (lookup_attribute ("thiscall",
9163 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9164 != NULL);
9165 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9167 if (is_fastcall)
9169 if (DECL_STATIC_CHAIN (cfun->decl))
9171 sorry ("%<-fsplit-stack%> does not support fastcall with "
9172 "nested function");
9173 return INVALID_REGNUM;
9175 return AX_REG;
9177 else if (is_thiscall)
9179 if (!DECL_STATIC_CHAIN (cfun->decl))
9180 return DX_REG;
9181 return AX_REG;
9183 else if (regparm < 3)
9185 if (!DECL_STATIC_CHAIN (cfun->decl))
9186 return CX_REG;
9187 else
9189 if (regparm >= 2)
9191 sorry ("%<-fsplit-stack%> does not support 2 register "
9192 "parameters for a nested function");
9193 return INVALID_REGNUM;
9195 return DX_REG;
9198 else
9200 /* FIXME: We could make this work by pushing a register
9201 around the addition and comparison. */
9202 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9203 return INVALID_REGNUM;
9208 /* A SYMBOL_REF for the function which allocates new stackspace for
9209 -fsplit-stack. */
9211 static GTY(()) rtx split_stack_fn;
9213 /* A SYMBOL_REF for the more stack function when using the large
9214 model. */
9216 static GTY(()) rtx split_stack_fn_large;
9218 /* Return location of the stack guard value in the TLS block. */
9221 ix86_split_stack_guard (void)
9223 int offset;
9224 addr_space_t as = DEFAULT_TLS_SEG_REG;
9225 rtx r;
9227 gcc_assert (flag_split_stack);
9229 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9230 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9231 #else
9232 gcc_unreachable ();
9233 #endif
9235 r = GEN_INT (offset);
9236 r = gen_const_mem (Pmode, r);
9237 set_mem_addr_space (r, as);
9239 return r;
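/* For example, on x86-64 GNU/Linux this typically yields a thread-local
   memory reference like %fs:0x70, the slot glibc reserves in the TCB for
   the split-stack guard; the exact offset is whatever
   TARGET_THREAD_SPLIT_STACK_OFFSET defines for the target.  */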
9242 /* Handle -fsplit-stack. These are the first instructions in the
9243 function, even before the regular prologue. */
9245 void
9246 ix86_expand_split_stack_prologue (void)
9248 HOST_WIDE_INT allocate;
9249 unsigned HOST_WIDE_INT args_size;
9250 rtx_code_label *label;
9251 rtx limit, current, allocate_rtx, call_fusage;
9252 rtx_insn *call_insn;
9253 rtx scratch_reg = NULL_RTX;
9254 rtx_code_label *varargs_label = NULL;
9255 rtx fn;
9257 gcc_assert (flag_split_stack && reload_completed);
9259 ix86_finalize_stack_frame_flags ();
9260 struct ix86_frame &frame = cfun->machine->frame;
9261 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9263 /* This is the label we will branch to if we have enough stack
9264 space. We expect the basic block reordering pass to reverse this
9265 branch if optimizing, so that we branch in the unlikely case. */
9266 label = gen_label_rtx ();
9268 /* We need to compare the stack pointer minus the frame size with
9269 the stack boundary in the TCB. The stack boundary always gives
9270 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9271 can compare directly. Otherwise we need to do an addition. */
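/* Roughly, for a small 64-bit frame the generated check looks like

	cmpq	%fs:<guard-offset>, %rsp
	jae	.Lhave_stack		# enough stack, skip __morestack
	...				# otherwise call __morestack
   .Lhave_stack:

   with the scratch-register form below used instead when the frame is
   larger than SPLIT_STACK_AVAILABLE.  */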
9273 limit = ix86_split_stack_guard ();
9275 if (allocate < SPLIT_STACK_AVAILABLE)
9276 current = stack_pointer_rtx;
9277 else
9279 unsigned int scratch_regno;
9280 rtx offset;
9282 /* We need a scratch register to hold the stack pointer minus
9283 the required frame size. Since this is the very start of the
9284 function, the scratch register can be any caller-saved
9285 register which is not used for parameters. */
9286 offset = GEN_INT (- allocate);
9287 scratch_regno = split_stack_prologue_scratch_regno ();
9288 if (scratch_regno == INVALID_REGNUM)
9289 return;
9290 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9291 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9293 /* We don't use ix86_gen_add3 in this case because it will
9294 want to split to lea, but when not optimizing the insn
9295 will not be split after this point. */
9296 emit_insn (gen_rtx_SET (scratch_reg,
9297 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9298 offset)));
9300 else
9302 emit_move_insn (scratch_reg, offset);
9303 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
9304 stack_pointer_rtx));
9306 current = scratch_reg;
9309 ix86_expand_branch (GEU, current, limit, label);
9310 rtx_insn *jump_insn = get_last_insn ();
9311 JUMP_LABEL (jump_insn) = label;
9313 /* Mark the jump as very likely to be taken. */
9314 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9316 if (split_stack_fn == NULL_RTX)
9318 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9319 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9321 fn = split_stack_fn;
9323 /* Get more stack space. We pass in the desired stack space and the
9324 size of the arguments to copy to the new stack. In 32-bit mode
9325 we push the parameters; __morestack will return on a new stack
9326 anyhow. In 64-bit mode we pass the parameters in r10 and
9327 r11. */
9328 allocate_rtx = GEN_INT (allocate);
9329 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9330 call_fusage = NULL_RTX;
9331 rtx pop = NULL_RTX;
9332 if (TARGET_64BIT)
9334 rtx reg10, reg11;
9336 reg10 = gen_rtx_REG (Pmode, R10_REG);
9337 reg11 = gen_rtx_REG (Pmode, R11_REG);
9339 /* If this function uses a static chain, it will be in %r10.
9340 Preserve it across the call to __morestack. */
9341 if (DECL_STATIC_CHAIN (cfun->decl))
9343 rtx rax;
9345 rax = gen_rtx_REG (word_mode, AX_REG);
9346 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9347 use_reg (&call_fusage, rax);
9350 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9351 && !TARGET_PECOFF)
9353 HOST_WIDE_INT argval;
9355 gcc_assert (Pmode == DImode);
9356 /* When using the large model we need to load the address
9357 into a register, and we've run out of registers. So we
9358 switch to a different calling convention, and we call a
9359 different function: __morestack_large. We pass the
9360 argument size in the upper 32 bits of r10 and pass the
9361 frame size in the lower 32 bits. */
9362 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9363 gcc_assert ((args_size & 0xffffffff) == args_size);
9365 if (split_stack_fn_large == NULL_RTX)
9367 split_stack_fn_large
9368 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9369 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9371 if (ix86_cmodel == CM_LARGE_PIC)
9373 rtx_code_label *label;
9374 rtx x;
9376 label = gen_label_rtx ();
9377 emit_label (label);
9378 LABEL_PRESERVE_P (label) = 1;
9379 emit_insn (gen_set_rip_rex64 (reg10, label));
9380 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9381 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
9382 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9383 UNSPEC_GOT);
9384 x = gen_rtx_CONST (Pmode, x);
9385 emit_move_insn (reg11, x);
9386 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9387 x = gen_const_mem (Pmode, x);
9388 emit_move_insn (reg11, x);
9390 else
9391 emit_move_insn (reg11, split_stack_fn_large);
9393 fn = reg11;
9395 argval = ((args_size << 16) << 16) + allocate;
9396 emit_move_insn (reg10, GEN_INT (argval));
9398 else
9400 emit_move_insn (reg10, allocate_rtx);
9401 emit_move_insn (reg11, GEN_INT (args_size));
9402 use_reg (&call_fusage, reg11);
9405 use_reg (&call_fusage, reg10);
9407 else
9409 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9410 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9411 insn = emit_insn (gen_push (allocate_rtx));
9412 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9413 pop = GEN_INT (2 * UNITS_PER_WORD);
9415 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9416 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9417 pop, false);
9418 add_function_usage_to (call_insn, call_fusage);
9419 if (!TARGET_64BIT)
9420 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9421 /* Indicate that this function can't jump to non-local gotos. */
9422 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9424 /* In order to make call/return prediction work right, we now need
9425 to execute a return instruction. See
9426 libgcc/config/i386/morestack.S for the details on how this works.
9428 For flow purposes gcc must not see this as a return
9429 instruction--we need control flow to continue at the subsequent
9430 label. Therefore, we use an unspec. */
9431 gcc_assert (crtl->args.pops_args < 65536);
9432 rtx_insn *ret_insn
9433 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9435 if ((flag_cf_protection & CF_BRANCH))
9437 /* Insert ENDBR since __morestack will jump back here via indirect
9438 call. */
9439 rtx cet_eb = gen_nop_endbr ();
9440 emit_insn_after (cet_eb, ret_insn);
9443 /* If we are in 64-bit mode and this function uses a static chain,
9444 we saved %r10 in %rax before calling __morestack. */
9445 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9446 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9447 gen_rtx_REG (word_mode, AX_REG));
9449 /* If this function calls va_start, we need to store a pointer to
9450 the arguments on the old stack, because they may not have been
9451 all copied to the new stack. At this point the old stack can be
9452 found at the frame pointer value used by __morestack, because
9453 __morestack has set that up before calling back to us. Here we
9454 store that pointer in a scratch register, and in
9455 ix86_expand_prologue we store the scratch register in a stack
9456 slot. */
9457 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9459 unsigned int scratch_regno;
9460 rtx frame_reg;
9461 int words;
9463 scratch_regno = split_stack_prologue_scratch_regno ();
9464 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9465 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9467 /* 64-bit:
9468 fp -> old fp value
9469 return address within this function
9470 return address of caller of this function
9471 stack arguments
9472 So we add three words to get to the stack arguments.
9474 32-bit:
9475 fp -> old fp value
9476 return address within this function
9477 first argument to __morestack
9478 second argument to __morestack
9479 return address of caller of this function
9480 stack arguments
9481 So we add five words to get to the stack arguments.
9483 words = TARGET_64BIT ? 3 : 5;
9484 emit_insn (gen_rtx_SET (scratch_reg,
9485 gen_rtx_PLUS (Pmode, frame_reg,
9486 GEN_INT (words * UNITS_PER_WORD))));
9488 varargs_label = gen_label_rtx ();
9489 emit_jump_insn (gen_jump (varargs_label));
9490 JUMP_LABEL (get_last_insn ()) = varargs_label;
9492 emit_barrier ();
9495 emit_label (label);
9496 LABEL_NUSES (label) = 1;
9498 /* If this function calls va_start, we now have to set the scratch
9499 register for the case where we do not call __morestack. In this
9500 case we need to set it based on the stack pointer. */
9501 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9503 emit_insn (gen_rtx_SET (scratch_reg,
9504 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9505 GEN_INT (UNITS_PER_WORD))));
9507 emit_label (varargs_label);
9508 LABEL_NUSES (varargs_label) = 1;
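/* Editorial note (illustrative sketch, not part of the original source): with
   the 64-bit frame layout described above, the varargs-pointer computation
   emitted in this prologue is simply

     (set (reg scratch) (plus (reg fp) (const_int 24)))

   i.e. three 8-byte words past the frame pointer set up by __morestack; in
   32-bit mode the offset is five 4-byte words, i.e. 20 bytes. */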
9512 /* We may have to tell the dataflow pass that the split stack prologue
9513 is initializing a scratch register. */
9515 static void
9516 ix86_live_on_entry (bitmap regs)
9518 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9520 gcc_assert (flag_split_stack);
9521 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9525 /* Extract the parts of an RTL expression that is a valid memory address
9526 for an instruction. Return 0 if the structure of the address is
9527 grossly off. Return -1 if the address contains ASHIFT, so it is not
9528 strictly valid, but is still used for computing the length of the lea instruction. */
9531 ix86_decompose_address (rtx addr, struct ix86_address *out)
9533 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9534 rtx base_reg, index_reg;
9535 HOST_WIDE_INT scale = 1;
9536 rtx scale_rtx = NULL_RTX;
9537 rtx tmp;
9538 int retval = 1;
9539 addr_space_t seg = ADDR_SPACE_GENERIC;
9541 /* Allow zero-extended SImode addresses,
9542 they will be emitted with addr32 prefix. */
9543 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9545 if (GET_CODE (addr) == ZERO_EXTEND
9546 && GET_MODE (XEXP (addr, 0)) == SImode)
9548 addr = XEXP (addr, 0);
9549 if (CONST_INT_P (addr))
9550 return 0;
9552 else if (GET_CODE (addr) == AND
9553 && const_32bit_mask (XEXP (addr, 1), DImode))
9555 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9556 if (addr == NULL_RTX)
9557 return 0;
9559 if (CONST_INT_P (addr))
9560 return 0;
9564 /* Allow SImode subregs of DImode addresses,
9565 they will be emitted with addr32 prefix. */
9566 if (TARGET_64BIT && GET_MODE (addr) == SImode)
9568 if (SUBREG_P (addr)
9569 && GET_MODE (SUBREG_REG (addr)) == DImode)
9571 addr = SUBREG_REG (addr);
9572 if (CONST_INT_P (addr))
9573 return 0;
9577 if (REG_P (addr))
9578 base = addr;
9579 else if (SUBREG_P (addr))
9581 if (REG_P (SUBREG_REG (addr)))
9582 base = addr;
9583 else
9584 return 0;
9586 else if (GET_CODE (addr) == PLUS)
9588 rtx addends[4], op;
9589 int n = 0, i;
9591 op = addr;
9594 if (n >= 4)
9595 return 0;
9596 addends[n++] = XEXP (op, 1);
9597 op = XEXP (op, 0);
9599 while (GET_CODE (op) == PLUS);
9600 if (n >= 4)
9601 return 0;
9602 addends[n] = op;
9604 for (i = n; i >= 0; --i)
9606 op = addends[i];
9607 switch (GET_CODE (op))
9609 case MULT:
9610 if (index)
9611 return 0;
9612 index = XEXP (op, 0);
9613 scale_rtx = XEXP (op, 1);
9614 break;
9616 case ASHIFT:
9617 if (index)
9618 return 0;
9619 index = XEXP (op, 0);
9620 tmp = XEXP (op, 1);
9621 if (!CONST_INT_P (tmp))
9622 return 0;
9623 scale = INTVAL (tmp);
9624 if ((unsigned HOST_WIDE_INT) scale > 3)
9625 return 0;
9626 scale = 1 << scale;
9627 break;
9629 case ZERO_EXTEND:
9630 op = XEXP (op, 0);
9631 if (GET_CODE (op) != UNSPEC)
9632 return 0;
9633 /* FALLTHRU */
9635 case UNSPEC:
9636 if (XINT (op, 1) == UNSPEC_TP
9637 && TARGET_TLS_DIRECT_SEG_REFS
9638 && seg == ADDR_SPACE_GENERIC)
9639 seg = DEFAULT_TLS_SEG_REG;
9640 else
9641 return 0;
9642 break;
9644 case SUBREG:
9645 if (!REG_P (SUBREG_REG (op)))
9646 return 0;
9647 /* FALLTHRU */
9649 case REG:
9650 if (!base)
9651 base = op;
9652 else if (!index)
9653 index = op;
9654 else
9655 return 0;
9656 break;
9658 case CONST:
9659 case CONST_INT:
9660 case SYMBOL_REF:
9661 case LABEL_REF:
9662 if (disp)
9663 return 0;
9664 disp = op;
9665 break;
9667 default:
9668 return 0;
9672 else if (GET_CODE (addr) == MULT)
9674 index = XEXP (addr, 0); /* index*scale */
9675 scale_rtx = XEXP (addr, 1);
9677 else if (GET_CODE (addr) == ASHIFT)
9679 /* We're called for lea too, which implements ashift on occasion. */
9680 index = XEXP (addr, 0);
9681 tmp = XEXP (addr, 1);
9682 if (!CONST_INT_P (tmp))
9683 return 0;
9684 scale = INTVAL (tmp);
9685 if ((unsigned HOST_WIDE_INT) scale > 3)
9686 return 0;
9687 scale = 1 << scale;
9688 retval = -1;
9690 else
9691 disp = addr; /* displacement */
9693 if (index)
9695 if (REG_P (index))
9697 else if (SUBREG_P (index)
9698 && REG_P (SUBREG_REG (index)))
9700 else
9701 return 0;
9704 /* Extract the integral value of scale. */
9705 if (scale_rtx)
9707 if (!CONST_INT_P (scale_rtx))
9708 return 0;
9709 scale = INTVAL (scale_rtx);
9712 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9713 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9715 /* Avoid useless 0 displacement. */
9716 if (disp == const0_rtx && (base || index))
9717 disp = NULL_RTX;
9719 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
9720 if (base_reg && index_reg && scale == 1
9721 && (REGNO (index_reg) == ARG_POINTER_REGNUM
9722 || REGNO (index_reg) == FRAME_POINTER_REGNUM
9723 || REGNO (index_reg) == SP_REG))
9725 std::swap (base, index);
9726 std::swap (base_reg, index_reg);
9729 /* Special case: %ebp cannot be encoded as a base without a displacement.
9730 Similarly %r13. */
9731 if (!disp && base_reg
9732 && (REGNO (base_reg) == ARG_POINTER_REGNUM
9733 || REGNO (base_reg) == FRAME_POINTER_REGNUM
9734 || REGNO (base_reg) == BP_REG
9735 || REGNO (base_reg) == R13_REG))
9736 disp = const0_rtx;
9738 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
9739 Avoid this by transforming it to [%esi+0].
9740 Reload calls address legitimization without cfun defined, so we need
9741 to test cfun for being non-NULL. */
9742 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9743 && base_reg && !index_reg && !disp
9744 && REGNO (base_reg) == SI_REG)
9745 disp = const0_rtx;
9747 /* Special case: encode reg+reg instead of reg*2. */
9748 if (!base && index && scale == 2)
9749 base = index, base_reg = index_reg, scale = 1;
9751 /* Special case: scaling cannot be encoded without base or displacement. */
9752 if (!base && !disp && index && scale != 1)
9753 disp = const0_rtx;
9755 out->base = base;
9756 out->index = index;
9757 out->disp = disp;
9758 out->scale = scale;
9759 out->seg = seg;
9761 return retval;
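/* Illustrative example (editorial sketch, not part of the original source):
   an address such as

     (plus:DI (reg:DI ax) (mult:DI (reg:DI bx) (const_int 4)))

   is decomposed into base = ax, index = bx, scale = 4, disp = NULL_RTX with a
   return value of 1, while a bare (ashift:DI (reg:DI bx) (const_int 2)) yields
   index = bx, scale = 4 and a return value of -1, marking it as not strictly
   valid but still usable for lea length computation. */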
9764 /* Return cost of the memory address x.
9765 For i386, it is better to use a complex address than let gcc copy
9766 the address into a reg and make a new pseudo. But not if the address
9767 requires two regs - that would mean more pseudos with longer
9768 lifetimes. */
9769 static int
9770 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
9772 struct ix86_address parts;
9773 int cost = 1;
9774 int ok = ix86_decompose_address (x, &parts);
9776 gcc_assert (ok);
9778 if (parts.base && SUBREG_P (parts.base))
9779 parts.base = SUBREG_REG (parts.base);
9780 if (parts.index && SUBREG_P (parts.index))
9781 parts.index = SUBREG_REG (parts.index);
9783 /* Attempt to minimize the number of registers in the address by
9784 increasing the address cost for each register used. We don't increase
9785 the cost for "pic_offset_table_rtx". When a memory operand using
9786 "pic_offset_table_rtx" is not invariant itself, it most likely means that
9787 the base or index is not invariant. Therefore only "pic_offset_table_rtx"
9788 could be hoisted out, which is not profitable for x86. */
9789 if (parts.base
9790 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9791 && (current_pass->type == GIMPLE_PASS
9792 || !pic_offset_table_rtx
9793 || !REG_P (parts.base)
9794 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
9795 cost++;
9797 if (parts.index
9798 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9799 && (current_pass->type == GIMPLE_PASS
9800 || !pic_offset_table_rtx
9801 || !REG_P (parts.index)
9802 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
9803 cost++;
9805 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9806 since its predecode logic can't detect the length of such instructions
9807 and they degenerate to vector decoding. Increase the cost of such
9808 addresses here. The penalty is at least 2 cycles. It may be worthwhile
9809 to split such addresses or even refuse them outright.
9811 The following addressing modes are affected:
9812 [base+scale*index]
9813 [scale*index+disp]
9814 [base+index]
9816 The first and last cases may be avoidable by explicitly coding a zero
9817 displacement in the memory address, but I don't have an AMD-K6 machine
9818 handy to check this theory. */
9820 if (TARGET_K6
9821 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9822 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9823 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9824 cost += 10;
9826 return cost;
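/* Worked example (editorial sketch): for an address built from two pseudo
   registers, e.g. (plus (reg pseudo1) (mult (reg pseudo2) (const_int 4))),
   the cost starts at 1 and is bumped once for the base and once for the
   index, giving 3; on TARGET_K6 the addressing modes listed above add a
   further 10. */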
9829 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9830 this is used to form addresses to local data when -fPIC is in
9831 use. */
9833 static bool
9834 darwin_local_data_pic (rtx disp)
9836 return (GET_CODE (disp) == UNSPEC
9837 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9840 /* True if operand X should be loaded from GOT. */
9842 bool
9843 ix86_force_load_from_GOT_p (rtx x)
9845 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
9846 && !TARGET_PECOFF && !TARGET_MACHO
9847 && !flag_pic
9848 && ix86_cmodel != CM_LARGE
9849 && GET_CODE (x) == SYMBOL_REF
9850 && SYMBOL_REF_FUNCTION_P (x)
9851 && (!flag_plt
9852 || (SYMBOL_REF_DECL (x)
9853 && lookup_attribute ("noplt",
9854 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
9855 && !SYMBOL_REF_LOCAL_P (x));
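/* Illustrative source-level case (editorial sketch): a call to

     extern int bar (void) __attribute__ ((noplt));

   in a non-PIC, non-large-model compilation satisfies the conditions above,
   so the address of bar is loaded from its GOT slot rather than routed
   through the PLT. */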
9858 /* Determine if a given RTX is a valid constant. We already know this
9859 satisfies CONSTANT_P. */
9861 static bool
9862 ix86_legitimate_constant_p (machine_mode mode, rtx x)
9864 switch (GET_CODE (x))
9866 case CONST:
9867 x = XEXP (x, 0);
9869 if (GET_CODE (x) == PLUS)
9871 if (!CONST_INT_P (XEXP (x, 1)))
9872 return false;
9873 x = XEXP (x, 0);
9876 if (TARGET_MACHO && darwin_local_data_pic (x))
9877 return true;
9879 /* Only some unspecs are valid as "constants". */
9880 if (GET_CODE (x) == UNSPEC)
9881 switch (XINT (x, 1))
9883 case UNSPEC_GOT:
9884 case UNSPEC_GOTOFF:
9885 case UNSPEC_PLTOFF:
9886 return TARGET_64BIT;
9887 case UNSPEC_TPOFF:
9888 case UNSPEC_NTPOFF:
9889 x = XVECEXP (x, 0, 0);
9890 return (GET_CODE (x) == SYMBOL_REF
9891 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9892 case UNSPEC_DTPOFF:
9893 x = XVECEXP (x, 0, 0);
9894 return (GET_CODE (x) == SYMBOL_REF
9895 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9896 default:
9897 return false;
9900 /* We must have drilled down to a symbol. */
9901 if (GET_CODE (x) == LABEL_REF)
9902 return true;
9903 if (GET_CODE (x) != SYMBOL_REF)
9904 return false;
9905 /* FALLTHRU */
9907 case SYMBOL_REF:
9908 /* TLS symbols are never valid. */
9909 if (SYMBOL_REF_TLS_MODEL (x))
9910 return false;
9912 /* DLLIMPORT symbols are never valid. */
9913 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9914 && SYMBOL_REF_DLLIMPORT_P (x))
9915 return false;
9917 #if TARGET_MACHO
9918 /* mdynamic-no-pic */
9919 if (MACHO_DYNAMIC_NO_PIC_P)
9920 return machopic_symbol_defined_p (x);
9921 #endif
9923 /* External function address should be loaded
9924 via the GOT slot to avoid PLT. */
9925 if (ix86_force_load_from_GOT_p (x))
9926 return false;
9928 break;
9930 CASE_CONST_SCALAR_INT:
9931 switch (mode)
9933 case E_TImode:
9934 if (TARGET_64BIT)
9935 return true;
9936 /* FALLTHRU */
9937 case E_OImode:
9938 case E_XImode:
9939 if (!standard_sse_constant_p (x, mode))
9940 return false;
9941 default:
9942 break;
9944 break;
9946 case CONST_VECTOR:
9947 if (!standard_sse_constant_p (x, mode))
9948 return false;
9950 default:
9951 break;
9954 /* Otherwise we handle everything else in the move patterns. */
9955 return true;
9958 /* Determine if it's legal to put X into the constant pool. This
9959 is not possible for the address of thread-local symbols, which
9960 is checked above. */
9962 static bool
9963 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
9965 /* We can put any immediate constant in memory. */
9966 switch (GET_CODE (x))
9968 CASE_CONST_ANY:
9969 return false;
9971 default:
9972 break;
9975 return !ix86_legitimate_constant_p (mode, x);
9978 /* True if the symbol is marked as dllimport, or as a stub variable,
9979 otherwise false. */
9981 static bool
9982 is_imported_p (rtx x)
9984 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
9985 || GET_CODE (x) != SYMBOL_REF)
9986 return false;
9988 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
9992 /* Nonzero if the constant value X is a legitimate general operand
9993 when generating PIC code. It is given that flag_pic is on and
9994 that X satisfies CONSTANT_P. */
9996 bool
9997 legitimate_pic_operand_p (rtx x)
9999 rtx inner;
10001 switch (GET_CODE (x))
10003 case CONST:
10004 inner = XEXP (x, 0);
10005 if (GET_CODE (inner) == PLUS
10006 && CONST_INT_P (XEXP (inner, 1)))
10007 inner = XEXP (inner, 0);
10009 /* Only some unspecs are valid as "constants". */
10010 if (GET_CODE (inner) == UNSPEC)
10011 switch (XINT (inner, 1))
10013 case UNSPEC_GOT:
10014 case UNSPEC_GOTOFF:
10015 case UNSPEC_PLTOFF:
10016 return TARGET_64BIT;
10017 case UNSPEC_TPOFF:
10018 x = XVECEXP (inner, 0, 0);
10019 return (GET_CODE (x) == SYMBOL_REF
10020 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10021 case UNSPEC_MACHOPIC_OFFSET:
10022 return legitimate_pic_address_disp_p (x);
10023 default:
10024 return false;
10026 /* FALLTHRU */
10028 case SYMBOL_REF:
10029 case LABEL_REF:
10030 return legitimate_pic_address_disp_p (x);
10032 default:
10033 return true;
10037 /* Determine if a given CONST RTX is a valid memory displacement
10038 in PIC mode. */
10040 bool
10041 legitimate_pic_address_disp_p (rtx disp)
10043 bool saw_plus;
10045 /* In 64bit mode we can allow direct addresses of symbols and labels
10046 when they are not dynamic symbols. */
10047 if (TARGET_64BIT)
10049 rtx op0 = disp, op1;
10051 switch (GET_CODE (disp))
10053 case LABEL_REF:
10054 return true;
10056 case CONST:
10057 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10058 break;
10059 op0 = XEXP (XEXP (disp, 0), 0);
10060 op1 = XEXP (XEXP (disp, 0), 1);
10061 if (!CONST_INT_P (op1))
10062 break;
10063 if (GET_CODE (op0) == UNSPEC
10064 && (XINT (op0, 1) == UNSPEC_DTPOFF
10065 || XINT (op0, 1) == UNSPEC_NTPOFF)
10066 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10067 return true;
10068 if (INTVAL (op1) >= 16*1024*1024
10069 || INTVAL (op1) < -16*1024*1024)
10070 break;
10071 if (GET_CODE (op0) == LABEL_REF)
10072 return true;
10073 if (GET_CODE (op0) == CONST
10074 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10075 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10076 return true;
10077 if (GET_CODE (op0) == UNSPEC
10078 && XINT (op0, 1) == UNSPEC_PCREL)
10079 return true;
10080 if (GET_CODE (op0) != SYMBOL_REF)
10081 break;
10082 /* FALLTHRU */
10084 case SYMBOL_REF:
10085 /* TLS references should always be enclosed in UNSPEC.
10086 The dllimported symbol always needs to be resolved. */
10087 if (SYMBOL_REF_TLS_MODEL (op0)
10088 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10089 return false;
10091 if (TARGET_PECOFF)
10093 if (is_imported_p (op0))
10094 return true;
10096 if (SYMBOL_REF_FAR_ADDR_P (op0)
10097 || !SYMBOL_REF_LOCAL_P (op0))
10098 break;
10100 /* Function symbols need to be resolved only for
10101 the large model.
10102 For the small model we don't need to resolve anything
10103 here. */
10104 if ((ix86_cmodel != CM_LARGE_PIC
10105 && SYMBOL_REF_FUNCTION_P (op0))
10106 || ix86_cmodel == CM_SMALL_PIC)
10107 return true;
10108 /* Non-external symbols don't need to be resolved for
10109 the large and medium models. */
10110 if ((ix86_cmodel == CM_LARGE_PIC
10111 || ix86_cmodel == CM_MEDIUM_PIC)
10112 && !SYMBOL_REF_EXTERNAL_P (op0))
10113 return true;
10115 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10116 && (SYMBOL_REF_LOCAL_P (op0)
10117 || (HAVE_LD_PIE_COPYRELOC
10118 && flag_pie
10119 && !SYMBOL_REF_WEAK (op0)
10120 && !SYMBOL_REF_FUNCTION_P (op0)))
10121 && ix86_cmodel != CM_LARGE_PIC)
10122 return true;
10123 break;
10125 default:
10126 break;
10129 if (GET_CODE (disp) != CONST)
10130 return false;
10131 disp = XEXP (disp, 0);
10133 if (TARGET_64BIT)
10135 /* It is unsafe to allow PLUS expressions here. This limits the allowed
10136 distance of GOT tables. We should not need these anyway. */
10137 if (GET_CODE (disp) != UNSPEC
10138 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10139 && XINT (disp, 1) != UNSPEC_GOTOFF
10140 && XINT (disp, 1) != UNSPEC_PCREL
10141 && XINT (disp, 1) != UNSPEC_PLTOFF))
10142 return false;
10144 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10145 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10146 return false;
10147 return true;
10150 saw_plus = false;
10151 if (GET_CODE (disp) == PLUS)
10153 if (!CONST_INT_P (XEXP (disp, 1)))
10154 return false;
10155 disp = XEXP (disp, 0);
10156 saw_plus = true;
10159 if (TARGET_MACHO && darwin_local_data_pic (disp))
10160 return true;
10162 if (GET_CODE (disp) != UNSPEC)
10163 return false;
10165 switch (XINT (disp, 1))
10167 case UNSPEC_GOT:
10168 if (saw_plus)
10169 return false;
10170 /* We need to check for both symbols and labels because VxWorks loads
10171 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10172 details. */
10173 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10174 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10175 case UNSPEC_GOTOFF:
10176 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10177 While the ABI also specifies a 32bit relocation, we don't produce it in
10178 the small PIC model at all. */
10179 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10180 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10181 && !TARGET_64BIT)
10182 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10183 return false;
10184 case UNSPEC_GOTTPOFF:
10185 case UNSPEC_GOTNTPOFF:
10186 case UNSPEC_INDNTPOFF:
10187 if (saw_plus)
10188 return false;
10189 disp = XVECEXP (disp, 0, 0);
10190 return (GET_CODE (disp) == SYMBOL_REF
10191 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10192 case UNSPEC_NTPOFF:
10193 disp = XVECEXP (disp, 0, 0);
10194 return (GET_CODE (disp) == SYMBOL_REF
10195 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10196 case UNSPEC_DTPOFF:
10197 disp = XVECEXP (disp, 0, 0);
10198 return (GET_CODE (disp) == SYMBOL_REF
10199 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10202 return false;
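/* Editorial sketch of an accepted displacement: in 32-bit PIC code

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   passes the UNSPEC_GOTOFF case above provided gotoff_operand accepts the
   symbol and the target is not PE-COFF, whereas the same form is rejected
   outright in 64-bit mode. */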
10205 /* Determine if OP is a suitable RTX for an address register.
10206 Return the naked register if a register or a register subreg is
10207 found, otherwise return NULL_RTX. */
10209 static rtx
10210 ix86_validate_address_register (rtx op)
10212 machine_mode mode = GET_MODE (op);
10214 /* Only SImode or DImode registers can form the address. */
10215 if (mode != SImode && mode != DImode)
10216 return NULL_RTX;
10218 if (REG_P (op))
10219 return op;
10220 else if (SUBREG_P (op))
10222 rtx reg = SUBREG_REG (op);
10224 if (!REG_P (reg))
10225 return NULL_RTX;
10227 mode = GET_MODE (reg);
10229 /* Don't allow SUBREGs that span more than a word. It can
10230 lead to spill failures when the register is one word out
10231 of a two word structure. */
10232 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10233 return NULL_RTX;
10235 /* Allow only SUBREGs of non-eliminable hard registers. */
10236 if (register_no_elim_operand (reg, mode))
10237 return reg;
10240 /* Op is not a register. */
10241 return NULL_RTX;
10244 /* Recognizes RTL expressions that are valid memory addresses for an
10245 instruction. The MODE argument is the machine mode for the MEM
10246 expression that wants to use this address.
10248 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10249 convert common non-canonical forms to canonical form so that they will
10250 be recognized. */
10252 static bool
10253 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10255 struct ix86_address parts;
10256 rtx base, index, disp;
10257 HOST_WIDE_INT scale;
10258 addr_space_t seg;
10260 if (ix86_decompose_address (addr, &parts) <= 0)
10261 /* Decomposition failed. */
10262 return false;
10264 base = parts.base;
10265 index = parts.index;
10266 disp = parts.disp;
10267 scale = parts.scale;
10268 seg = parts.seg;
10270 /* Validate base register. */
10271 if (base)
10273 rtx reg = ix86_validate_address_register (base);
10275 if (reg == NULL_RTX)
10276 return false;
10278 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10279 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10280 /* Base is not valid. */
10281 return false;
10284 /* Validate index register. */
10285 if (index)
10287 rtx reg = ix86_validate_address_register (index);
10289 if (reg == NULL_RTX)
10290 return false;
10292 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10293 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10294 /* Index is not valid. */
10295 return false;
10298 /* Index and base should have the same mode. */
10299 if (base && index
10300 && GET_MODE (base) != GET_MODE (index))
10301 return false;
10303 /* Address override works only on the (%reg) part of %fs:(%reg). */
10304 if (seg != ADDR_SPACE_GENERIC
10305 && ((base && GET_MODE (base) != word_mode)
10306 || (index && GET_MODE (index) != word_mode)))
10307 return false;
10309 /* Validate scale factor. */
10310 if (scale != 1)
10312 if (!index)
10313 /* Scale without index. */
10314 return false;
10316 if (scale != 2 && scale != 4 && scale != 8)
10317 /* Scale is not a valid multiplier. */
10318 return false;
10321 /* Validate displacement. */
10322 if (disp)
10324 if (GET_CODE (disp) == CONST
10325 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10326 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10327 switch (XINT (XEXP (disp, 0), 1))
10329 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10330 when used. While the ABI also specifies 32bit relocations, we
10331 don't produce them at all and use IP relative addressing instead.
10332 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10333 should be loaded via the GOT. */
10334 case UNSPEC_GOT:
10335 if (!TARGET_64BIT
10336 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10337 goto is_legitimate_pic;
10338 /* FALLTHRU */
10339 case UNSPEC_GOTOFF:
10340 gcc_assert (flag_pic);
10341 if (!TARGET_64BIT)
10342 goto is_legitimate_pic;
10344 /* 64bit address unspec. */
10345 return false;
10347 case UNSPEC_GOTPCREL:
10348 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10349 goto is_legitimate_pic;
10350 /* FALLTHRU */
10351 case UNSPEC_PCREL:
10352 gcc_assert (flag_pic);
10353 goto is_legitimate_pic;
10355 case UNSPEC_GOTTPOFF:
10356 case UNSPEC_GOTNTPOFF:
10357 case UNSPEC_INDNTPOFF:
10358 case UNSPEC_NTPOFF:
10359 case UNSPEC_DTPOFF:
10360 break;
10362 default:
10363 /* Invalid address unspec. */
10364 return false;
10367 else if (SYMBOLIC_CONST (disp)
10368 && (flag_pic
10369 || (TARGET_MACHO
10370 #if TARGET_MACHO
10371 && MACHOPIC_INDIRECT
10372 && !machopic_operand_p (disp)
10373 #endif
10377 is_legitimate_pic:
10378 if (TARGET_64BIT && (index || base))
10380 /* foo@dtpoff(%rX) is ok. */
10381 if (GET_CODE (disp) != CONST
10382 || GET_CODE (XEXP (disp, 0)) != PLUS
10383 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10384 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10385 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10386 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10387 /* Non-constant pic memory reference. */
10388 return false;
10390 else if ((!TARGET_MACHO || flag_pic)
10391 && ! legitimate_pic_address_disp_p (disp))
10392 /* Displacement is an invalid pic construct. */
10393 return false;
10394 #if TARGET_MACHO
10395 else if (MACHO_DYNAMIC_NO_PIC_P
10396 && !ix86_legitimate_constant_p (Pmode, disp))
10397 /* Displacement must be referenced via non_lazy_pointer. */
10398 return false;
10399 #endif
10401 /* This code used to verify that a symbolic pic displacement
10402 includes the pic_offset_table_rtx register.
10404 While this is a good idea, unfortunately these constructs may
10405 be created by the "adds using lea" optimization for incorrect
10406 code like:
10408 int a;
10409 int foo(int i)
10411 return *(&a+i);
10414 This code is nonsensical, but results in addressing the
10415 GOT table with a pic_offset_table_rtx base. We can't
10416 easily refuse it, since it gets matched by the
10417 "addsi3" pattern, which later gets split to lea when the
10418 output register differs from the input. While this
10419 could be handled by a separate addsi pattern for this case
10420 that never results in lea, disabling this test seems to be the
10421 easier and correct fix for the crash. */
10423 else if (GET_CODE (disp) != LABEL_REF
10424 && !CONST_INT_P (disp)
10425 && (GET_CODE (disp) != CONST
10426 || !ix86_legitimate_constant_p (Pmode, disp))
10427 && (GET_CODE (disp) != SYMBOL_REF
10428 || !ix86_legitimate_constant_p (Pmode, disp)))
10429 /* Displacement is not constant. */
10430 return false;
10431 else if (TARGET_64BIT
10432 && !x86_64_immediate_operand (disp, VOIDmode))
10433 /* Displacement is out of range. */
10434 return false;
10435 /* In x32 mode, constant addresses are sign extended to 64bit, so
10436 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
10437 else if (TARGET_X32 && !(index || base)
10438 && CONST_INT_P (disp)
10439 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10440 return false;
10443 /* Everything looks valid. */
10444 return true;
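/* Editorial sketch of the effect of the checks above: an address like
   (plus (reg:DI ax) (mult (reg:DI bx) (const_int 8))) is accepted, while a
   scale of 3, a scale factor with no index register, or base and index
   registers of differing modes are each grounds for rejection. */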
10447 /* Determine if a given RTX is a valid constant address. */
10449 bool
10450 constant_address_p (rtx x)
10452 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10455 /* Return a unique alias set for the GOT. */
10457 alias_set_type
10458 ix86_GOT_alias_set (void)
10460 static alias_set_type set = -1;
10461 if (set == -1)
10462 set = new_alias_set ();
10463 return set;
10466 /* Return a legitimate reference for ORIG (an address) using the
10467 register REG. If REG is 0, a new pseudo is generated.
10469 There are two types of references that must be handled:
10471 1. Global data references must load the address from the GOT, via
10472 the PIC reg. An insn is emitted to do this load, and the reg is
10473 returned.
10475 2. Static data references, constant pool addresses, and code labels
10476 compute the address as an offset from the GOT, whose base is in
10477 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10478 differentiate them from global data objects. The returned
10479 address is the PIC reg + an unspec constant.
10481 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10482 reg also appears in the address. */
10485 legitimize_pic_address (rtx orig, rtx reg)
10487 rtx addr = orig;
10488 rtx new_rtx = orig;
10490 #if TARGET_MACHO
10491 if (TARGET_MACHO && !TARGET_64BIT)
10493 if (reg == 0)
10494 reg = gen_reg_rtx (Pmode);
10495 /* Use the generic Mach-O PIC machinery. */
10496 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10498 #endif
10500 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10502 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10503 if (tmp)
10504 return tmp;
10507 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10508 new_rtx = addr;
10509 else if ((!TARGET_64BIT
10510 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10511 && !TARGET_PECOFF
10512 && gotoff_operand (addr, Pmode))
10514 /* This symbol may be referenced via a displacement
10515 from the PIC base address (@GOTOFF). */
10516 if (GET_CODE (addr) == CONST)
10517 addr = XEXP (addr, 0);
10519 if (GET_CODE (addr) == PLUS)
10521 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10522 UNSPEC_GOTOFF);
10523 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10525 else
10526 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10528 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10530 if (TARGET_64BIT)
10531 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10533 if (reg != 0)
10535 gcc_assert (REG_P (reg));
10536 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10537 new_rtx, reg, 1, OPTAB_DIRECT);
10539 else
10540 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10542 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10543 /* We can't use @GOTOFF for text labels
10544 on VxWorks, see gotoff_operand. */
10545 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10547 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10548 if (tmp)
10549 return tmp;
10551 /* For x64 PE-COFF there is no GOT table,
10552 so we use the address directly. */
10553 if (TARGET_64BIT && TARGET_PECOFF)
10555 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10556 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10558 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10560 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10561 UNSPEC_GOTPCREL);
10562 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10563 new_rtx = gen_const_mem (Pmode, new_rtx);
10564 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10566 else
10568 /* This symbol must be referenced via a load
10569 from the Global Offset Table (@GOT). */
10570 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10571 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10572 if (TARGET_64BIT)
10573 new_rtx = force_reg (Pmode, new_rtx);
10574 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10575 new_rtx = gen_const_mem (Pmode, new_rtx);
10576 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10579 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10581 else
10583 if (CONST_INT_P (addr)
10584 && !x86_64_immediate_operand (addr, VOIDmode))
10585 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10586 else if (GET_CODE (addr) == CONST)
10588 addr = XEXP (addr, 0);
10590 /* We must match stuff we generate before. Assume the only
10591 unspecs that can get here are ours. Not that we could do
10592 anything with them anyway.... */
10593 if (GET_CODE (addr) == UNSPEC
10594 || (GET_CODE (addr) == PLUS
10595 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10596 return orig;
10597 gcc_assert (GET_CODE (addr) == PLUS);
10600 if (GET_CODE (addr) == PLUS)
10602 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10604 /* Check first to see if this is a constant
10605 offset from a @GOTOFF symbol reference. */
10606 if (!TARGET_PECOFF
10607 && gotoff_operand (op0, Pmode)
10608 && CONST_INT_P (op1))
10610 if (!TARGET_64BIT)
10612 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10613 UNSPEC_GOTOFF);
10614 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10615 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10617 if (reg != 0)
10619 gcc_assert (REG_P (reg));
10620 new_rtx = expand_simple_binop (Pmode, PLUS,
10621 pic_offset_table_rtx,
10622 new_rtx, reg, 1,
10623 OPTAB_DIRECT);
10625 else
10626 new_rtx
10627 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10629 else
10631 if (INTVAL (op1) < -16*1024*1024
10632 || INTVAL (op1) >= 16*1024*1024)
10634 if (!x86_64_immediate_operand (op1, Pmode))
10635 op1 = force_reg (Pmode, op1);
10637 new_rtx
10638 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10642 else
10644 rtx base = legitimize_pic_address (op0, reg);
10645 machine_mode mode = GET_MODE (base);
10646 new_rtx
10647 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10649 if (CONST_INT_P (new_rtx))
10651 if (INTVAL (new_rtx) < -16*1024*1024
10652 || INTVAL (new_rtx) >= 16*1024*1024)
10654 if (!x86_64_immediate_operand (new_rtx, mode))
10655 new_rtx = force_reg (mode, new_rtx);
10657 new_rtx
10658 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10660 else
10661 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10663 else
10665 /* For %rip addressing, we have to use
10666 just disp32, not base nor index. */
10667 if (TARGET_64BIT
10668 && (GET_CODE (base) == SYMBOL_REF
10669 || GET_CODE (base) == LABEL_REF))
10670 base = force_reg (mode, base);
10671 if (GET_CODE (new_rtx) == PLUS
10672 && CONSTANT_P (XEXP (new_rtx, 1)))
10674 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10675 new_rtx = XEXP (new_rtx, 1);
10677 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10682 return new_rtx;
10685 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10687 static rtx
10688 get_thread_pointer (machine_mode tp_mode, bool to_reg)
10690 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10692 if (GET_MODE (tp) != tp_mode)
10694 gcc_assert (GET_MODE (tp) == SImode);
10695 gcc_assert (tp_mode == DImode);
10697 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10700 if (to_reg)
10701 tp = copy_to_mode_reg (tp_mode, tp);
10703 return tp;
10706 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10708 static GTY(()) rtx ix86_tls_symbol;
10710 static rtx
10711 ix86_tls_get_addr (void)
10713 if (!ix86_tls_symbol)
10715 const char *sym
10716 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10717 ? "___tls_get_addr" : "__tls_get_addr");
10719 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10722 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10724 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10725 UNSPEC_PLTOFF);
10726 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10727 gen_rtx_CONST (Pmode, unspec));
10730 return ix86_tls_symbol;
10733 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10735 static GTY(()) rtx ix86_tls_module_base_symbol;
10738 ix86_tls_module_base (void)
10740 if (!ix86_tls_module_base_symbol)
10742 ix86_tls_module_base_symbol
10743 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
10745 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10746 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10749 return ix86_tls_module_base_symbol;
10752 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10753 false if we expect this to be used for a memory address and true if
10754 we expect to load the address into a register. */
10757 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10759 rtx dest, base, off;
10760 rtx pic = NULL_RTX, tp = NULL_RTX;
10761 machine_mode tp_mode = Pmode;
10762 int type;
10764 /* Fall back to the global dynamic model if the toolchain cannot support
10765 local dynamic. */
10766 if (TARGET_SUN_TLS && !TARGET_64BIT
10767 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
10768 && model == TLS_MODEL_LOCAL_DYNAMIC)
10769 model = TLS_MODEL_GLOBAL_DYNAMIC;
10771 switch (model)
10773 case TLS_MODEL_GLOBAL_DYNAMIC:
10774 dest = gen_reg_rtx (Pmode);
10776 if (!TARGET_64BIT)
10778 if (flag_pic && !TARGET_PECOFF)
10779 pic = pic_offset_table_rtx;
10780 else
10782 pic = gen_reg_rtx (Pmode);
10783 emit_insn (gen_set_got (pic));
10787 if (TARGET_GNU2_TLS)
10789 if (TARGET_64BIT)
10790 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
10791 else
10792 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
10794 tp = get_thread_pointer (Pmode, true);
10795 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10797 if (GET_MODE (x) != Pmode)
10798 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10800 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
10802 else
10804 rtx caddr = ix86_tls_get_addr ();
10806 if (TARGET_64BIT)
10808 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10809 rtx_insn *insns;
10811 start_sequence ();
10812 emit_call_insn
10813 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
10814 insns = get_insns ();
10815 end_sequence ();
10817 if (GET_MODE (x) != Pmode)
10818 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10820 RTL_CONST_CALL_P (insns) = 1;
10821 emit_libcall_block (insns, dest, rax, x);
10823 else
10824 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
10826 break;
10828 case TLS_MODEL_LOCAL_DYNAMIC:
10829 base = gen_reg_rtx (Pmode);
10831 if (!TARGET_64BIT)
10833 if (flag_pic)
10834 pic = pic_offset_table_rtx;
10835 else
10837 pic = gen_reg_rtx (Pmode);
10838 emit_insn (gen_set_got (pic));
10842 if (TARGET_GNU2_TLS)
10844 rtx tmp = ix86_tls_module_base ();
10846 if (TARGET_64BIT)
10847 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
10848 else
10849 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
10851 tp = get_thread_pointer (Pmode, true);
10852 set_unique_reg_note (get_last_insn (), REG_EQUAL,
10853 gen_rtx_MINUS (Pmode, tmp, tp));
10855 else
10857 rtx caddr = ix86_tls_get_addr ();
10859 if (TARGET_64BIT)
10861 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10862 rtx_insn *insns;
10863 rtx eqv;
10865 start_sequence ();
10866 emit_call_insn
10867 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
10868 insns = get_insns ();
10869 end_sequence ();
10871 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
10872 share the LD_BASE result with other LD model accesses. */
10873 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10874 UNSPEC_TLS_LD_BASE);
10876 RTL_CONST_CALL_P (insns) = 1;
10877 emit_libcall_block (insns, base, rax, eqv);
10879 else
10880 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
10883 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10884 off = gen_rtx_CONST (Pmode, off);
10886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10888 if (TARGET_GNU2_TLS)
10890 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10892 if (GET_MODE (x) != Pmode)
10893 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10895 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
10897 break;
10899 case TLS_MODEL_INITIAL_EXEC:
10900 if (TARGET_64BIT)
10902 if (TARGET_SUN_TLS && !TARGET_X32)
10904 /* The Sun linker took the AMD64 TLS spec literally
10905 and can only handle %rax as destination of the
10906 initial executable code sequence. */
10908 dest = gen_reg_rtx (DImode);
10909 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
10910 return dest;
10913 /* Generate DImode references to avoid %fs:(%reg32)
10914 problems and linker IE->LE relaxation bug. */
10915 tp_mode = DImode;
10916 pic = NULL;
10917 type = UNSPEC_GOTNTPOFF;
10919 else if (flag_pic)
10921 pic = pic_offset_table_rtx;
10922 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10924 else if (!TARGET_ANY_GNU_TLS)
10926 pic = gen_reg_rtx (Pmode);
10927 emit_insn (gen_set_got (pic));
10928 type = UNSPEC_GOTTPOFF;
10930 else
10932 pic = NULL;
10933 type = UNSPEC_INDNTPOFF;
10936 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
10937 off = gen_rtx_CONST (tp_mode, off);
10938 if (pic)
10939 off = gen_rtx_PLUS (tp_mode, pic, off);
10940 off = gen_const_mem (tp_mode, off);
10941 set_mem_alias_set (off, ix86_GOT_alias_set ());
10943 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10945 base = get_thread_pointer (tp_mode,
10946 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10947 off = force_reg (tp_mode, off);
10948 dest = gen_rtx_PLUS (tp_mode, base, off);
10949 if (tp_mode != Pmode)
10950 dest = convert_to_mode (Pmode, dest, 1);
10952 else
10954 base = get_thread_pointer (Pmode, true);
10955 dest = gen_reg_rtx (Pmode);
10956 emit_insn (ix86_gen_sub3 (dest, base, off));
10958 break;
10960 case TLS_MODEL_LOCAL_EXEC:
10961 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10962 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10963 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10964 off = gen_rtx_CONST (Pmode, off);
10966 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10968 base = get_thread_pointer (Pmode,
10969 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10970 return gen_rtx_PLUS (Pmode, base, off);
10972 else
10974 base = get_thread_pointer (Pmode, true);
10975 dest = gen_reg_rtx (Pmode);
10976 emit_insn (ix86_gen_sub3 (dest, base, off));
10978 break;
10980 default:
10981 gcc_unreachable ();
10984 return dest;
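/* Editorial sketch of a local-exec expansion on 64-bit GNU/Linux: for a
   variable x the code above returns

     (plus (unspec [(const_int 0)] UNSPEC_TP)
           (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

   which is later printed with an @tpoff relocation and rewritten by
   ix86_rewrite_tls_address to use the %fs segment address space directly. */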
10987 /* Return true if OP refers to a TLS address. */
10988 bool
10989 ix86_tls_address_pattern_p (rtx op)
10991 subrtx_var_iterator::array_type array;
10992 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
10994 rtx op = *iter;
10995 if (MEM_P (op))
10997 rtx *x = &XEXP (op, 0);
10998 while (GET_CODE (*x) == PLUS)
11000 int i;
11001 for (i = 0; i < 2; i++)
11003 rtx u = XEXP (*x, i);
11004 if (GET_CODE (u) == ZERO_EXTEND)
11005 u = XEXP (u, 0);
11006 if (GET_CODE (u) == UNSPEC
11007 && XINT (u, 1) == UNSPEC_TP)
11008 return true;
11010 x = &XEXP (*x, 0);
11013 iter.skip_subrtxes ();
11017 return false;
11020 /* Rewrite *LOC so that it refers to a default TLS address space. */
11021 void
11022 ix86_rewrite_tls_address_1 (rtx *loc)
11024 subrtx_ptr_iterator::array_type array;
11025 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11027 rtx *loc = *iter;
11028 if (MEM_P (*loc))
11030 rtx addr = XEXP (*loc, 0);
11031 rtx *x = &addr;
11032 while (GET_CODE (*x) == PLUS)
11034 int i;
11035 for (i = 0; i < 2; i++)
11037 rtx u = XEXP (*x, i);
11038 if (GET_CODE (u) == ZERO_EXTEND)
11039 u = XEXP (u, 0);
11040 if (GET_CODE (u) == UNSPEC
11041 && XINT (u, 1) == UNSPEC_TP)
11043 addr_space_t as = DEFAULT_TLS_SEG_REG;
11045 *x = XEXP (*x, 1 - i);
11047 *loc = replace_equiv_address_nv (*loc, addr, true);
11048 set_mem_addr_space (*loc, as);
11049 return;
11052 x = &XEXP (*x, 0);
11055 iter.skip_subrtxes ();
11060 /* Rewrite an instruction pattern involving a TLS address
11061 so that it refers to the default TLS address space. */
11063 ix86_rewrite_tls_address (rtx pattern)
11065 pattern = copy_insn (pattern);
11066 ix86_rewrite_tls_address_1 (&pattern);
11067 return pattern;
11070 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11071 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11072 unique refptr-DECL symbol corresponding to symbol DECL. */
11074 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11076 static inline hashval_t hash (tree_map *m) { return m->hash; }
11077 static inline bool
11078 equal (tree_map *a, tree_map *b)
11080 return a->base.from == b->base.from;
11083 static int
11084 keep_cache_entry (tree_map *&m)
11086 return ggc_marked_p (m->base.from);
11090 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11092 static tree
11093 get_dllimport_decl (tree decl, bool beimport)
11095 struct tree_map *h, in;
11096 const char *name;
11097 const char *prefix;
11098 size_t namelen, prefixlen;
11099 char *imp_name;
11100 tree to;
11101 rtx rtl;
11103 if (!dllimport_map)
11104 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11106 in.hash = htab_hash_pointer (decl);
11107 in.base.from = decl;
11108 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11109 h = *loc;
11110 if (h)
11111 return h->to;
11113 *loc = h = ggc_alloc<tree_map> ();
11114 h->hash = in.hash;
11115 h->base.from = decl;
11116 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11117 VAR_DECL, NULL, ptr_type_node);
11118 DECL_ARTIFICIAL (to) = 1;
11119 DECL_IGNORED_P (to) = 1;
11120 DECL_EXTERNAL (to) = 1;
11121 TREE_READONLY (to) = 1;
11123 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11124 name = targetm.strip_name_encoding (name);
11125 if (beimport)
11126 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11127 ? "*__imp_" : "*__imp__";
11128 else
11129 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11130 namelen = strlen (name);
11131 prefixlen = strlen (prefix);
11132 imp_name = (char *) alloca (namelen + prefixlen + 1);
11133 memcpy (imp_name, prefix, prefixlen);
11134 memcpy (imp_name + prefixlen, name, namelen + 1);
11136 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11137 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11138 SET_SYMBOL_REF_DECL (rtl, to);
11139 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11140 if (!beimport)
11142 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11143 #ifdef SUB_TARGET_RECORD_STUB
11144 SUB_TARGET_RECORD_STUB (name);
11145 #endif
11148 rtl = gen_const_mem (Pmode, rtl);
11149 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11151 SET_DECL_RTL (to, rtl);
11152 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11154 return to;
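/* Editorial note (illustrative): for a dllimport'ed function foo, the decl
   built above ends up wrapped in a memory reference to an import pointer
   assembled as "__imp_foo" (or "__imp__foo" when a user label prefix is in
   effect), so every use of foo is redirected through that pointer. */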
11157 /* Expand SYMBOL into its corresponding far-address symbol.
11158 WANT_REG is true if we require the result be a register. */
11160 static rtx
11161 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11163 tree imp_decl;
11164 rtx x;
11166 gcc_assert (SYMBOL_REF_DECL (symbol));
11167 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11169 x = DECL_RTL (imp_decl);
11170 if (want_reg)
11171 x = force_reg (Pmode, x);
11172 return x;
11175 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11176 true if we require the result be a register. */
11178 static rtx
11179 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11181 tree imp_decl;
11182 rtx x;
11184 gcc_assert (SYMBOL_REF_DECL (symbol));
11185 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11187 x = DECL_RTL (imp_decl);
11188 if (want_reg)
11189 x = force_reg (Pmode, x);
11190 return x;
11193 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
11194 is true if we require the result to be a register. */
11197 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11199 if (!TARGET_PECOFF)
11200 return NULL_RTX;
11202 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11204 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11205 return legitimize_dllimport_symbol (addr, inreg);
11206 if (GET_CODE (addr) == CONST
11207 && GET_CODE (XEXP (addr, 0)) == PLUS
11208 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11209 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11211 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11212 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11216 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11217 return NULL_RTX;
11218 if (GET_CODE (addr) == SYMBOL_REF
11219 && !is_imported_p (addr)
11220 && SYMBOL_REF_EXTERNAL_P (addr)
11221 && SYMBOL_REF_DECL (addr))
11222 return legitimize_pe_coff_extern_decl (addr, inreg);
11224 if (GET_CODE (addr) == CONST
11225 && GET_CODE (XEXP (addr, 0)) == PLUS
11226 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11227 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11228 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11229 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11231 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11232 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11234 return NULL_RTX;
11237 /* Try machine-dependent ways of modifying an illegitimate address
11238 to be legitimate. If we find one, return the new, valid address.
11239 This macro is used in only one place: `memory_address' in explow.c.
11241 OLDX is the address as it was before break_out_memory_refs was called.
11242 In some cases it is useful to look at this to decide what needs to be done.
11244 It is always safe for this macro to do nothing. It exists to recognize
11245 opportunities to optimize the output.
11247 For the 80386, we handle X+REG by loading X into a register R and
11248 using R+REG. R will go in a general reg and indexing will be used.
11249 However, if REG is a broken-out memory address or multiplication,
11250 nothing needs to be done because REG can certainly go in a general reg.
11252 When -fpic is used, special handling is needed for symbolic references.
11253 See comments by legitimize_pic_address in i386.c for details. */
11255 static rtx
11256 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11258 bool changed = false;
11259 unsigned log;
11261 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11262 if (log)
11263 return legitimize_tls_address (x, (enum tls_model) log, false);
11264 if (GET_CODE (x) == CONST
11265 && GET_CODE (XEXP (x, 0)) == PLUS
11266 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11267 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11269 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11270 (enum tls_model) log, false);
11271 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11274 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11276 rtx tmp = legitimize_pe_coff_symbol (x, true);
11277 if (tmp)
11278 return tmp;
11281 if (flag_pic && SYMBOLIC_CONST (x))
11282 return legitimize_pic_address (x, 0);
11284 #if TARGET_MACHO
11285 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11286 return machopic_indirect_data_reference (x, 0);
11287 #endif
11289 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11290 if (GET_CODE (x) == ASHIFT
11291 && CONST_INT_P (XEXP (x, 1))
11292 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11294 changed = true;
11295 log = INTVAL (XEXP (x, 1));
11296 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11297 GEN_INT (1 << log));
11300 if (GET_CODE (x) == PLUS)
11302 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11304 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11305 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11306 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11308 changed = true;
11309 log = INTVAL (XEXP (XEXP (x, 0), 1));
11310 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11311 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11312 GEN_INT (1 << log));
11315 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11316 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11317 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11319 changed = true;
11320 log = INTVAL (XEXP (XEXP (x, 1), 1));
11321 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11322 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11323 GEN_INT (1 << log));
11326 /* Put multiply first if it isn't already. */
11327 if (GET_CODE (XEXP (x, 1)) == MULT)
11329 std::swap (XEXP (x, 0), XEXP (x, 1));
11330 changed = true;
11333 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11334 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11335 created by virtual register instantiation, register elimination, and
11336 similar optimizations. */
11337 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11339 changed = true;
11340 x = gen_rtx_PLUS (Pmode,
11341 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11342 XEXP (XEXP (x, 1), 0)),
11343 XEXP (XEXP (x, 1), 1));
11346 /* Canonicalize
11347 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11348 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11349 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11350 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11351 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11352 && CONSTANT_P (XEXP (x, 1)))
11354 rtx constant;
11355 rtx other = NULL_RTX;
11357 if (CONST_INT_P (XEXP (x, 1)))
11359 constant = XEXP (x, 1);
11360 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11362 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11364 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11365 other = XEXP (x, 1);
11367 else
11368 constant = 0;
11370 if (constant)
11372 changed = true;
11373 x = gen_rtx_PLUS (Pmode,
11374 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11375 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11376 plus_constant (Pmode, other,
11377 INTVAL (constant)));
11381 if (changed && ix86_legitimate_address_p (mode, x, false))
11382 return x;
11384 if (GET_CODE (XEXP (x, 0)) == MULT)
11386 changed = true;
11387 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11390 if (GET_CODE (XEXP (x, 1)) == MULT)
11392 changed = true;
11393 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11396 if (changed
11397 && REG_P (XEXP (x, 1))
11398 && REG_P (XEXP (x, 0)))
11399 return x;
11401 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11403 changed = true;
11404 x = legitimize_pic_address (x, 0);
11407 if (changed && ix86_legitimate_address_p (mode, x, false))
11408 return x;
11410 if (REG_P (XEXP (x, 0)))
11412 rtx temp = gen_reg_rtx (Pmode);
11413 rtx val = force_operand (XEXP (x, 1), temp);
11414 if (val != temp)
11416 val = convert_to_mode (Pmode, val, 1);
11417 emit_move_insn (temp, val);
11420 XEXP (x, 1) = temp;
11421 return x;
11424 else if (REG_P (XEXP (x, 1)))
11426 rtx temp = gen_reg_rtx (Pmode);
11427 rtx val = force_operand (XEXP (x, 0), temp);
11428 if (val != temp)
11430 val = convert_to_mode (Pmode, val, 1);
11431 emit_move_insn (temp, val);
11434 XEXP (x, 0) = temp;
11435 return x;
11439 return x;
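/* Editorial sketch: given x = (plus (reg a) (ashift (reg b) (const_int 2))),
   the canonicalization above first turns the shift into
   (mult (reg b) (const_int 4)) and then swaps the operands so the multiply
   comes first, yielding (plus (mult (reg b) (const_int 4)) (reg a)), which
   ix86_legitimate_address_p can then accept as a scaled-index address. */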
11442 /* Print an integer constant expression in assembler syntax. Addition
11443 and subtraction are the only arithmetic that may appear in these
11444 expressions. FILE is the stdio stream to write to, X is the rtx, and
11445 CODE is the operand print code from the output string. */
11447 static void
11448 output_pic_addr_const (FILE *file, rtx x, int code)
11450 char buf[256];
11452 switch (GET_CODE (x))
11454 case PC:
11455 gcc_assert (flag_pic);
11456 putc ('.', file);
11457 break;
11459 case SYMBOL_REF:
11460 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11461 output_addr_const (file, x);
11462 else
11464 const char *name = XSTR (x, 0);
11466 /* Mark the decl as referenced so that cgraph will
11467 output the function. */
11468 if (SYMBOL_REF_DECL (x))
11469 mark_decl_referenced (SYMBOL_REF_DECL (x));
11471 #if TARGET_MACHO
11472 if (MACHOPIC_INDIRECT
11473 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11474 name = machopic_indirection_name (x, /*stub_p=*/true);
11475 #endif
11476 assemble_name (file, name);
11478 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11479 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11480 fputs ("@PLT", file);
11481 break;
11483 case LABEL_REF:
11484 x = XEXP (x, 0);
11485 /* FALLTHRU */
11486 case CODE_LABEL:
11487 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11488 assemble_name (asm_out_file, buf);
11489 break;
11491 case CONST_INT:
11492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11493 break;
11495 case CONST:
11496 /* This used to output parentheses around the expression,
11497 but that does not work on the 386 (either ATT or BSD assembler). */
11498 output_pic_addr_const (file, XEXP (x, 0), code);
11499 break;
11501 case CONST_DOUBLE:
11502 /* We can't handle floating point constants;
11503 TARGET_PRINT_OPERAND must handle them. */
11504 output_operand_lossage ("floating constant misused");
11505 break;
11507 case PLUS:
11508 /* Some assemblers need integer constants to appear first. */
11509 if (CONST_INT_P (XEXP (x, 0)))
11511 output_pic_addr_const (file, XEXP (x, 0), code);
11512 putc ('+', file);
11513 output_pic_addr_const (file, XEXP (x, 1), code);
11515 else
11517 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11518 output_pic_addr_const (file, XEXP (x, 1), code);
11519 putc ('+', file);
11520 output_pic_addr_const (file, XEXP (x, 0), code);
11522 break;
11524 case MINUS:
11525 if (!TARGET_MACHO)
11526 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11527 output_pic_addr_const (file, XEXP (x, 0), code);
11528 putc ('-', file);
11529 output_pic_addr_const (file, XEXP (x, 1), code);
11530 if (!TARGET_MACHO)
11531 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11532 break;
11534 case UNSPEC:
11535 gcc_assert (XVECLEN (x, 0) == 1);
11536 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11537 switch (XINT (x, 1))
11539 case UNSPEC_GOT:
11540 fputs ("@GOT", file);
11541 break;
11542 case UNSPEC_GOTOFF:
11543 fputs ("@GOTOFF", file);
11544 break;
11545 case UNSPEC_PLTOFF:
11546 fputs ("@PLTOFF", file);
11547 break;
11548 case UNSPEC_PCREL:
11549 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11550 "(%rip)" : "[rip]", file);
11551 break;
11552 case UNSPEC_GOTPCREL:
11553 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11554 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11555 break;
11556 case UNSPEC_GOTTPOFF:
11557 /* FIXME: This might be @TPOFF in Sun ld too. */
11558 fputs ("@gottpoff", file);
11559 break;
11560 case UNSPEC_TPOFF:
11561 fputs ("@tpoff", file);
11562 break;
11563 case UNSPEC_NTPOFF:
11564 if (TARGET_64BIT)
11565 fputs ("@tpoff", file);
11566 else
11567 fputs ("@ntpoff", file);
11568 break;
11569 case UNSPEC_DTPOFF:
11570 fputs ("@dtpoff", file);
11571 break;
11572 case UNSPEC_GOTNTPOFF:
11573 if (TARGET_64BIT)
11574 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11575 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11576 else
11577 fputs ("@gotntpoff", file);
11578 break;
11579 case UNSPEC_INDNTPOFF:
11580 fputs ("@indntpoff", file);
11581 break;
11582 #if TARGET_MACHO
11583 case UNSPEC_MACHOPIC_OFFSET:
11584 putc ('-', file);
11585 machopic_output_function_base_name (file);
11586 break;
11587 #endif
11588 default:
11589 output_operand_lossage ("invalid UNSPEC as operand");
11590 break;
11592 break;
11594 default:
11595 output_operand_lossage ("invalid expression as operand");
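/* Illustrative note (added, not part of the original source): for 32-bit
   PIC code the routine above maps the UNSPEC wrappers onto the assembler
   relocation suffixes, e.g.

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))  ->  foo@GOTOFF
     (const (unspec [(symbol_ref "bar")] UNSPEC_GOT))     ->  bar@GOT

   and a non-local SYMBOL_REF printed with operand code 'P' additionally
   gets an "@PLT" suffix, as the SYMBOL_REF case shows.  */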
11599 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11600 We need to emit DTP-relative relocations. */
11602 static void ATTRIBUTE_UNUSED
11603 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11605 fputs (ASM_LONG, file);
11606 output_addr_const (file, x);
11607 fputs ("@dtpoff", file);
11608 switch (size)
11610 case 4:
11611 break;
11612 case 8:
11613 fputs (", 0", file);
11614 break;
11615 default:
11616 gcc_unreachable ();
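/* Illustration (added, not in the original source): with ASM_LONG
   expanding to the .long directive, a size-4 call on a SYMBOL_REF for
   `x' emits roughly

     .long   x@dtpoff

   while the size-8 case appends ", 0" so the pair forms a 64-bit
   value, matching the switch above.  */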
11620 /* Return true if X is a representation of the PIC register. This copes
11621 with calls from ix86_find_base_term, where the register might have
11622 been replaced by a cselib value. */
11624 static bool
11625 ix86_pic_register_p (rtx x)
11627 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11628 return (pic_offset_table_rtx
11629 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11630 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11631 return true;
11632 else if (!REG_P (x))
11633 return false;
11634 else if (pic_offset_table_rtx)
11636 if (REGNO (x) == REGNO (pic_offset_table_rtx))
11637 return true;
11638 if (HARD_REGISTER_P (x)
11639 && !HARD_REGISTER_P (pic_offset_table_rtx)
11640 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11641 return true;
11642 return false;
11644 else
11645 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11648 /* Helper function for ix86_delegitimize_address.
11649 Attempt to delegitimize TLS local-exec accesses. */
11651 static rtx
11652 ix86_delegitimize_tls_address (rtx orig_x)
11654 rtx x = orig_x, unspec;
11655 struct ix86_address addr;
11657 if (!TARGET_TLS_DIRECT_SEG_REFS)
11658 return orig_x;
11659 if (MEM_P (x))
11660 x = XEXP (x, 0);
11661 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11662 return orig_x;
11663 if (ix86_decompose_address (x, &addr) == 0
11664 || addr.seg != DEFAULT_TLS_SEG_REG
11665 || addr.disp == NULL_RTX
11666 || GET_CODE (addr.disp) != CONST)
11667 return orig_x;
11668 unspec = XEXP (addr.disp, 0);
11669 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11670 unspec = XEXP (unspec, 0);
11671 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11672 return orig_x;
11673 x = XVECEXP (unspec, 0, 0);
11674 gcc_assert (GET_CODE (x) == SYMBOL_REF);
11675 if (unspec != XEXP (addr.disp, 0))
11676 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11677 if (addr.index)
11679 rtx idx = addr.index;
11680 if (addr.scale != 1)
11681 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11682 x = gen_rtx_PLUS (Pmode, idx, x);
11684 if (addr.base)
11685 x = gen_rtx_PLUS (Pmode, addr.base, x);
11686 if (MEM_P (orig_x))
11687 x = replace_equiv_address_nv (orig_x, x);
11688 return x;
11691 /* In the name of slightly smaller debug output, and to cater to
11692 general assembler lossage, recognize PIC+GOTOFF and turn it back
11693 into a direct symbol reference.
11695 On Darwin, this is necessary to avoid a crash, because Darwin
11696 has a different PIC label for each routine but the DWARF debugging
11697 information is not associated with any particular routine, so it's
11698 necessary to remove references to the PIC label from RTL stored by
11699 the DWARF output code.
11701 This helper is used in the normal ix86_delegitimize_address
11702 entrypoint (e.g. used in the target delegitimization hook) and
11703 in ix86_find_base_term. As a compile-time memory optimization, we
11704 avoid allocating rtxes that will not change anything in the outcome
11705 of the callers (find_base_value and find_base_term). */
11707 static inline rtx
11708 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11710 rtx orig_x = delegitimize_mem_from_attrs (x);
11711 /* addend is NULL or some rtx if x is something+GOTOFF where
11712 something doesn't include the PIC register. */
11713 rtx addend = NULL_RTX;
11714 /* reg_addend is NULL or a multiple of some register. */
11715 rtx reg_addend = NULL_RTX;
11716 /* const_addend is NULL or a const_int. */
11717 rtx const_addend = NULL_RTX;
11718 /* This is the result, or NULL. */
11719 rtx result = NULL_RTX;
11721 x = orig_x;
11723 if (MEM_P (x))
11724 x = XEXP (x, 0);
11726 if (TARGET_64BIT)
11728 if (GET_CODE (x) == CONST
11729 && GET_CODE (XEXP (x, 0)) == PLUS
11730 && GET_MODE (XEXP (x, 0)) == Pmode
11731 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11732 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11733 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11735 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11736 base. A CONST can't be arg_pointer_rtx based. */
11737 if (base_term_p && MEM_P (orig_x))
11738 return orig_x;
11739 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11740 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11741 if (MEM_P (orig_x))
11742 x = replace_equiv_address_nv (orig_x, x);
11743 return x;
11746 if (GET_CODE (x) == CONST
11747 && GET_CODE (XEXP (x, 0)) == UNSPEC
11748 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11749 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
11750 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
11752 x = XVECEXP (XEXP (x, 0), 0, 0);
11753 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
11755 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
11756 if (x == NULL_RTX)
11757 return orig_x;
11759 return x;
11762 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
11763 return ix86_delegitimize_tls_address (orig_x);
11765 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
11766 and -mcmodel=medium -fpic. */
11769 if (GET_CODE (x) != PLUS
11770 || GET_CODE (XEXP (x, 1)) != CONST)
11771 return ix86_delegitimize_tls_address (orig_x);
11773 if (ix86_pic_register_p (XEXP (x, 0)))
11774 /* %ebx + GOT/GOTOFF */
11776 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11778 /* %ebx + %reg * scale + GOT/GOTOFF */
11779 reg_addend = XEXP (x, 0);
11780 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11781 reg_addend = XEXP (reg_addend, 1);
11782 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11783 reg_addend = XEXP (reg_addend, 0);
11784 else
11786 reg_addend = NULL_RTX;
11787 addend = XEXP (x, 0);
11790 else
11791 addend = XEXP (x, 0);
11793 x = XEXP (XEXP (x, 1), 0);
11794 if (GET_CODE (x) == PLUS
11795 && CONST_INT_P (XEXP (x, 1)))
11797 const_addend = XEXP (x, 1);
11798 x = XEXP (x, 0);
11801 if (GET_CODE (x) == UNSPEC
11802 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11803 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
11804 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
11805 && !MEM_P (orig_x) && !addend)))
11806 result = XVECEXP (x, 0, 0);
11808 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
11809 && !MEM_P (orig_x))
11810 result = XVECEXP (x, 0, 0);
11812 if (! result)
11813 return ix86_delegitimize_tls_address (orig_x);
11815 /* For (PLUS something CONST_INT) both find_base_{value,term} just
11816 recurse on the first operand. */
11817 if (const_addend && !base_term_p)
11818 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11819 if (reg_addend)
11820 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11821 if (addend)
11823 /* If the rest of original X doesn't involve the PIC register, add
11824 addend and subtract pic_offset_table_rtx. This can happen e.g.
11825 for code like:
11826 leal (%ebx, %ecx, 4), %ecx
11828 movl foo@GOTOFF(%ecx), %edx
11829 in which case we return (%ecx - %ebx) + foo
11830 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
11831 and reload has completed. Don't do the latter for debug,
11832 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
11833 if (pic_offset_table_rtx
11834 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
11835 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11836 pic_offset_table_rtx),
11837 result);
11838 else if (base_term_p
11839 && pic_offset_table_rtx
11840 && !TARGET_MACHO
11841 && !TARGET_VXWORKS_RTP)
11843 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11844 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
11845 result = gen_rtx_PLUS (Pmode, tmp, result);
11847 else
11848 return orig_x;
11850 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11852 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
11853 if (result == NULL_RTX)
11854 return orig_x;
11856 return result;
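/* Worked example (added for illustration, not from the original file):
   for the 32-bit sequence

     leal  foo@GOTOFF(%ebx), %eax

   the source address is
     (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   and, since the first operand is the PIC register and the UNSPEC is
   UNSPEC_GOTOFF on a non-MEM, the helper above returns just
   (symbol_ref "foo"), which is what the debug output wants to see.  */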
11859 /* The normal instantiation of the above template. */
11861 static rtx
11862 ix86_delegitimize_address (rtx x)
11864 return ix86_delegitimize_address_1 (x, false);
11867 /* If X is a machine specific address (i.e. a symbol or label being
11868 referenced as a displacement from the GOT implemented using an
11869 UNSPEC), then return the base term. Otherwise return X. */
11872 ix86_find_base_term (rtx x)
11874 rtx term;
11876 if (TARGET_64BIT)
11878 if (GET_CODE (x) != CONST)
11879 return x;
11880 term = XEXP (x, 0);
11881 if (GET_CODE (term) == PLUS
11882 && CONST_INT_P (XEXP (term, 1)))
11883 term = XEXP (term, 0);
11884 if (GET_CODE (term) != UNSPEC
11885 || (XINT (term, 1) != UNSPEC_GOTPCREL
11886 && XINT (term, 1) != UNSPEC_PCREL))
11887 return x;
11889 return XVECEXP (term, 0, 0);
11892 return ix86_delegitimize_address_1 (x, true);
11895 /* Return true if X shouldn't be emitted into the debug info.
11896 Disallow UNSPECs other than @gotoff - we can't easily emit the
11897 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we do
11898 not delegitimize, but instead assemble it as @gotoff.
11899 Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
11900 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
11902 static bool
11903 ix86_const_not_ok_for_debug_p (rtx x)
11905 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
11906 return true;
11908 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
11909 return true;
11911 return false;
11914 static void
11915 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
11916 bool fp, FILE *file)
11918 const char *suffix;
11920 if (mode == CCFPmode)
11922 code = ix86_fp_compare_code_to_integer (code);
11923 mode = CCmode;
11925 if (reverse)
11926 code = reverse_condition (code);
11928 switch (code)
11930 case EQ:
11931 gcc_assert (mode != CCGZmode);
11932 switch (mode)
11934 case E_CCAmode:
11935 suffix = "a";
11936 break;
11937 case E_CCCmode:
11938 suffix = "c";
11939 break;
11940 case E_CCOmode:
11941 suffix = "o";
11942 break;
11943 case E_CCPmode:
11944 suffix = "p";
11945 break;
11946 case E_CCSmode:
11947 suffix = "s";
11948 break;
11949 default:
11950 suffix = "e";
11951 break;
11953 break;
11954 case NE:
11955 gcc_assert (mode != CCGZmode);
11956 switch (mode)
11958 case E_CCAmode:
11959 suffix = "na";
11960 break;
11961 case E_CCCmode:
11962 suffix = "nc";
11963 break;
11964 case E_CCOmode:
11965 suffix = "no";
11966 break;
11967 case E_CCPmode:
11968 suffix = "np";
11969 break;
11970 case E_CCSmode:
11971 suffix = "ns";
11972 break;
11973 default:
11974 suffix = "ne";
11975 break;
11977 break;
11978 case GT:
11979 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11980 suffix = "g";
11981 break;
11982 case GTU:
11983 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11984 Those same assemblers have the same but opposite lossage on cmov. */
11985 if (mode == CCmode)
11986 suffix = fp ? "nbe" : "a";
11987 else
11988 gcc_unreachable ();
11989 break;
11990 case LT:
11991 switch (mode)
11993 case E_CCNOmode:
11994 case E_CCGOCmode:
11995 suffix = "s";
11996 break;
11998 case E_CCmode:
11999 case E_CCGCmode:
12000 case E_CCGZmode:
12001 suffix = "l";
12002 break;
12004 default:
12005 gcc_unreachable ();
12007 break;
12008 case LTU:
12009 if (mode == CCmode || mode == CCGZmode)
12010 suffix = "b";
12011 else if (mode == CCCmode)
12012 suffix = fp ? "b" : "c";
12013 else
12014 gcc_unreachable ();
12015 break;
12016 case GE:
12017 switch (mode)
12019 case E_CCNOmode:
12020 case E_CCGOCmode:
12021 suffix = "ns";
12022 break;
12024 case E_CCmode:
12025 case E_CCGCmode:
12026 case E_CCGZmode:
12027 suffix = "ge";
12028 break;
12030 default:
12031 gcc_unreachable ();
12033 break;
12034 case GEU:
12035 if (mode == CCmode || mode == CCGZmode)
12036 suffix = "nb";
12037 else if (mode == CCCmode)
12038 suffix = fp ? "nb" : "nc";
12039 else
12040 gcc_unreachable ();
12041 break;
12042 case LE:
12043 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12044 suffix = "le";
12045 break;
12046 case LEU:
12047 if (mode == CCmode)
12048 suffix = "be";
12049 else
12050 gcc_unreachable ();
12051 break;
12052 case UNORDERED:
12053 suffix = fp ? "u" : "p";
12054 break;
12055 case ORDERED:
12056 suffix = fp ? "nu" : "np";
12057 break;
12058 default:
12059 gcc_unreachable ();
12061 fputs (suffix, file);
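/* Example (added for illustration): a GT comparison in CCGCmode maps to
   the suffix "g", so an insn template using operand code 'C' prints e.g.
   "setg"/"cmovg"; with the reversed code 'c' the condition is first run
   through reverse_condition, giving LE and hence "setle".  */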
12064 /* Print the name of register X to FILE based on its machine mode and number.
12065 If CODE is 'w', pretend the mode is HImode.
12066 If CODE is 'b', pretend the mode is QImode.
12067 If CODE is 'k', pretend the mode is SImode.
12068 If CODE is 'q', pretend the mode is DImode.
12069 If CODE is 'x', pretend the mode is V4SFmode.
12070 If CODE is 't', pretend the mode is V8SFmode.
12071 If CODE is 'g', pretend the mode is V16SFmode.
12072 If CODE is 'h', pretend the reg is the 'high' byte register.
12073 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12074 If CODE is 'd', duplicate the operand for AVX instruction.
12075 If CODE is 'V', print naked full integer register name without %.
12078 void
12079 print_reg (rtx x, int code, FILE *file)
12081 const char *reg;
12082 int msize;
12083 unsigned int regno;
12084 bool duplicated;
12086 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12087 putc ('%', file);
12089 if (x == pc_rtx)
12091 gcc_assert (TARGET_64BIT);
12092 fputs ("rip", file);
12093 return;
12096 if (code == 'y' && STACK_TOP_P (x))
12098 fputs ("st(0)", file);
12099 return;
12102 if (code == 'w')
12103 msize = 2;
12104 else if (code == 'b')
12105 msize = 1;
12106 else if (code == 'k')
12107 msize = 4;
12108 else if (code == 'q')
12109 msize = 8;
12110 else if (code == 'h')
12111 msize = 0;
12112 else if (code == 'x')
12113 msize = 16;
12114 else if (code == 't')
12115 msize = 32;
12116 else if (code == 'g')
12117 msize = 64;
12118 else
12119 msize = GET_MODE_SIZE (GET_MODE (x));
12121 regno = REGNO (x);
12123 if (regno == ARG_POINTER_REGNUM
12124 || regno == FRAME_POINTER_REGNUM
12125 || regno == FPSR_REG)
12127 output_operand_lossage
12128 ("invalid use of register '%s'", reg_names[regno]);
12129 return;
12131 else if (regno == FLAGS_REG)
12133 output_operand_lossage ("invalid use of asm flag output");
12134 return;
12137 if (code == 'V')
12139 if (GENERAL_REGNO_P (regno))
12140 msize = GET_MODE_SIZE (word_mode);
12141 else
12142 error ("%<V%> modifier on non-integer register");
12145 duplicated = code == 'd' && TARGET_AVX;
12147 switch (msize)
12149 case 16:
12150 case 12:
12151 case 8:
12152 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12153 warning (0, "unsupported size for integer register");
12154 /* FALLTHRU */
12155 case 4:
12156 if (LEGACY_INT_REGNO_P (regno))
12157 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12158 /* FALLTHRU */
12159 case 2:
12160 normal:
12161 reg = hi_reg_name[regno];
12162 break;
12163 case 1:
12164 if (regno >= ARRAY_SIZE (qi_reg_name))
12165 goto normal;
12166 if (!ANY_QI_REGNO_P (regno))
12167 error ("unsupported size for integer register");
12168 reg = qi_reg_name[regno];
12169 break;
12170 case 0:
12171 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12172 goto normal;
12173 reg = qi_high_reg_name[regno];
12174 break;
12175 case 32:
12176 case 64:
12177 if (SSE_REGNO_P (regno))
12179 gcc_assert (!duplicated);
12180 putc (msize == 32 ? 'y' : 'z', file);
12181 reg = hi_reg_name[regno] + 1;
12182 break;
12184 goto normal;
12185 default:
12186 gcc_unreachable ();
12189 fputs (reg, file);
12191 /* Irritatingly, AMD extended registers use a
12192 different naming convention: "r%d[bwd]". */
12193 if (REX_INT_REGNO_P (regno))
12195 gcc_assert (TARGET_64BIT);
12196 switch (msize)
12198 case 0:
12199 error ("extended registers have no high halves");
12200 break;
12201 case 1:
12202 putc ('b', file);
12203 break;
12204 case 2:
12205 putc ('w', file);
12206 break;
12207 case 4:
12208 putc ('d', file);
12209 break;
12210 case 8:
12211 /* no suffix */
12212 break;
12213 default:
12214 error ("unsupported operand size for extended register");
12215 break;
12217 return;
12220 if (duplicated)
12222 if (ASSEMBLER_DIALECT == ASM_ATT)
12223 fprintf (file, ", %%%s", reg);
12224 else
12225 fprintf (file, ", %s", reg);
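/* Illustration (added, not part of the original source): for the legacy
   register %eax, the switch above prints "ax" prefixed with 'e' when
   MSIZE is 4 and with 'r' when MSIZE is 8 in 64-bit mode, i.e. "%eax" /
   "%rax" in AT&T output.  For an AMD extended register such as %r10 the
   bare name "r10" is printed first and the size becomes a suffix, so
   code 'w' yields "r10w" and code 'k' yields "r10d".  */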
12229 /* Meaning of CODE:
12230 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12231 C -- print opcode suffix for set/cmov insn.
12232 c -- like C, but print reversed condition
12233 F,f -- likewise, but for floating-point.
12234 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12235 otherwise nothing
12236 R -- print embedded rounding and sae.
12237 r -- print only sae.
12238 z -- print the opcode suffix for the size of the current operand.
12239 Z -- likewise, with special suffixes for x87 instructions.
12240 * -- print a star (in certain assembler syntax)
12241 A -- print an absolute memory reference.
12242 E -- print address with DImode register names if TARGET_64BIT.
12243 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12244 s -- print a shift double count, followed by the assembler's argument
12245 delimiter.
12246 b -- print the QImode name of the register for the indicated operand.
12247 %b0 would print %al if operands[0] is reg 0.
12248 w -- likewise, print the HImode name of the register.
12249 k -- likewise, print the SImode name of the register.
12250 q -- likewise, print the DImode name of the register.
12251 x -- likewise, print the V4SFmode name of the register.
12252 t -- likewise, print the V8SFmode name of the register.
12253 g -- likewise, print the V16SFmode name of the register.
12254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12255 y -- print "st(0)" instead of "st" as a register.
12256 d -- print duplicated register operand for AVX instruction.
12257 D -- print condition for SSE cmp instruction.
12258 P -- if PIC, print an @PLT suffix.
12259 p -- print raw symbol name.
12260 X -- don't print any sort of PIC '@' suffix for a symbol.
12261 & -- print some in-use local-dynamic symbol name.
12262 H -- print a memory address offset by 8; used for sse high-parts
12263 Y -- print condition for XOP pcom* instruction.
12264 V -- print naked full integer register name without %.
12265 + -- print a branch hint as 'cs' or 'ds' prefix
12266 ; -- print a semicolon (after prefixes due to bug in older gas).
12267 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12268 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12269 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12270 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
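/* Example (illustrative addition): the size-override codes above are
   typically used from machine-description templates such as

     "mov{b}\t{%b1, %b0|%b0, %b1}"

   where, if operands[0] is reg 0, %b0 prints "%al" in AT&T syntax and
   "al" in Intel syntax, while %k0 / %q0 / %h0 would print "%eax",
   "%rax" and "%ah" respectively; the {att|intel} braces select between
   the two assembler dialects.  The exact templates live in i386.md.  */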
12273 void
12274 ix86_print_operand (FILE *file, rtx x, int code)
12276 if (code)
12278 switch (code)
12280 case 'A':
12281 switch (ASSEMBLER_DIALECT)
12283 case ASM_ATT:
12284 putc ('*', file);
12285 break;
12287 case ASM_INTEL:
12288 /* Intel syntax. For absolute addresses, registers should not
12289 be surrounded by brackets. */
12290 if (!REG_P (x))
12292 putc ('[', file);
12293 ix86_print_operand (file, x, 0);
12294 putc (']', file);
12295 return;
12297 break;
12299 default:
12300 gcc_unreachable ();
12303 ix86_print_operand (file, x, 0);
12304 return;
12306 case 'E':
12307 /* Wrap address in an UNSPEC to declare special handling. */
12308 if (TARGET_64BIT)
12309 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12311 output_address (VOIDmode, x);
12312 return;
12314 case 'L':
12315 if (ASSEMBLER_DIALECT == ASM_ATT)
12316 putc ('l', file);
12317 return;
12319 case 'W':
12320 if (ASSEMBLER_DIALECT == ASM_ATT)
12321 putc ('w', file);
12322 return;
12324 case 'B':
12325 if (ASSEMBLER_DIALECT == ASM_ATT)
12326 putc ('b', file);
12327 return;
12329 case 'Q':
12330 if (ASSEMBLER_DIALECT == ASM_ATT)
12331 putc ('l', file);
12332 return;
12334 case 'S':
12335 if (ASSEMBLER_DIALECT == ASM_ATT)
12336 putc ('s', file);
12337 return;
12339 case 'T':
12340 if (ASSEMBLER_DIALECT == ASM_ATT)
12341 putc ('t', file);
12342 return;
12344 case 'O':
12345 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12346 if (ASSEMBLER_DIALECT != ASM_ATT)
12347 return;
12349 switch (GET_MODE_SIZE (GET_MODE (x)))
12351 case 2:
12352 putc ('w', file);
12353 break;
12355 case 4:
12356 putc ('l', file);
12357 break;
12359 case 8:
12360 putc ('q', file);
12361 break;
12363 default:
12364 output_operand_lossage ("invalid operand size for operand "
12365 "code 'O'");
12366 return;
12369 putc ('.', file);
12370 #endif
12371 return;
12373 case 'z':
12374 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12376 /* Opcodes don't get size suffixes if using Intel syntax. */
12377 if (ASSEMBLER_DIALECT == ASM_INTEL)
12378 return;
12380 switch (GET_MODE_SIZE (GET_MODE (x)))
12382 case 1:
12383 putc ('b', file);
12384 return;
12386 case 2:
12387 putc ('w', file);
12388 return;
12390 case 4:
12391 putc ('l', file);
12392 return;
12394 case 8:
12395 putc ('q', file);
12396 return;
12398 default:
12399 output_operand_lossage ("invalid operand size for operand "
12400 "code 'z'");
12401 return;
12405 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12406 warning (0, "non-integer operand used with operand code %<z%>");
12407 /* FALLTHRU */
12409 case 'Z':
12410 /* 387 opcodes don't get size suffixes if using Intel syntax. */
12411 if (ASSEMBLER_DIALECT == ASM_INTEL)
12412 return;
12414 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12416 switch (GET_MODE_SIZE (GET_MODE (x)))
12418 case 2:
12419 #ifdef HAVE_AS_IX86_FILDS
12420 putc ('s', file);
12421 #endif
12422 return;
12424 case 4:
12425 putc ('l', file);
12426 return;
12428 case 8:
12429 #ifdef HAVE_AS_IX86_FILDQ
12430 putc ('q', file);
12431 #else
12432 fputs ("ll", file);
12433 #endif
12434 return;
12436 default:
12437 break;
12440 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12442 /* 387 opcodes don't get size suffixes
12443 if the operands are registers. */
12444 if (STACK_REG_P (x))
12445 return;
12447 switch (GET_MODE_SIZE (GET_MODE (x)))
12449 case 4:
12450 putc ('s', file);
12451 return;
12453 case 8:
12454 putc ('l', file);
12455 return;
12457 case 12:
12458 case 16:
12459 putc ('t', file);
12460 return;
12462 default:
12463 break;
12466 else
12468 output_operand_lossage ("invalid operand type used with "
12469 "operand code 'Z'");
12470 return;
12473 output_operand_lossage ("invalid operand size for operand code 'Z'");
12474 return;
12476 case 'd':
12477 case 'b':
12478 case 'w':
12479 case 'k':
12480 case 'q':
12481 case 'h':
12482 case 't':
12483 case 'g':
12484 case 'y':
12485 case 'x':
12486 case 'X':
12487 case 'P':
12488 case 'p':
12489 case 'V':
12490 break;
12492 case 's':
12493 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12495 ix86_print_operand (file, x, 0);
12496 fputs (", ", file);
12498 return;
12500 case 'Y':
12501 switch (GET_CODE (x))
12503 case NE:
12504 fputs ("neq", file);
12505 break;
12506 case EQ:
12507 fputs ("eq", file);
12508 break;
12509 case GE:
12510 case GEU:
12511 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12512 break;
12513 case GT:
12514 case GTU:
12515 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12516 break;
12517 case LE:
12518 case LEU:
12519 fputs ("le", file);
12520 break;
12521 case LT:
12522 case LTU:
12523 fputs ("lt", file);
12524 break;
12525 case UNORDERED:
12526 fputs ("unord", file);
12527 break;
12528 case ORDERED:
12529 fputs ("ord", file);
12530 break;
12531 case UNEQ:
12532 fputs ("ueq", file);
12533 break;
12534 case UNGE:
12535 fputs ("nlt", file);
12536 break;
12537 case UNGT:
12538 fputs ("nle", file);
12539 break;
12540 case UNLE:
12541 fputs ("ule", file);
12542 break;
12543 case UNLT:
12544 fputs ("ult", file);
12545 break;
12546 case LTGT:
12547 fputs ("une", file);
12548 break;
12549 default:
12550 output_operand_lossage ("operand is not a condition code, "
12551 "invalid operand code 'Y'");
12552 return;
12554 return;
12556 case 'D':
12557 /* Little bit of braindamage here. The SSE compare instructions
12558 use completely different names for the comparisons than the
12559 fp conditional moves do. */
12560 switch (GET_CODE (x))
12562 case UNEQ:
12563 if (TARGET_AVX)
12565 fputs ("eq_us", file);
12566 break;
12568 /* FALLTHRU */
12569 case EQ:
12570 fputs ("eq", file);
12571 break;
12572 case UNLT:
12573 if (TARGET_AVX)
12575 fputs ("nge", file);
12576 break;
12578 /* FALLTHRU */
12579 case LT:
12580 fputs ("lt", file);
12581 break;
12582 case UNLE:
12583 if (TARGET_AVX)
12585 fputs ("ngt", file);
12586 break;
12588 /* FALLTHRU */
12589 case LE:
12590 fputs ("le", file);
12591 break;
12592 case UNORDERED:
12593 fputs ("unord", file);
12594 break;
12595 case LTGT:
12596 if (TARGET_AVX)
12598 fputs ("neq_oq", file);
12599 break;
12601 /* FALLTHRU */
12602 case NE:
12603 fputs ("neq", file);
12604 break;
12605 case GE:
12606 if (TARGET_AVX)
12608 fputs ("ge", file);
12609 break;
12611 /* FALLTHRU */
12612 case UNGE:
12613 fputs ("nlt", file);
12614 break;
12615 case GT:
12616 if (TARGET_AVX)
12618 fputs ("gt", file);
12619 break;
12621 /* FALLTHRU */
12622 case UNGT:
12623 fputs ("nle", file);
12624 break;
12625 case ORDERED:
12626 fputs ("ord", file);
12627 break;
12628 default:
12629 output_operand_lossage ("operand is not a condition code, "
12630 "invalid operand code 'D'");
12631 return;
12633 return;
12635 case 'F':
12636 case 'f':
12637 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12638 if (ASSEMBLER_DIALECT == ASM_ATT)
12639 putc ('.', file);
12640 gcc_fallthrough ();
12641 #endif
12643 case 'C':
12644 case 'c':
12645 if (!COMPARISON_P (x))
12647 output_operand_lossage ("operand is not a condition code, "
12648 "invalid operand code '%c'", code);
12649 return;
12651 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12652 code == 'c' || code == 'f',
12653 code == 'F' || code == 'f',
12654 file);
12655 return;
12657 case 'H':
12658 if (!offsettable_memref_p (x))
12660 output_operand_lossage ("operand is not an offsettable memory "
12661 "reference, invalid operand code 'H'");
12662 return;
12664 /* It doesn't actually matter what mode we use here, as we're
12665 only going to use this for printing. */
12666 x = adjust_address_nv (x, DImode, 8);
12667 /* Output 'qword ptr' for intel assembler dialect. */
12668 if (ASSEMBLER_DIALECT == ASM_INTEL)
12669 code = 'q';
12670 break;
12672 case 'K':
12673 if (!CONST_INT_P (x))
12675 output_operand_lossage ("operand is not an integer, invalid "
12676 "operand code 'K'");
12677 return;
12680 if (INTVAL (x) & IX86_HLE_ACQUIRE)
12681 #ifdef HAVE_AS_IX86_HLE
12682 fputs ("xacquire ", file);
12683 #else
12684 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12685 #endif
12686 else if (INTVAL (x) & IX86_HLE_RELEASE)
12687 #ifdef HAVE_AS_IX86_HLE
12688 fputs ("xrelease ", file);
12689 #else
12690 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12691 #endif
12692 /* We do not want to print the value of the operand. */
12693 return;
12695 case 'N':
12696 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12697 fputs ("{z}", file);
12698 return;
12700 case 'r':
12701 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12703 output_operand_lossage ("operand is not a specific integer, "
12704 "invalid operand code 'r'");
12705 return;
12708 if (ASSEMBLER_DIALECT == ASM_INTEL)
12709 fputs (", ", file);
12711 fputs ("{sae}", file);
12713 if (ASSEMBLER_DIALECT == ASM_ATT)
12714 fputs (", ", file);
12716 return;
12718 case 'R':
12719 if (!CONST_INT_P (x))
12721 output_operand_lossage ("operand is not an integer, invalid "
12722 "operand code 'R'");
12723 return;
12726 if (ASSEMBLER_DIALECT == ASM_INTEL)
12727 fputs (", ", file);
12729 switch (INTVAL (x))
12731 case ROUND_NEAREST_INT | ROUND_SAE:
12732 fputs ("{rn-sae}", file);
12733 break;
12734 case ROUND_NEG_INF | ROUND_SAE:
12735 fputs ("{rd-sae}", file);
12736 break;
12737 case ROUND_POS_INF | ROUND_SAE:
12738 fputs ("{ru-sae}", file);
12739 break;
12740 case ROUND_ZERO | ROUND_SAE:
12741 fputs ("{rz-sae}", file);
12742 break;
12743 default:
12744 output_operand_lossage ("operand is not a specific integer, "
12745 "invalid operand code 'R'");
12748 if (ASSEMBLER_DIALECT == ASM_ATT)
12749 fputs (", ", file);
12751 return;
12753 case '*':
12754 if (ASSEMBLER_DIALECT == ASM_ATT)
12755 putc ('*', file);
12756 return;
12758 case '&':
12760 const char *name = get_some_local_dynamic_name ();
12761 if (name == NULL)
12762 output_operand_lossage ("'%%&' used without any "
12763 "local dynamic TLS references");
12764 else
12765 assemble_name (file, name);
12766 return;
12769 case '+':
12771 rtx x;
12773 if (!optimize
12774 || optimize_function_for_size_p (cfun)
12775 || !TARGET_BRANCH_PREDICTION_HINTS)
12776 return;
12778 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12779 if (x)
12781 int pred_val = profile_probability::from_reg_br_prob_note
12782 (XINT (x, 0)).to_reg_br_prob_base ();
12784 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12785 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12787 bool taken = pred_val > REG_BR_PROB_BASE / 2;
12788 bool cputaken
12789 = final_forward_branch_p (current_output_insn) == 0;
12791 /* Emit hints only in the case the default branch prediction
12792 heuristics would fail. */
12793 if (taken != cputaken)
12795 /* We use 3e (DS) prefix for taken branches and
12796 2e (CS) prefix for not taken branches. */
12797 if (taken)
12798 fputs ("ds ; ", file);
12799 else
12800 fputs ("cs ; ", file);
12804 return;
12807 case ';':
12808 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12809 putc (';', file);
12810 #endif
12811 return;
12813 case '~':
12814 putc (TARGET_AVX2 ? 'i' : 'f', file);
12815 return;
12817 case 'M':
12818 if (TARGET_X32)
12820 /* NB: 32-bit indices in a VSIB address are sign-extended
12821 to 64 bits. In x32, a 32-bit address such as 0xf7fa3010 is
12822 sign-extended to 0xfffffffff7fa3010, which is an invalid
12823 address. Add the addr32 prefix if there is neither a base
12824 register nor a symbol. */
12825 bool ok;
12826 struct ix86_address parts;
12827 ok = ix86_decompose_address (x, &parts);
12828 gcc_assert (ok && parts.index == NULL_RTX);
12829 if (parts.base == NULL_RTX
12830 && (parts.disp == NULL_RTX
12831 || !symbolic_operand (parts.disp,
12832 GET_MODE (parts.disp))))
12833 fputs ("addr32 ", file);
12835 return;
12837 case '^':
12838 if (TARGET_64BIT && Pmode != word_mode)
12839 fputs ("addr32 ", file);
12840 return;
12842 case '!':
12843 if (ix86_notrack_prefixed_insn_p (current_output_insn))
12844 fputs ("notrack ", file);
12845 return;
12847 default:
12848 output_operand_lossage ("invalid operand code '%c'", code);
12852 if (REG_P (x))
12853 print_reg (x, code, file);
12855 else if (MEM_P (x))
12857 rtx addr = XEXP (x, 0);
12859 /* No `byte ptr' prefix for call instructions ... */
12860 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
12862 machine_mode mode = GET_MODE (x);
12863 const char *size;
12865 /* Check for explicit size override codes. */
12866 if (code == 'b')
12867 size = "BYTE";
12868 else if (code == 'w')
12869 size = "WORD";
12870 else if (code == 'k')
12871 size = "DWORD";
12872 else if (code == 'q')
12873 size = "QWORD";
12874 else if (code == 'x')
12875 size = "XMMWORD";
12876 else if (code == 't')
12877 size = "YMMWORD";
12878 else if (code == 'g')
12879 size = "ZMMWORD";
12880 else if (mode == BLKmode)
12881 /* ... or BLKmode operands, when not overridden. */
12882 size = NULL;
12883 else
12884 switch (GET_MODE_SIZE (mode))
12886 case 1: size = "BYTE"; break;
12887 case 2: size = "WORD"; break;
12888 case 4: size = "DWORD"; break;
12889 case 8: size = "QWORD"; break;
12890 case 12: size = "TBYTE"; break;
12891 case 16:
12892 if (mode == XFmode)
12893 size = "TBYTE";
12894 else
12895 size = "XMMWORD";
12896 break;
12897 case 32: size = "YMMWORD"; break;
12898 case 64: size = "ZMMWORD"; break;
12899 default:
12900 gcc_unreachable ();
12902 if (size)
12904 fputs (size, file);
12905 fputs (" PTR ", file);
12909 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
12910 output_operand_lossage ("invalid constraints for operand");
12911 else
12912 ix86_print_operand_address_as
12913 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
12916 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
12918 long l;
12920 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
12922 if (ASSEMBLER_DIALECT == ASM_ATT)
12923 putc ('$', file);
12924 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12925 if (code == 'q')
12926 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
12927 (unsigned long long) (int) l);
12928 else
12929 fprintf (file, "0x%08x", (unsigned int) l);
12932 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
12934 long l[2];
12936 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
12938 if (ASSEMBLER_DIALECT == ASM_ATT)
12939 putc ('$', file);
12940 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
12943 /* These float cases don't actually occur as immediate operands. */
12944 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
12946 char dstr[30];
12948 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12949 fputs (dstr, file);
12952 else
12954 /* We have patterns that allow zero sets of memory, for instance.
12955 In 64-bit mode, we should probably support all 8-byte vectors,
12956 since we can in fact encode that into an immediate. */
12957 if (GET_CODE (x) == CONST_VECTOR)
12959 if (x != CONST0_RTX (GET_MODE (x)))
12960 output_operand_lossage ("invalid vector immediate");
12961 x = const0_rtx;
12964 if (code != 'P' && code != 'p')
12966 if (CONST_INT_P (x))
12968 if (ASSEMBLER_DIALECT == ASM_ATT)
12969 putc ('$', file);
12971 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12972 || GET_CODE (x) == LABEL_REF)
12974 if (ASSEMBLER_DIALECT == ASM_ATT)
12975 putc ('$', file);
12976 else
12977 fputs ("OFFSET FLAT:", file);
12980 if (CONST_INT_P (x))
12981 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12982 else if (flag_pic || MACHOPIC_INDIRECT)
12983 output_pic_addr_const (file, x, code);
12984 else
12985 output_addr_const (file, x);
12989 static bool
12990 ix86_print_operand_punct_valid_p (unsigned char code)
12992 return (code == '*' || code == '+' || code == '&' || code == ';'
12993 || code == '~' || code == '^' || code == '!');
12996 /* Print a memory operand whose address is ADDR. */
12998 static void
12999 ix86_print_operand_address_as (FILE *file, rtx addr,
13000 addr_space_t as, bool no_rip)
13002 struct ix86_address parts;
13003 rtx base, index, disp;
13004 int scale;
13005 int ok;
13006 bool vsib = false;
13007 int code = 0;
13009 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13011 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13012 gcc_assert (parts.index == NULL_RTX);
13013 parts.index = XVECEXP (addr, 0, 1);
13014 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13015 addr = XVECEXP (addr, 0, 0);
13016 vsib = true;
13018 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13020 gcc_assert (TARGET_64BIT);
13021 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13022 code = 'q';
13024 else
13025 ok = ix86_decompose_address (addr, &parts);
13027 gcc_assert (ok);
13029 base = parts.base;
13030 index = parts.index;
13031 disp = parts.disp;
13032 scale = parts.scale;
13034 if (ADDR_SPACE_GENERIC_P (as))
13035 as = parts.seg;
13036 else
13037 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13039 if (!ADDR_SPACE_GENERIC_P (as))
13041 if (ASSEMBLER_DIALECT == ASM_ATT)
13042 putc ('%', file);
13044 switch (as)
13046 case ADDR_SPACE_SEG_FS:
13047 fputs ("fs:", file);
13048 break;
13049 case ADDR_SPACE_SEG_GS:
13050 fputs ("gs:", file);
13051 break;
13052 default:
13053 gcc_unreachable ();
13057 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13058 if (TARGET_64BIT && !base && !index && !no_rip)
13060 rtx symbol = disp;
13062 if (GET_CODE (disp) == CONST
13063 && GET_CODE (XEXP (disp, 0)) == PLUS
13064 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13065 symbol = XEXP (XEXP (disp, 0), 0);
13067 if (GET_CODE (symbol) == LABEL_REF
13068 || (GET_CODE (symbol) == SYMBOL_REF
13069 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13070 base = pc_rtx;
13073 if (!base && !index)
13075 /* A displacement-only address requires special attention. */
13076 if (CONST_INT_P (disp))
13078 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13079 fputs ("ds:", file);
13080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13082 /* Load the external function address via the GOT slot to avoid PLT. */
13083 else if (GET_CODE (disp) == CONST
13084 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13085 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13086 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13087 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13088 output_pic_addr_const (file, disp, 0);
13089 else if (flag_pic)
13090 output_pic_addr_const (file, disp, 0);
13091 else
13092 output_addr_const (file, disp);
13094 else
13096 /* Print SImode register names to force addr32 prefix. */
13097 if (SImode_address_operand (addr, VOIDmode))
13099 if (flag_checking)
13101 gcc_assert (TARGET_64BIT);
13102 switch (GET_CODE (addr))
13104 case SUBREG:
13105 gcc_assert (GET_MODE (addr) == SImode);
13106 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13107 break;
13108 case ZERO_EXTEND:
13109 case AND:
13110 gcc_assert (GET_MODE (addr) == DImode);
13111 break;
13112 default:
13113 gcc_unreachable ();
13116 gcc_assert (!code);
13117 code = 'k';
13119 else if (code == 0
13120 && TARGET_X32
13121 && disp
13122 && CONST_INT_P (disp)
13123 && INTVAL (disp) < -16*1024*1024)
13125 /* X32 runs in 64-bit mode, where displacement, DISP, in
13126 address DISP(%r64), is encoded as 32-bit immediate sign-
13127 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13128 address is %r64 + 0xffffffffbffffd00. When %r64 <
13129 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13130 which is invalid for x32. The correct address is %r64
13131 - 0x40000300 == 0xf7ffdd64. To properly encode
13132 -0x40000300(%r64) for x32, we zero-extend negative
13133 displacement by forcing addr32 prefix which truncates
13134 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13135 zero-extend all negative displacements, including -1(%rsp).
13136 However, for small negative displacements, sign-extension
13137 won't cause overflow. We only zero-extend negative
13138 displacements if they are < -16*1024*1024, which is also used
13139 to check legitimate address displacements for PIC. */
13140 code = 'k';
13143 /* Since the upper 32 bits of RSP are always zero for x32,
13144 we can encode %esp as %rsp to avoid 0x67 prefix if
13145 there is no index register. */
13146 if (TARGET_X32 && Pmode == SImode
13147 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13148 code = 'q';
13150 if (ASSEMBLER_DIALECT == ASM_ATT)
13152 if (disp)
13154 if (flag_pic)
13155 output_pic_addr_const (file, disp, 0);
13156 else if (GET_CODE (disp) == LABEL_REF)
13157 output_asm_label (disp);
13158 else
13159 output_addr_const (file, disp);
13162 putc ('(', file);
13163 if (base)
13164 print_reg (base, code, file);
13165 if (index)
13167 putc (',', file);
13168 print_reg (index, vsib ? 0 : code, file);
13169 if (scale != 1 || vsib)
13170 fprintf (file, ",%d", scale);
13172 putc (')', file);
13174 else
13176 rtx offset = NULL_RTX;
13178 if (disp)
13180 /* Pull out the offset of a symbol; print any symbol itself. */
13181 if (GET_CODE (disp) == CONST
13182 && GET_CODE (XEXP (disp, 0)) == PLUS
13183 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13185 offset = XEXP (XEXP (disp, 0), 1);
13186 disp = gen_rtx_CONST (VOIDmode,
13187 XEXP (XEXP (disp, 0), 0));
13190 if (flag_pic)
13191 output_pic_addr_const (file, disp, 0);
13192 else if (GET_CODE (disp) == LABEL_REF)
13193 output_asm_label (disp);
13194 else if (CONST_INT_P (disp))
13195 offset = disp;
13196 else
13197 output_addr_const (file, disp);
13200 putc ('[', file);
13201 if (base)
13203 print_reg (base, code, file);
13204 if (offset)
13206 if (INTVAL (offset) >= 0)
13207 putc ('+', file);
13208 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13211 else if (offset)
13212 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13213 else
13214 putc ('0', file);
13216 if (index)
13218 putc ('+', file);
13219 print_reg (index, vsib ? 0 : code, file);
13220 if (scale != 1 || vsib)
13221 fprintf (file, "*%d", scale);
13223 putc (']', file);
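/* Worked example (added for illustration): for base %eax, index %ebx,
   scale 4 and displacement 16, the routine above prints

     16(%eax,%ebx,4)       in AT&T syntax
     [eax+16+ebx*4]        in Intel syntax

   and a 64-bit access to a local symbol with no base or index becomes
   "sym(%rip)", because BASE is replaced by pc_rtx above.  */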
13228 static void
13229 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13231 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13234 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13236 static bool
13237 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13239 rtx op;
13241 if (GET_CODE (x) != UNSPEC)
13242 return false;
13244 op = XVECEXP (x, 0, 0);
13245 switch (XINT (x, 1))
13247 case UNSPEC_GOTOFF:
13248 output_addr_const (file, op);
13249 fputs ("@gotoff", file);
13250 break;
13251 case UNSPEC_GOTTPOFF:
13252 output_addr_const (file, op);
13253 /* FIXME: This might be @TPOFF in Sun ld. */
13254 fputs ("@gottpoff", file);
13255 break;
13256 case UNSPEC_TPOFF:
13257 output_addr_const (file, op);
13258 fputs ("@tpoff", file);
13259 break;
13260 case UNSPEC_NTPOFF:
13261 output_addr_const (file, op);
13262 if (TARGET_64BIT)
13263 fputs ("@tpoff", file);
13264 else
13265 fputs ("@ntpoff", file);
13266 break;
13267 case UNSPEC_DTPOFF:
13268 output_addr_const (file, op);
13269 fputs ("@dtpoff", file);
13270 break;
13271 case UNSPEC_GOTNTPOFF:
13272 output_addr_const (file, op);
13273 if (TARGET_64BIT)
13274 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13275 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13276 else
13277 fputs ("@gotntpoff", file);
13278 break;
13279 case UNSPEC_INDNTPOFF:
13280 output_addr_const (file, op);
13281 fputs ("@indntpoff", file);
13282 break;
13283 #if TARGET_MACHO
13284 case UNSPEC_MACHOPIC_OFFSET:
13285 output_addr_const (file, op);
13286 putc ('-', file);
13287 machopic_output_function_base_name (file);
13288 break;
13289 #endif
13291 default:
13292 return false;
13295 return true;
13299 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13300 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13301 is the expression of the binary operation. The output may either be
13302 emitted here, or returned to the caller, like all output_* functions.
13304 There is no guarantee that the operands are the same mode, as they
13305 might be within FLOAT or FLOAT_EXTEND expressions. */
13307 #ifndef SYSV386_COMPAT
13308 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13309 wants to fix the assemblers because that causes incompatibility
13310 with gcc. No-one wants to fix gcc because that causes
13311 incompatibility with assemblers... You can use the option of
13312 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13313 #define SYSV386_COMPAT 1
13314 #endif
13316 const char *
13317 output_387_binary_op (rtx_insn *insn, rtx *operands)
13319 static char buf[40];
13320 const char *p;
13321 bool is_sse
13322 = (SSE_REG_P (operands[0])
13323 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13325 if (is_sse)
13326 p = "%v";
13327 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13328 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13329 p = "fi";
13330 else
13331 p = "f";
13333 strcpy (buf, p);
13335 switch (GET_CODE (operands[3]))
13337 case PLUS:
13338 p = "add"; break;
13339 case MINUS:
13340 p = "sub"; break;
13341 case MULT:
13342 p = "mul"; break;
13343 case DIV:
13344 p = "div"; break;
13345 default:
13346 gcc_unreachable ();
13349 strcat (buf, p);
13351 if (is_sse)
13353 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13354 strcat (buf, p);
13356 if (TARGET_AVX)
13357 p = "\t{%2, %1, %0|%0, %1, %2}";
13358 else
13359 p = "\t{%2, %0|%0, %2}";
13361 strcat (buf, p);
13362 return buf;
13365 /* Even if we do not want to check the inputs, this documents the input
13366 constraints, which helps in understanding the following code. */
13367 if (flag_checking)
13369 if (STACK_REG_P (operands[0])
13370 && ((REG_P (operands[1])
13371 && REGNO (operands[0]) == REGNO (operands[1])
13372 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13373 || (REG_P (operands[2])
13374 && REGNO (operands[0]) == REGNO (operands[2])
13375 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13376 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13377 ; /* ok */
13378 else
13379 gcc_unreachable ();
13382 switch (GET_CODE (operands[3]))
13384 case MULT:
13385 case PLUS:
13386 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13387 std::swap (operands[1], operands[2]);
13389 /* know operands[0] == operands[1]. */
13391 if (MEM_P (operands[2]))
13393 p = "%Z2\t%2";
13394 break;
13397 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13399 if (STACK_TOP_P (operands[0]))
13400 /* How is it that we are storing to a dead operand[2]?
13401 Well, presumably operands[1] is dead too. We can't
13402 store the result to st(0) as st(0) gets popped on this
13403 instruction. Instead store to operands[2] (which I
13404 think has to be st(1)). st(1) will be popped later.
13405 gcc <= 2.8.1 didn't have this check and generated
13406 assembly code that the Unixware assembler rejected. */
13407 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13408 else
13409 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13410 break;
13413 if (STACK_TOP_P (operands[0]))
13414 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13415 else
13416 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13417 break;
13419 case MINUS:
13420 case DIV:
13421 if (MEM_P (operands[1]))
13423 p = "r%Z1\t%1";
13424 break;
13427 if (MEM_P (operands[2]))
13429 p = "%Z2\t%2";
13430 break;
13433 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13435 #if SYSV386_COMPAT
13436 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13437 derived assemblers, confusingly reverse the direction of
13438 the operation for fsub{r} and fdiv{r} when the
13439 destination register is not st(0). The Intel assembler
13440 doesn't have this brain damage. Read !SYSV386_COMPAT to
13441 figure out what the hardware really does. */
13442 if (STACK_TOP_P (operands[0]))
13443 p = "{p\t%0, %2|rp\t%2, %0}";
13444 else
13445 p = "{rp\t%2, %0|p\t%0, %2}";
13446 #else
13447 if (STACK_TOP_P (operands[0]))
13448 /* As above for fmul/fadd, we can't store to st(0). */
13449 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13450 else
13451 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13452 #endif
13453 break;
13456 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13458 #if SYSV386_COMPAT
13459 if (STACK_TOP_P (operands[0]))
13460 p = "{rp\t%0, %1|p\t%1, %0}";
13461 else
13462 p = "{p\t%1, %0|rp\t%0, %1}";
13463 #else
13464 if (STACK_TOP_P (operands[0]))
13465 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13466 else
13467 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13468 #endif
13469 break;
13472 if (STACK_TOP_P (operands[0]))
13474 if (STACK_TOP_P (operands[1]))
13475 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13476 else
13477 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13478 break;
13480 else if (STACK_TOP_P (operands[1]))
13482 #if SYSV386_COMPAT
13483 p = "{\t%1, %0|r\t%0, %1}";
13484 #else
13485 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13486 #endif
13488 else
13490 #if SYSV386_COMPAT
13491 p = "{r\t%2, %0|\t%0, %2}";
13492 #else
13493 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13494 #endif
13496 break;
13498 default:
13499 gcc_unreachable ();
13502 strcat (buf, p);
13503 return buf;
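/* Example (illustrative, not from the original source): for an SSE
   double-precision addition the code above assembles the template as
   "%v" + "add" + "sd" + the AVX operand block, i.e. with TARGET_AVX

     "%vaddsd\t{%2, %1, %0|%0, %1, %2}"

   where the leading "%v" is later expanded to "v" only when the AVX
   encoding is in use (that expansion is handled elsewhere).  */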
13506 /* Return needed mode for entity in optimize_mode_switching pass. */
13508 static int
13509 ix86_dirflag_mode_needed (rtx_insn *insn)
13511 if (CALL_P (insn))
13513 if (cfun->machine->func_type == TYPE_NORMAL)
13514 return X86_DIRFLAG_ANY;
13515 else
13516 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13517 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13520 if (recog_memoized (insn) < 0)
13521 return X86_DIRFLAG_ANY;
13523 if (get_attr_type (insn) == TYPE_STR)
13525 /* Emit cld instruction if stringops are used in the function. */
13526 if (cfun->machine->func_type == TYPE_NORMAL)
13527 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13528 else
13529 return X86_DIRFLAG_RESET;
13532 return X86_DIRFLAG_ANY;
13535 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
13537 static bool
13538 ix86_check_avx_upper_register (const_rtx exp)
13540 return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
13543 /* Return needed mode for entity in optimize_mode_switching pass. */
13545 static int
13546 ix86_avx_u128_mode_needed (rtx_insn *insn)
13548 if (CALL_P (insn))
13550 rtx link;
13552 /* Needed mode is set to AVX_U128_CLEAN if there are
13553 no 256bit or 512bit modes used in function arguments. */
13554 for (link = CALL_INSN_FUNCTION_USAGE (insn);
13555 link;
13556 link = XEXP (link, 1))
13558 if (GET_CODE (XEXP (link, 0)) == USE)
13560 rtx arg = XEXP (XEXP (link, 0), 0);
13562 if (ix86_check_avx_upper_register (arg))
13563 return AVX_U128_DIRTY;
13567 return AVX_U128_CLEAN;
13570 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13571 Hardware changes state only when a 256bit register is written to,
13572 but we need to prevent the compiler from moving the optimal insertion
13573 point above an eventual read from a 256bit or 512bit register. */
13574 subrtx_iterator::array_type array;
13575 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13576 if (ix86_check_avx_upper_register (*iter))
13577 return AVX_U128_DIRTY;
13579 return AVX_U128_ANY;
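/* Illustration (added, not in the original source): a call whose
   argument registers include a 256-bit value, e.g. a __m256 passed in
   %ymm0, makes the needed mode AVX_U128_DIRTY here, so no vzeroupper
   may be placed between the argument setup and the call; a call using
   only scalar or 128-bit arguments yields AVX_U128_CLEAN, allowing the
   mode-switching pass to insert a vzeroupper before it (see
   ix86_emit_mode_set below).  */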
13582 /* Return mode that i387 must be switched into
13583 prior to the execution of insn. */
13585 static int
13586 ix86_i387_mode_needed (int entity, rtx_insn *insn)
13588 enum attr_i387_cw mode;
13590 /* The mode UNINITIALIZED is used to store the control word after a
13591 function call or ASM pattern. The mode ANY specifies that the function
13592 has no requirements on the control word and makes no changes in the
13593 bits we are interested in. */
13595 if (CALL_P (insn)
13596 || (NONJUMP_INSN_P (insn)
13597 && (asm_noperands (PATTERN (insn)) >= 0
13598 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13599 return I387_CW_UNINITIALIZED;
13601 if (recog_memoized (insn) < 0)
13602 return I387_CW_ANY;
13604 mode = get_attr_i387_cw (insn);
13606 switch (entity)
13608 case I387_TRUNC:
13609 if (mode == I387_CW_TRUNC)
13610 return mode;
13611 break;
13613 case I387_FLOOR:
13614 if (mode == I387_CW_FLOOR)
13615 return mode;
13616 break;
13618 case I387_CEIL:
13619 if (mode == I387_CW_CEIL)
13620 return mode;
13621 break;
13623 default:
13624 gcc_unreachable ();
13627 return I387_CW_ANY;
13630 /* Return mode that entity must be switched into
13631 prior to the execution of insn. */
13633 static int
13634 ix86_mode_needed (int entity, rtx_insn *insn)
13636 switch (entity)
13638 case X86_DIRFLAG:
13639 return ix86_dirflag_mode_needed (insn);
13640 case AVX_U128:
13641 return ix86_avx_u128_mode_needed (insn);
13642 case I387_TRUNC:
13643 case I387_FLOOR:
13644 case I387_CEIL:
13645 return ix86_i387_mode_needed (entity, insn);
13646 default:
13647 gcc_unreachable ();
13649 return 0;
13652 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13654 static void
13655 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13657 if (ix86_check_avx_upper_register (dest))
13659 bool *used = (bool *) data;
13660 *used = true;
13664 /* Calculate mode of upper 128bit AVX registers after the insn. */
13666 static int
13667 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13669 rtx pat = PATTERN (insn);
13671 if (vzeroupper_pattern (pat, VOIDmode)
13672 || vzeroall_pattern (pat, VOIDmode))
13673 return AVX_U128_CLEAN;
13675 /* We know that the state is clean after a CALL insn if no 256bit
13676 or 512bit register is used for the function return value. */
13677 if (CALL_P (insn))
13679 bool avx_upper_reg_found = false;
13680 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13682 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13685 /* Otherwise, return current mode. Remember that if insn
13686 references AVX 256bit or 512bit registers, the mode was already
13687 changed to DIRTY from MODE_NEEDED. */
13688 return mode;
13691 /* Return the mode that an insn results in. */
13693 static int
13694 ix86_mode_after (int entity, int mode, rtx_insn *insn)
13696 switch (entity)
13698 case X86_DIRFLAG:
13699 return mode;
13700 case AVX_U128:
13701 return ix86_avx_u128_mode_after (mode, insn);
13702 case I387_TRUNC:
13703 case I387_FLOOR:
13704 case I387_CEIL:
13705 return mode;
13706 default:
13707 gcc_unreachable ();
13711 static int
13712 ix86_dirflag_mode_entry (void)
13714 /* For TARGET_CLD, or in an interrupt handler, we can't assume the
13715 direction flag state at function entry. */
13716 if (TARGET_CLD
13717 || cfun->machine->func_type != TYPE_NORMAL)
13718 return X86_DIRFLAG_ANY;
13720 return X86_DIRFLAG_RESET;
13723 static int
13724 ix86_avx_u128_mode_entry (void)
13726 tree arg;
13728 /* Entry mode is set to AVX_U128_DIRTY if there are
13729 256bit or 512bit modes used in function arguments. */
13730 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
13731 arg = TREE_CHAIN (arg))
13733 rtx incoming = DECL_INCOMING_RTL (arg);
13735 if (incoming && ix86_check_avx_upper_register (incoming))
13736 return AVX_U128_DIRTY;
13739 return AVX_U128_CLEAN;
13742 /* Return a mode that ENTITY is assumed to be
13743 switched to at function entry. */
13745 static int
13746 ix86_mode_entry (int entity)
13748 switch (entity)
13750 case X86_DIRFLAG:
13751 return ix86_dirflag_mode_entry ();
13752 case AVX_U128:
13753 return ix86_avx_u128_mode_entry ();
13754 case I387_TRUNC:
13755 case I387_FLOOR:
13756 case I387_CEIL:
13757 return I387_CW_ANY;
13758 default:
13759 gcc_unreachable ();
13763 static int
13764 ix86_avx_u128_mode_exit (void)
13766 rtx reg = crtl->return_rtx;
13768 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
13769 or 512bit modes used in the function return register. */
13770 if (reg && ix86_check_avx_upper_register (reg))
13771 return AVX_U128_DIRTY;
13773 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
13774 modes used in function arguments, otherwise return AVX_U128_CLEAN. */
13776 return ix86_avx_u128_mode_entry ();
13779 /* Return a mode that ENTITY is assumed to be
13780 switched to at function exit. */
13782 static int
13783 ix86_mode_exit (int entity)
13785 switch (entity)
13787 case X86_DIRFLAG:
13788 return X86_DIRFLAG_ANY;
13789 case AVX_U128:
13790 return ix86_avx_u128_mode_exit ();
13791 case I387_TRUNC:
13792 case I387_FLOOR:
13793 case I387_CEIL:
13794 return I387_CW_ANY;
13795 default:
13796 gcc_unreachable ();
13800 static int
13801 ix86_mode_priority (int, int n)
13803 return n;
13806 /* Output code to initialize the control word copies used by trunc?f?i and
13807 rounding patterns. MODE selects which I387_CW_* rounding mode the new
13808 control word copy is initialized for. */
13810 static void
13811 emit_i387_cw_initialization (int mode)
13813 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13814 rtx new_mode;
13816 enum ix86_stack_slot slot;
13818 rtx reg = gen_reg_rtx (HImode);
13820 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13821 emit_move_insn (reg, copy_rtx (stored_mode));
13823 switch (mode)
13825 case I387_CW_TRUNC:
13826 /* round toward zero (truncate) */
13827 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13828 slot = SLOT_CW_TRUNC;
13829 break;
13831 case I387_CW_FLOOR:
13832 /* round down toward -oo */
13833 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13834 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13835 slot = SLOT_CW_FLOOR;
13836 break;
13838 case I387_CW_CEIL:
13839 /* round up toward +oo */
13840 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13841 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13842 slot = SLOT_CW_CEIL;
13843 break;
13845 default:
13846 gcc_unreachable ();
13849 gcc_assert (slot < MAX_386_STACK_LOCALS);
13851 new_mode = assign_386_stack_local (HImode, slot);
13852 emit_move_insn (new_mode, reg);
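/* For reference: the bit manipulation above targets the x87 rounding-control
   field, bits 10-11 of the FPU control word (mask 0x0c00).  OR-ing in 0x0c00
   selects round-toward-zero (truncate); clearing the field and OR-ing in
   0x0400 selects round-down, 0x0800 selects round-up, and 0x0000 would mean
   round-to-nearest.  */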
13855 /* Generate one or more insns to set ENTITY to MODE. */
13857 static void
13858 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
13859 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
13861 switch (entity)
13863 case X86_DIRFLAG:
13864 if (mode == X86_DIRFLAG_RESET)
13865 emit_insn (gen_cld ());
13866 break;
13867 case AVX_U128:
13868 if (mode == AVX_U128_CLEAN)
13869 emit_insn (gen_avx_vzeroupper ());
13870 break;
13871 case I387_TRUNC:
13872 case I387_FLOOR:
13873 case I387_CEIL:
13874 if (mode != I387_CW_ANY
13875 && mode != I387_CW_UNINITIALIZED)
13876 emit_i387_cw_initialization (mode);
13877 break;
13878 default:
13879 gcc_unreachable ();
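/* As an illustrative sketch of how the AVX_U128 entity is used: a function
   whose incoming arguments use 256-bit or 512-bit modes enters in the DIRTY
   state (see ix86_avx_u128_mode_entry above), and the mode-switching pass
   calls this hook to emit a vzeroupper wherever a transition to
   AVX_U128_CLEAN is required, typically ahead of calls and returns that do
   not themselves touch the upper register halves.  */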
13883 /* Output code for INSN to convert a float to a signed int. OPERANDS
13884 are the insn operands. The output may be [HSD]Imode and the input
13885 operand may be [SDX]Fmode. */
13887 const char *
13888 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
13890 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
13891 bool dimode_p = GET_MODE (operands[0]) == DImode;
13892 int round_mode = get_attr_i387_cw (insn);
13894 static char buf[40];
13895 const char *p;
13897 /* Jump through a hoop or two for DImode, since the hardware has no
13898 non-popping instruction. We used to do this a different way, but
13899 that was somewhat fragile and broke with post-reload splitters. */
13900 if ((dimode_p || fisttp) && !stack_top_dies)
13901 output_asm_insn ("fld\t%y1", operands);
13903 gcc_assert (STACK_TOP_P (operands[1]));
13904 gcc_assert (MEM_P (operands[0]));
13905 gcc_assert (GET_MODE (operands[1]) != TFmode);
13907 if (fisttp)
13908 return "fisttp%Z0\t%0";
13910 strcpy (buf, "fist");
13912 if (round_mode != I387_CW_ANY)
13913 output_asm_insn ("fldcw\t%3", operands);
13915 p = "p%Z0\t%0";
13916 strcat (buf, p + !(stack_top_dies || dimode_p));
13918 output_asm_insn (buf, operands);
13920 if (round_mode != I387_CW_ANY)
13921 output_asm_insn ("fldcw\t%2", operands);
13923 return "";
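/* For example, truncating an x87 stack value to SImode while the control
   word is still set to round-to-nearest would typically emit something like

	fldcw	%3		load the truncating control word copy
	fistp	%0		store the integer and pop
	fldcw	%2		restore the original control word

   whereas when fisttp is available (SSE3), the single "fisttp" form returned
   above avoids the control-word juggling entirely.  */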
13926 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13927 have the values zero or one, indicates the ffreep insn's operand
13928 from the OPERANDS array. */
13930 static const char *
13931 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13933 if (TARGET_USE_FFREEP)
13934 #ifdef HAVE_AS_IX86_FFREEP
13935 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13936 #else
13938 static char retval[32];
13939 int regno = REGNO (operands[opno]);
13941 gcc_assert (STACK_REGNO_P (regno));
13943 regno -= FIRST_STACK_REG;
13945 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13946 return retval;
13948 #endif
13950 return opno ? "fstp\t%y1" : "fstp\t%y0";
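/* For reference: the hand-assembled fallback above emits e.g. a short
   constant "0xc2df" for %st(2); stored little-endian that is the byte
   sequence 0xdf 0xc2, i.e. the "ffreep %st(2)" encoding (0xdf 0xc0+i), for
   assemblers that do not know the ffreep mnemonic.  */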
13954 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13955 should be used. UNORDERED_P is true when fucom should be used. */
13957 const char *
13958 output_fp_compare (rtx_insn *insn, rtx *operands,
13959 bool eflags_p, bool unordered_p)
13961 rtx *xops = eflags_p ? &operands[0] : &operands[1];
13962 bool stack_top_dies;
13964 static char buf[40];
13965 const char *p;
13967 gcc_assert (STACK_TOP_P (xops[0]));
13969 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
13971 if (eflags_p)
13973 p = unordered_p ? "fucomi" : "fcomi";
13974 strcpy (buf, p);
13976 p = "p\t{%y1, %0|%0, %y1}";
13977 strcat (buf, p + !stack_top_dies);
13979 return buf;
13982 if (STACK_REG_P (xops[1])
13983 && stack_top_dies
13984 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
13986 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
13988 /* If the top of the 387 stack dies, and the other operand
13989 is also a stack register that dies, then this must be a
13990 `fcompp' float compare. */
13991 p = unordered_p ? "fucompp" : "fcompp";
13992 strcpy (buf, p);
13994 else if (const0_operand (xops[1], VOIDmode))
13996 gcc_assert (!unordered_p);
13997 strcpy (buf, "ftst");
13999 else
14001 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14003 gcc_assert (!unordered_p);
14004 p = "ficom";
14006 else
14007 p = unordered_p ? "fucom" : "fcom";
14009 strcpy (buf, p);
14011 p = "p%Z2\t%y2";
14012 strcat (buf, p + !stack_top_dies);
14015 output_asm_insn (buf, operands);
14016 return "fnstsw\t%0";
14019 void
14020 ix86_output_addr_vec_elt (FILE *file, int value)
14022 const char *directive = ASM_LONG;
14024 #ifdef ASM_QUAD
14025 if (TARGET_LP64)
14026 directive = ASM_QUAD;
14027 #else
14028 gcc_assert (!TARGET_64BIT);
14029 #endif
14031 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14034 void
14035 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14037 const char *directive = ASM_LONG;
14039 #ifdef ASM_QUAD
14040 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14041 directive = ASM_QUAD;
14042 #else
14043 gcc_assert (!TARGET_64BIT);
14044 #endif
14045 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14046 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14047 fprintf (file, "%s%s%d-%s%d\n",
14048 directive, LPREFIX, value, LPREFIX, rel);
14049 #if TARGET_MACHO
14050 else if (TARGET_MACHO)
14052 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14053 machopic_output_function_base_name (file);
14054 putc ('\n', file);
14056 #endif
14057 else if (HAVE_AS_GOTOFF_IN_DATA)
14058 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14059 else
14060 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14061 GOT_SYMBOL_NAME, LPREFIX, value);
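/* For example, with the usual ".L" local label prefix this emits
   ".long .L7-.L3" (or ".quad" for 64-bit DImode case vectors) on x86-64,
   ".long .L7@GOTOFF" when the assembler supports @GOTOFF in data, and the
   "_GLOBAL_OFFSET_TABLE_+[.-.L7]" form otherwise.  */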
14064 #define LEA_MAX_STALL (3)
14065 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14067 /* Increase given DISTANCE in half-cycles according to
14068 dependencies between PREV and NEXT instructions.
14069 Add 1 half-cycle if there is no dependency and
14070 go to the next cycle if there is a dependency. */
14072 static unsigned int
14073 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14075 df_ref def, use;
14077 if (!prev || !next)
14078 return distance + (distance & 1) + 2;
14080 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14081 return distance + 1;
14083 FOR_EACH_INSN_USE (use, next)
14084 FOR_EACH_INSN_DEF (def, prev)
14085 if (!DF_REF_IS_ARTIFICIAL (def)
14086 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14087 return distance + (distance & 1) + 2;
14089 return distance + 1;
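/* Worked example of the accounting above: with a running DISTANCE of 3
   half-cycles, an independent NEXT adds one half-cycle (3 -> 4), while a
   NEXT that reads a register defined by PREV is pushed to the start of the
   following cycle and charged a full cycle on top: 3 -> 3+1+2 = 6.  */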
14092 /* Function checks if instruction INSN defines register number
14093 REGNO1 or REGNO2. */
14095 bool
14096 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14097 rtx_insn *insn)
14099 df_ref def;
14101 FOR_EACH_INSN_DEF (def, insn)
14102 if (DF_REF_REG_DEF_P (def)
14103 && !DF_REF_IS_ARTIFICIAL (def)
14104 && (regno1 == DF_REF_REGNO (def)
14105 || regno2 == DF_REF_REGNO (def)))
14106 return true;
14108 return false;
14111 /* Function checks if instruction INSN uses register number
14112 REGNO as a part of address expression. */
14114 static bool
14115 insn_uses_reg_mem (unsigned int regno, rtx insn)
14117 df_ref use;
14119 FOR_EACH_INSN_USE (use, insn)
14120 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14121 return true;
14123 return false;
14126 /* Search backward for non-agu definition of register number REGNO1
14127 or register number REGNO2 in basic block starting from instruction
14128 START up to head of basic block or instruction INSN.
14130 Function puts true value into *FOUND var if definition was found
14131 and false otherwise.
14133 Distance in half-cycles between START and found instruction or head
14134 of BB is added to DISTANCE and returned. */
14136 static int
14137 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14138 rtx_insn *insn, int distance,
14139 rtx_insn *start, bool *found)
14141 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14142 rtx_insn *prev = start;
14143 rtx_insn *next = NULL;
14145 *found = false;
14147 while (prev
14148 && prev != insn
14149 && distance < LEA_SEARCH_THRESHOLD)
14151 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14153 distance = increase_distance (prev, next, distance);
14154 if (insn_defines_reg (regno1, regno2, prev))
14156 if (recog_memoized (prev) < 0
14157 || get_attr_type (prev) != TYPE_LEA)
14159 *found = true;
14160 return distance;
14164 next = prev;
14166 if (prev == BB_HEAD (bb))
14167 break;
14169 prev = PREV_INSN (prev);
14172 return distance;
14175 /* Search backward for non-agu definition of register number REGNO1
14176 or register number REGNO2 in INSN's basic block until
14177 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14178 2. Reach neighbor BBs boundary, or
14179 3. Reach agu definition.
14180 Returns the distance between the non-agu definition point and INSN.
14181 If no definition point, returns -1. */
14183 static int
14184 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14185 rtx_insn *insn)
14187 basic_block bb = BLOCK_FOR_INSN (insn);
14188 int distance = 0;
14189 bool found = false;
14191 if (insn != BB_HEAD (bb))
14192 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14193 distance, PREV_INSN (insn),
14194 &found);
14196 if (!found && distance < LEA_SEARCH_THRESHOLD)
14198 edge e;
14199 edge_iterator ei;
14200 bool simple_loop = false;
14202 FOR_EACH_EDGE (e, ei, bb->preds)
14203 if (e->src == bb)
14205 simple_loop = true;
14206 break;
14209 if (simple_loop)
14210 distance = distance_non_agu_define_in_bb (regno1, regno2,
14211 insn, distance,
14212 BB_END (bb), &found);
14213 else
14215 int shortest_dist = -1;
14216 bool found_in_bb = false;
14218 FOR_EACH_EDGE (e, ei, bb->preds)
14220 int bb_dist
14221 = distance_non_agu_define_in_bb (regno1, regno2,
14222 insn, distance,
14223 BB_END (e->src),
14224 &found_in_bb);
14225 if (found_in_bb)
14227 if (shortest_dist < 0)
14228 shortest_dist = bb_dist;
14229 else if (bb_dist > 0)
14230 shortest_dist = MIN (bb_dist, shortest_dist);
14232 found = true;
14236 distance = shortest_dist;
14240 /* get_attr_type may modify recog data. We want to make sure
14241 that recog data is valid for instruction INSN, on which
14242 distance_non_agu_define is called. INSN is unchanged here. */
14243 extract_insn_cached (insn);
14245 if (!found)
14246 return -1;
14248 return distance >> 1;
14251 /* Return the distance in half-cycles, added to DISTANCE, between INSN
14252 and the next insn that uses register number REGNO in a memory
14253 address. Return -1 if REGNO is set (redefined) first.
14255 Put true value into *FOUND if register usage was found and
14256 false otherwise.
14257 Put true value into *REDEFINED if register redefinition was
14258 found and false otherwise. */
14260 static int
14261 distance_agu_use_in_bb (unsigned int regno,
14262 rtx_insn *insn, int distance, rtx_insn *start,
14263 bool *found, bool *redefined)
14265 basic_block bb = NULL;
14266 rtx_insn *next = start;
14267 rtx_insn *prev = NULL;
14269 *found = false;
14270 *redefined = false;
14272 if (start != NULL_RTX)
14274 bb = BLOCK_FOR_INSN (start);
14275 if (start != BB_HEAD (bb))
14276 /* If insn and start belong to the same bb, set prev to insn,
14277 so the call to increase_distance will increase the distance
14278 between insns by 1. */
14279 prev = insn;
14282 while (next
14283 && next != insn
14284 && distance < LEA_SEARCH_THRESHOLD)
14286 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14288 distance = increase_distance(prev, next, distance);
14289 if (insn_uses_reg_mem (regno, next))
14291 /* Return DISTANCE if OP0 is used in memory
14292 address in NEXT. */
14293 *found = true;
14294 return distance;
14297 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14299 /* Return -1 if OP0 is set in NEXT. */
14300 *redefined = true;
14301 return -1;
14304 prev = next;
14307 if (next == BB_END (bb))
14308 break;
14310 next = NEXT_INSN (next);
14313 return distance;
14316 /* Return the distance between INSN and the next insn that uses
14317 register number REGNO0 in a memory address. Return -1 if no such
14318 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14320 static int
14321 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14323 basic_block bb = BLOCK_FOR_INSN (insn);
14324 int distance = 0;
14325 bool found = false;
14326 bool redefined = false;
14328 if (insn != BB_END (bb))
14329 distance = distance_agu_use_in_bb (regno0, insn, distance,
14330 NEXT_INSN (insn),
14331 &found, &redefined);
14333 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14335 edge e;
14336 edge_iterator ei;
14337 bool simple_loop = false;
14339 FOR_EACH_EDGE (e, ei, bb->succs)
14340 if (e->dest == bb)
14342 simple_loop = true;
14343 break;
14346 if (simple_loop)
14347 distance = distance_agu_use_in_bb (regno0, insn,
14348 distance, BB_HEAD (bb),
14349 &found, &redefined);
14350 else
14352 int shortest_dist = -1;
14353 bool found_in_bb = false;
14354 bool redefined_in_bb = false;
14356 FOR_EACH_EDGE (e, ei, bb->succs)
14358 int bb_dist
14359 = distance_agu_use_in_bb (regno0, insn,
14360 distance, BB_HEAD (e->dest),
14361 &found_in_bb, &redefined_in_bb);
14362 if (found_in_bb)
14364 if (shortest_dist < 0)
14365 shortest_dist = bb_dist;
14366 else if (bb_dist > 0)
14367 shortest_dist = MIN (bb_dist, shortest_dist);
14369 found = true;
14373 distance = shortest_dist;
14377 if (!found || redefined)
14378 return -1;
14380 return distance >> 1;
14383 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14384 there is a choice between LEA and ADD.
14385 Negative value: ADD is preferred over LEA
14386 Zero: Neutral
14387 Positive value: LEA is preferred over ADD. */
14388 #define IX86_LEA_PRIORITY 0
14390 /* Return true if using the lea INSN has a performance advantage over
14391 splitting it into a sequence of instructions. The split sequence has
14392 SPLIT_COST cycles higher latency than the lea itself. */
14394 static bool
14395 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14396 unsigned int regno2, int split_cost, bool has_scale)
14398 int dist_define, dist_use;
14400 /* For Silvermont and related cores, using a 2-source or 3-source LEA
14401 either for a non-destructive destination or for the ability to use
14402 SCALE justifies the LEA. */
14403 if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
14404 || TARGET_TREMONT || TARGET_INTEL)
14406 if (has_scale)
14407 return true;
14408 if (split_cost < 1)
14409 return false;
14410 if (regno0 == regno1 || regno0 == regno2)
14411 return false;
14412 return true;
14415 dist_define = distance_non_agu_define (regno1, regno2, insn);
14416 dist_use = distance_agu_use (regno0, insn);
14418 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14420 /* If there is no non-AGU operand definition, no AGU
14421 operand usage and the split cost is 0, then both the lea
14422 and non-lea variants have the same priority. Currently
14423 we prefer lea for 64-bit code and non-lea for 32-bit
14424 code. */
14425 if (dist_use < 0 && split_cost == 0)
14426 return TARGET_64BIT || IX86_LEA_PRIORITY;
14427 else
14428 return true;
14431 /* With a longer definition distance, lea is preferable.
14432 Adjust the distance to take the splitting cost and
14433 lea priority into account. */
14434 dist_define += split_cost + IX86_LEA_PRIORITY;
14436 /* If there is no use in a memory address, just check
14437 that the split cost exceeds the AGU stall. */
14438 if (dist_use < 0)
14439 return dist_define > LEA_MAX_STALL;
14441 /* If this insn has both a backward non-agu dependence and a forward
14442 agu dependence, the one with the shorter distance takes effect. */
14443 return dist_define >= dist_use;
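/* Worked example of the heuristic above, assuming a 2-operand lea with a
   backward non-AGU definition 2 half-cycles away (dist_define = 2), a split
   cost of 1 and the default IX86_LEA_PRIORITY of 0: dist_define is adjusted
   to 3, and with a forward AGU use at dist_use = 2 the lea wins (3 >= 2);
   with no AGU use at all the lea would lose, since 3 does not exceed
   LEA_MAX_STALL.  */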
14446 /* Return true if it is legal to clobber flags by INSN and
14447 false otherwise. */
14449 static bool
14450 ix86_ok_to_clobber_flags (rtx_insn *insn)
14452 basic_block bb = BLOCK_FOR_INSN (insn);
14453 df_ref use;
14454 bitmap live;
14456 while (insn)
14458 if (NONDEBUG_INSN_P (insn))
14460 FOR_EACH_INSN_USE (use, insn)
14461 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14462 return false;
14464 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14465 return true;
14468 if (insn == BB_END (bb))
14469 break;
14471 insn = NEXT_INSN (insn);
14474 live = df_get_live_out(bb);
14475 return !REGNO_REG_SET_P (live, FLAGS_REG);
14478 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14479 move and add to avoid AGU stalls. */
14481 bool
14482 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14484 unsigned int regno0, regno1, regno2;
14486 /* Check if we need to optimize. */
14487 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14488 return false;
14490 /* Check it is correct to split here. */
14491 if (!ix86_ok_to_clobber_flags(insn))
14492 return false;
14494 regno0 = true_regnum (operands[0]);
14495 regno1 = true_regnum (operands[1]);
14496 regno2 = true_regnum (operands[2]);
14498 /* We need to split only adds with a non-destructive
14499 destination operand. */
14500 if (regno0 == regno1 || regno0 == regno2)
14501 return false;
14502 else
14503 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14506 /* Return true if we should emit lea instruction instead of mov
14507 instruction. */
14509 bool
14510 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14512 unsigned int regno0, regno1;
14514 /* Check if we need to optimize. */
14515 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14516 return false;
14518 /* Use lea for reg to reg moves only. */
14519 if (!REG_P (operands[0]) || !REG_P (operands[1]))
14520 return false;
14522 regno0 = true_regnum (operands[0]);
14523 regno1 = true_regnum (operands[1]);
14525 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14528 /* Return true if we need to split lea into a sequence of
14529 instructions to avoid AGU stalls. */
14531 bool
14532 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14534 unsigned int regno0, regno1, regno2;
14535 int split_cost;
14536 struct ix86_address parts;
14537 int ok;
14539 /* Check we need to optimize. */
14540 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14541 return false;
14543 /* The "at least two components" test below might not catch simple
14544 move or zero extension insns if parts.base is non-NULL and parts.disp
14545 is const0_rtx as the only components in the address, e.g. if the
14546 register is %rbp or %r13. As this test is much cheaper and moves or
14547 zero extensions are the common case, do this check first. */
14548 if (REG_P (operands[1])
14549 || (SImode_address_operand (operands[1], VOIDmode)
14550 && REG_P (XEXP (operands[1], 0))))
14551 return false;
14553 /* Check if it is OK to split here. */
14554 if (!ix86_ok_to_clobber_flags (insn))
14555 return false;
14557 ok = ix86_decompose_address (operands[1], &parts);
14558 gcc_assert (ok);
14560 /* There should be at least two components in the address. */
14561 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14562 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14563 return false;
14565 /* We should not split into an add if a non-legitimate PIC
14566 operand is used as the displacement. */
14567 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14568 return false;
14570 regno0 = true_regnum (operands[0]) ;
14571 regno1 = INVALID_REGNUM;
14572 regno2 = INVALID_REGNUM;
14574 if (parts.base)
14575 regno1 = true_regnum (parts.base);
14576 if (parts.index)
14577 regno2 = true_regnum (parts.index);
14579 split_cost = 0;
14581 /* Compute how many cycles we will add to the execution time
14582 if we split the lea into a sequence of instructions. */
14583 if (parts.base || parts.index)
14585 /* Have to use a mov instruction if the non-destructive
14586 destination form is used. */
14587 if (regno1 != regno0 && regno2 != regno0)
14588 split_cost += 1;
14590 /* Have to add index to base if both exist. */
14591 if (parts.base && parts.index)
14592 split_cost += 1;
14594 /* Have to use shift and adds if scale is 2 or greater. */
14595 if (parts.scale > 1)
14597 if (regno0 != regno1)
14598 split_cost += 1;
14599 else if (regno2 == regno0)
14600 split_cost += 4;
14601 else
14602 split_cost += parts.scale;
14605 /* Have to use an add instruction with an immediate if
14606 disp is non-zero. */
14607 if (parts.disp && parts.disp != const0_rtx)
14608 split_cost += 1;
14610 /* Subtract the price of lea. */
14611 split_cost -= 1;
14614 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14615 parts.scale > 1);
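/* As an illustration of the split-cost accounting, take a hypothetical
   "lea 4(%rbx,%rcx,2), %rax": the destination matches neither base nor index
   (+1 for the extra mov), both base and index are present (+1 for the add),
   scale > 1 with regno0 != regno1 (+1), the displacement is non-zero (+1),
   minus 1 for the lea itself, giving a split cost of 3 to weigh against the
   AGU-stall distances in ix86_lea_outperforms.  */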
14618 /* Return true if it is OK to optimize an ADD operation to an LEA
14619 operation to avoid flag register consumption. For most processors,
14620 ADD is faster than LEA. For processors like BONNELL, if the
14621 destination register of the LEA holds an actual address which will be
14622 used soon, LEA is better, otherwise ADD is better. */
14624 bool
14625 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14627 unsigned int regno0 = true_regnum (operands[0]);
14628 unsigned int regno1 = true_regnum (operands[1]);
14629 unsigned int regno2 = true_regnum (operands[2]);
14631 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14632 if (regno0 != regno1 && regno0 != regno2)
14633 return true;
14635 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14636 return false;
14638 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14641 /* Return true if destination reg of SET_BODY is shift count of
14642 USE_BODY. */
14644 static bool
14645 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14647 rtx set_dest;
14648 rtx shift_rtx;
14649 int i;
14651 /* Retrieve destination of SET_BODY. */
14652 switch (GET_CODE (set_body))
14654 case SET:
14655 set_dest = SET_DEST (set_body);
14656 if (!set_dest || !REG_P (set_dest))
14657 return false;
14658 break;
14659 case PARALLEL:
14660 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14661 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14662 use_body))
14663 return true;
14664 /* FALLTHROUGH */
14665 default:
14666 return false;
14669 /* Retrieve shift count of USE_BODY. */
14670 switch (GET_CODE (use_body))
14672 case SET:
14673 shift_rtx = XEXP (use_body, 1);
14674 break;
14675 case PARALLEL:
14676 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14677 if (ix86_dep_by_shift_count_body (set_body,
14678 XVECEXP (use_body, 0, i)))
14679 return true;
14680 /* FALLTHROUGH */
14681 default:
14682 return false;
14685 if (shift_rtx
14686 && (GET_CODE (shift_rtx) == ASHIFT
14687 || GET_CODE (shift_rtx) == LSHIFTRT
14688 || GET_CODE (shift_rtx) == ASHIFTRT
14689 || GET_CODE (shift_rtx) == ROTATE
14690 || GET_CODE (shift_rtx) == ROTATERT))
14692 rtx shift_count = XEXP (shift_rtx, 1);
14694 /* Return true if shift count is dest of SET_BODY. */
14695 if (REG_P (shift_count))
14697 /* Add a check since this can be invoked before register
14698 allocation by the pre-reload scheduler. */
14699 if (reload_completed
14700 && true_regnum (set_dest) == true_regnum (shift_count))
14701 return true;
14702 else if (REGNO(set_dest) == REGNO(shift_count))
14703 return true;
14707 return false;
14710 /* Return true if destination reg of SET_INSN is shift count of
14711 USE_INSN. */
14713 bool
14714 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14716 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14717 PATTERN (use_insn));
14720 /* Return TRUE or FALSE depending on whether the unary operator meets the
14721 appropriate constraints. */
14723 bool
14724 ix86_unary_operator_ok (enum rtx_code,
14725 machine_mode,
14726 rtx operands[2])
14728 /* If one of operands is memory, source and destination must match. */
14729 if ((MEM_P (operands[0])
14730 || MEM_P (operands[1]))
14731 && ! rtx_equal_p (operands[0], operands[1]))
14732 return false;
14733 return true;
14736 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14737 are ok, keeping in mind the possible movddup alternative. */
14739 bool
14740 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14742 if (MEM_P (operands[0]))
14743 return rtx_equal_p (operands[0], operands[1 + high]);
14744 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14745 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14746 return true;
14749 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14750 then replicate the value for all elements of the vector
14751 register. */
14754 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
14756 int i, n_elt;
14757 rtvec v;
14758 machine_mode scalar_mode;
14760 switch (mode)
14762 case E_V64QImode:
14763 case E_V32QImode:
14764 case E_V16QImode:
14765 case E_V32HImode:
14766 case E_V16HImode:
14767 case E_V8HImode:
14768 case E_V16SImode:
14769 case E_V8SImode:
14770 case E_V4SImode:
14771 case E_V8DImode:
14772 case E_V4DImode:
14773 case E_V2DImode:
14774 gcc_assert (vect);
14775 /* FALLTHRU */
14776 case E_V16SFmode:
14777 case E_V8SFmode:
14778 case E_V4SFmode:
14779 case E_V8DFmode:
14780 case E_V4DFmode:
14781 case E_V2DFmode:
14782 n_elt = GET_MODE_NUNITS (mode);
14783 v = rtvec_alloc (n_elt);
14784 scalar_mode = GET_MODE_INNER (mode);
14786 RTVEC_ELT (v, 0) = value;
14788 for (i = 1; i < n_elt; ++i)
14789 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
14791 return gen_rtx_CONST_VECTOR (mode, v);
14793 default:
14794 gcc_unreachable ();
14798 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14799 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14800 for an SSE register. If VECT is true, then replicate the mask for
14801 all elements of the vector register. If INVERT is true, then create
14802 a mask excluding the sign bit. */
14805 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
14807 machine_mode vec_mode, imode;
14808 wide_int w;
14809 rtx mask, v;
14811 switch (mode)
14813 case E_V16SImode:
14814 case E_V16SFmode:
14815 case E_V8SImode:
14816 case E_V4SImode:
14817 case E_V8SFmode:
14818 case E_V4SFmode:
14819 vec_mode = mode;
14820 imode = SImode;
14821 break;
14823 case E_V8DImode:
14824 case E_V4DImode:
14825 case E_V2DImode:
14826 case E_V8DFmode:
14827 case E_V4DFmode:
14828 case E_V2DFmode:
14829 vec_mode = mode;
14830 imode = DImode;
14831 break;
14833 case E_TImode:
14834 case E_TFmode:
14835 vec_mode = VOIDmode;
14836 imode = TImode;
14837 break;
14839 default:
14840 gcc_unreachable ();
14843 machine_mode inner_mode = GET_MODE_INNER (mode);
14844 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
14845 GET_MODE_BITSIZE (inner_mode));
14846 if (invert)
14847 w = wi::bit_not (w);
14849 /* Force this value into the low part of a fp vector constant. */
14850 mask = immed_wide_int_const (w, imode);
14851 mask = gen_lowpart (inner_mode, mask);
14853 if (vec_mode == VOIDmode)
14854 return force_reg (inner_mode, mask);
14856 v = ix86_build_const_vector (vec_mode, vect, mask);
14857 return force_reg (vec_mode, v);
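/* For example, for V4SFmode with VECT set and INVERT clear this yields the
   vector constant { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }
   reinterpreted as floats, i.e. just the sign bits; with INVERT set each
   element becomes 0x7fffffff, masking everything but the sign bit.  */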
14860 /* Return TRUE or FALSE depending on whether the first SET in INSN
14861 has source and destination with matching CC modes, and that the
14862 CC mode is at least as constrained as REQ_MODE. */
14864 bool
14865 ix86_match_ccmode (rtx insn, machine_mode req_mode)
14867 rtx set;
14868 machine_mode set_mode;
14870 set = PATTERN (insn);
14871 if (GET_CODE (set) == PARALLEL)
14872 set = XVECEXP (set, 0, 0);
14873 gcc_assert (GET_CODE (set) == SET);
14874 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14876 set_mode = GET_MODE (SET_DEST (set));
14877 switch (set_mode)
14879 case E_CCNOmode:
14880 if (req_mode != CCNOmode
14881 && (req_mode != CCmode
14882 || XEXP (SET_SRC (set), 1) != const0_rtx))
14883 return false;
14884 break;
14885 case E_CCmode:
14886 if (req_mode == CCGCmode)
14887 return false;
14888 /* FALLTHRU */
14889 case E_CCGCmode:
14890 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14891 return false;
14892 /* FALLTHRU */
14893 case E_CCGOCmode:
14894 if (req_mode == CCZmode)
14895 return false;
14896 /* FALLTHRU */
14897 case E_CCZmode:
14898 break;
14900 case E_CCGZmode:
14902 case E_CCAmode:
14903 case E_CCCmode:
14904 case E_CCOmode:
14905 case E_CCPmode:
14906 case E_CCSmode:
14907 if (set_mode != req_mode)
14908 return false;
14909 break;
14911 default:
14912 gcc_unreachable ();
14915 return GET_MODE (SET_SRC (set)) == set_mode;
14918 machine_mode
14919 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14921 machine_mode mode = GET_MODE (op0);
14923 if (SCALAR_FLOAT_MODE_P (mode))
14925 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14926 return CCFPmode;
14929 switch (code)
14931 /* Only zero flag is needed. */
14932 case EQ: /* ZF=0 */
14933 case NE: /* ZF!=0 */
14934 return CCZmode;
14935 /* Codes needing carry flag. */
14936 case GEU: /* CF=0 */
14937 case LTU: /* CF=1 */
14938 /* Detect overflow checks. They need just the carry flag. */
14939 if (GET_CODE (op0) == PLUS
14940 && (rtx_equal_p (op1, XEXP (op0, 0))
14941 || rtx_equal_p (op1, XEXP (op0, 1))))
14942 return CCCmode;
14943 else
14944 return CCmode;
14945 case GTU: /* CF=0 & ZF=0 */
14946 case LEU: /* CF=1 | ZF=1 */
14947 return CCmode;
14948 /* Codes possibly doable only with sign flag when
14949 comparing against zero. */
14950 case GE: /* SF=OF or SF=0 */
14951 case LT: /* SF<>OF or SF=1 */
14952 if (op1 == const0_rtx)
14953 return CCGOCmode;
14954 else
14955 /* For other cases Carry flag is not required. */
14956 return CCGCmode;
14957 /* Codes doable only with the sign flag when comparing
14958 against zero, but for which we lack a jump instruction,
14959 so we need to use relational tests against overflow,
14960 which therefore has to be zero. */
14961 case GT: /* ZF=0 & SF=OF */
14962 case LE: /* ZF=1 | SF<>OF */
14963 if (op1 == const0_rtx)
14964 return CCNOmode;
14965 else
14966 return CCGCmode;
14967 /* The strcmp pattern does (use flags), and combine may ask us for the
14968 proper mode. */
14969 case USE:
14970 return CCmode;
14971 default:
14972 gcc_unreachable ();
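/* For instance, an equality test only needs ZF and gets CCZmode, while a
   GEU/LTU comparison whose first operand is a PLUS reusing the second
   operand (the classic "x + y < x" overflow check) only needs the carry
   flag and gets CCCmode; a signed relational test against a non-zero
   constant falls back to CCGCmode.  */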
14976 /* Return the fixed registers used for condition codes. */
14978 static bool
14979 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14981 *p1 = FLAGS_REG;
14982 *p2 = INVALID_REGNUM;
14983 return true;
14986 /* If two condition code modes are compatible, return a condition code
14987 mode which is compatible with both. Otherwise, return
14988 VOIDmode. */
14990 static machine_mode
14991 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
14993 if (m1 == m2)
14994 return m1;
14996 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14997 return VOIDmode;
14999 if ((m1 == CCGCmode && m2 == CCGOCmode)
15000 || (m1 == CCGOCmode && m2 == CCGCmode))
15001 return CCGCmode;
15003 if ((m1 == CCNOmode && m2 == CCGOCmode)
15004 || (m1 == CCGOCmode && m2 == CCNOmode))
15005 return CCNOmode;
15007 if (m1 == CCZmode
15008 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15009 return m2;
15010 else if (m2 == CCZmode
15011 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15012 return m1;
15014 switch (m1)
15016 default:
15017 gcc_unreachable ();
15019 case E_CCmode:
15020 case E_CCGCmode:
15021 case E_CCGOCmode:
15022 case E_CCNOmode:
15023 case E_CCAmode:
15024 case E_CCCmode:
15025 case E_CCOmode:
15026 case E_CCPmode:
15027 case E_CCSmode:
15028 case E_CCZmode:
15029 switch (m2)
15031 default:
15032 return VOIDmode;
15034 case E_CCmode:
15035 case E_CCGCmode:
15036 case E_CCGOCmode:
15037 case E_CCNOmode:
15038 case E_CCAmode:
15039 case E_CCCmode:
15040 case E_CCOmode:
15041 case E_CCPmode:
15042 case E_CCSmode:
15043 case E_CCZmode:
15044 return CCmode;
15047 case E_CCFPmode:
15048 /* These are only compatible with themselves, which we already
15049 checked above. */
15050 return VOIDmode;
15054 /* Return the strategy to use for floating-point. We assume that fcomi is always
15055 preferable where available, since that is also true when looking at size
15056 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15058 enum ix86_fpcmp_strategy
15059 ix86_fp_comparison_strategy (enum rtx_code)
15061 /* Do fcomi/sahf based test when profitable. */
15063 if (TARGET_CMOVE)
15064 return IX86_FPCMP_COMI;
15066 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15067 return IX86_FPCMP_SAHF;
15069 return IX86_FPCMP_ARITH;
15072 /* Convert comparison codes we use to represent FP comparison to integer
15073 code that will result in proper branch. Return UNKNOWN if no such code
15074 is available. */
15076 enum rtx_code
15077 ix86_fp_compare_code_to_integer (enum rtx_code code)
15079 switch (code)
15081 case GT:
15082 return GTU;
15083 case GE:
15084 return GEU;
15085 case ORDERED:
15086 case UNORDERED:
15087 return code;
15088 case UNEQ:
15089 return EQ;
15090 case UNLT:
15091 return LTU;
15092 case UNLE:
15093 return LEU;
15094 case LTGT:
15095 return NE;
15096 default:
15097 return UNKNOWN;
15101 /* Zero extend possibly SImode EXP to Pmode register. */
15103 ix86_zero_extend_to_Pmode (rtx exp)
15105 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15108 /* Return true if the function being called was marked with attribute
15109 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15110 to handle the non-PIC case in the backend because there is no easy
15111 interface for the front-end to force non-PLT calls to use the GOT.
15112 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15113 to call the function marked "noplt" indirectly. */
15115 static bool
15116 ix86_nopic_noplt_attribute_p (rtx call_op)
15118 if (flag_pic || ix86_cmodel == CM_LARGE
15119 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15120 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15121 || SYMBOL_REF_LOCAL_P (call_op))
15122 return false;
15124 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15126 if (!flag_plt
15127 || (symbol_decl != NULL_TREE
15128 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15129 return true;
15131 return false;
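/* For example, with -fno-plt (or __attribute__((noplt))) in non-PIC 64-bit
   code, a call to an external function is routed through the GOT, so
   "call foo" is emitted as something like "call *foo@GOTPCREL(%rip)"
   instead of going through the PLT (see the GOTPCREL templates in
   ix86_output_call_insn below).  */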
15134 /* Helper to output the jmp/call. */
15135 static void
15136 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15138 if (thunk_name != NULL)
15140 fprintf (asm_out_file, "\tjmp\t");
15141 assemble_name (asm_out_file, thunk_name);
15142 putc ('\n', asm_out_file);
15144 else
15145 output_indirect_thunk (regno);
15148 /* Output an indirect branch via a call and return thunk. CALL_OP is a
15149 register which contains the branch target. The branch is a tail call
15150 if SIBCALL_P is true.
15151 A normal call is converted to:
15153 call __x86_indirect_thunk_reg
15155 and a tail call is converted to:
15157 jmp __x86_indirect_thunk_reg
15160 static void
15161 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15163 char thunk_name_buf[32];
15164 char *thunk_name;
15165 enum indirect_thunk_prefix need_prefix
15166 = indirect_thunk_need_prefix (current_output_insn);
15167 int regno = REGNO (call_op);
15169 if (cfun->machine->indirect_branch_type
15170 != indirect_branch_thunk_inline)
15172 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15174 int i = regno;
15175 if (i >= FIRST_REX_INT_REG)
15176 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15177 indirect_thunks_used |= 1 << i;
15179 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15180 thunk_name = thunk_name_buf;
15182 else
15183 thunk_name = NULL;
15185 if (sibcall_p)
15186 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15187 else
15189 if (thunk_name != NULL)
15191 fprintf (asm_out_file, "\tcall\t");
15192 assemble_name (asm_out_file, thunk_name);
15193 putc ('\n', asm_out_file);
15194 return;
15197 char indirectlabel1[32];
15198 char indirectlabel2[32];
15200 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15201 INDIRECT_LABEL,
15202 indirectlabelno++);
15203 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15204 INDIRECT_LABEL,
15205 indirectlabelno++);
15207 /* Jump. */
15208 fputs ("\tjmp\t", asm_out_file);
15209 assemble_name_raw (asm_out_file, indirectlabel2);
15210 fputc ('\n', asm_out_file);
15212 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15214 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15216 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15218 /* Call. */
15219 fputs ("\tcall\t", asm_out_file);
15220 assemble_name_raw (asm_out_file, indirectlabel1);
15221 fputc ('\n', asm_out_file);
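/* Sketch of the expected output, assuming the conventional thunk naming:
   with -mindirect-branch=thunk and the target in %rax, a sibling call
   becomes "jmp __x86_indirect_thunk_rax" and a normal call becomes
   "call __x86_indirect_thunk_rax"; with -mindirect-branch=thunk-inline the
   label/jmp/call sequence above expands the thunk body in place.  */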
15225 /* Output indirect branch via a call and return thunk. CALL_OP is
15226 the branch target. XASM is the assembly template for CALL_OP.
15227 Branch is a tail call if SIBCALL_P is true. A normal call is
15228 converted to:
15230 jmp L2
15232 push CALL_OP
15233 jmp __x86_indirect_thunk
15235 call L1
15237 and a tail call is converted to:
15239 push CALL_OP
15240 jmp __x86_indirect_thunk
15243 static void
15244 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15245 bool sibcall_p)
15247 char thunk_name_buf[32];
15248 char *thunk_name;
15249 char push_buf[64];
15250 enum indirect_thunk_prefix need_prefix
15251 = indirect_thunk_need_prefix (current_output_insn);
15252 int regno = -1;
15254 if (cfun->machine->indirect_branch_type
15255 != indirect_branch_thunk_inline)
15257 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15258 indirect_thunk_needed = true;
15259 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15260 thunk_name = thunk_name_buf;
15262 else
15263 thunk_name = NULL;
15265 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15266 TARGET_64BIT ? 'q' : 'l', xasm);
15268 if (sibcall_p)
15270 output_asm_insn (push_buf, &call_op);
15271 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15273 else
15275 char indirectlabel1[32];
15276 char indirectlabel2[32];
15278 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15279 INDIRECT_LABEL,
15280 indirectlabelno++);
15281 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15282 INDIRECT_LABEL,
15283 indirectlabelno++);
15285 /* Jump. */
15286 fputs ("\tjmp\t", asm_out_file);
15287 assemble_name_raw (asm_out_file, indirectlabel2);
15288 fputc ('\n', asm_out_file);
15290 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15292 /* An external function may be called via GOT, instead of PLT. */
15293 if (MEM_P (call_op))
15295 struct ix86_address parts;
15296 rtx addr = XEXP (call_op, 0);
15297 if (ix86_decompose_address (addr, &parts)
15298 && parts.base == stack_pointer_rtx)
15300 /* Since call will adjust stack by -UNITS_PER_WORD,
15301 we must convert "disp(stack, index, scale)" to
15302 "disp+UNITS_PER_WORD(stack, index, scale)". */
15303 if (parts.index)
15305 addr = gen_rtx_MULT (Pmode, parts.index,
15306 GEN_INT (parts.scale));
15307 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15308 addr);
15310 else
15311 addr = stack_pointer_rtx;
15313 rtx disp;
15314 if (parts.disp != NULL_RTX)
15315 disp = plus_constant (Pmode, parts.disp,
15316 UNITS_PER_WORD);
15317 else
15318 disp = GEN_INT (UNITS_PER_WORD);
15320 addr = gen_rtx_PLUS (Pmode, addr, disp);
15321 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15325 output_asm_insn (push_buf, &call_op);
15327 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15329 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15331 /* Call. */
15332 fputs ("\tcall\t", asm_out_file);
15333 assemble_name_raw (asm_out_file, indirectlabel1);
15334 fputc ('\n', asm_out_file);
15338 /* Output indirect branch via a call and return thunk. CALL_OP is
15339 the branch target. XASM is the assembly template for CALL_OP.
15340 Branch is a tail call if SIBCALL_P is true. */
15342 static void
15343 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15344 bool sibcall_p)
15346 if (REG_P (call_op))
15347 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15348 else
15349 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15352 /* Output indirect jump. CALL_OP is the jump target. */
15354 const char *
15355 ix86_output_indirect_jmp (rtx call_op)
15357 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15359 /* We can't have red-zone since "call" in the indirect thunk
15360 pushes the return address onto stack, destroying red-zone. */
15361 if (ix86_red_zone_size != 0)
15362 gcc_unreachable ();
15364 ix86_output_indirect_branch (call_op, "%0", true);
15365 return "";
15367 else
15368 return "%!jmp\t%A0";
15371 /* Output return instrumentation for current function if needed. */
15373 static void
15374 output_return_instrumentation (void)
15376 if (ix86_instrument_return != instrument_return_none
15377 && flag_fentry
15378 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15380 if (ix86_flag_record_return)
15381 fprintf (asm_out_file, "1:\n");
15382 switch (ix86_instrument_return)
15384 case instrument_return_call:
15385 fprintf (asm_out_file, "\tcall\t__return__\n");
15386 break;
15387 case instrument_return_nop5:
15388 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15389 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15390 break;
15391 case instrument_return_none:
15392 break;
15395 if (ix86_flag_record_return)
15397 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15398 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15399 fprintf (asm_out_file, "\t.previous\n");
15404 /* Output a function return. Add a REP prefix to the RET if LONG_P
15405 is true and the function return is kept. */
15407 const char *
15408 ix86_output_function_return (bool long_p)
15410 output_return_instrumentation ();
15412 if (cfun->machine->function_return_type != indirect_branch_keep)
15414 char thunk_name[32];
15415 enum indirect_thunk_prefix need_prefix
15416 = indirect_thunk_need_prefix (current_output_insn);
15418 if (cfun->machine->function_return_type
15419 != indirect_branch_thunk_inline)
15421 bool need_thunk = (cfun->machine->function_return_type
15422 == indirect_branch_thunk);
15423 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15424 true);
15425 indirect_return_needed |= need_thunk;
15426 fprintf (asm_out_file, "\tjmp\t");
15427 assemble_name (asm_out_file, thunk_name);
15428 putc ('\n', asm_out_file);
15430 else
15431 output_indirect_thunk (INVALID_REGNUM);
15433 return "";
15436 if (!long_p)
15437 return "%!ret";
15439 return "rep%; ret";
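/* Sketch of the possible outputs: with -mfunction-return=thunk the return
   is replaced by a jump to the external return thunk (conventionally
   __x86_return_thunk), with thunk-inline the thunk body is expanded here,
   and with the default "keep" either a plain "ret" or, when LONG_P, the
   two-byte "rep; ret" form is emitted.  */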
15442 /* Output indirect function return. RET_OP is the function return
15443 target. */
15445 const char *
15446 ix86_output_indirect_function_return (rtx ret_op)
15448 if (cfun->machine->function_return_type != indirect_branch_keep)
15450 char thunk_name[32];
15451 enum indirect_thunk_prefix need_prefix
15452 = indirect_thunk_need_prefix (current_output_insn);
15453 unsigned int regno = REGNO (ret_op);
15454 gcc_assert (regno == CX_REG);
15456 if (cfun->machine->function_return_type
15457 != indirect_branch_thunk_inline)
15459 bool need_thunk = (cfun->machine->function_return_type
15460 == indirect_branch_thunk);
15461 indirect_thunk_name (thunk_name, regno, need_prefix, true);
15463 if (need_thunk)
15465 indirect_return_via_cx = true;
15466 indirect_thunks_used |= 1 << CX_REG;
15468 fprintf (asm_out_file, "\tjmp\t");
15469 assemble_name (asm_out_file, thunk_name);
15470 putc ('\n', asm_out_file);
15472 else
15473 output_indirect_thunk (regno);
15475 return "";
15477 else
15478 return "%!jmp\t%A0";
15481 /* Output the assembly for a call instruction. */
15483 const char *
15484 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15486 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15487 bool output_indirect_p
15488 = (!TARGET_SEH
15489 && cfun->machine->indirect_branch_type != indirect_branch_keep);
15490 bool seh_nop_p = false;
15491 const char *xasm;
15493 if (SIBLING_CALL_P (insn))
15495 output_return_instrumentation ();
15496 if (direct_p)
15498 if (ix86_nopic_noplt_attribute_p (call_op))
15500 direct_p = false;
15501 if (TARGET_64BIT)
15503 if (output_indirect_p)
15504 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15505 else
15506 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15508 else
15510 if (output_indirect_p)
15511 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15512 else
15513 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15516 else
15517 xasm = "%!jmp\t%P0";
15519 /* SEH epilogue detection requires the indirect branch case
15520 to include REX.W. */
15521 else if (TARGET_SEH)
15522 xasm = "%!rex.W jmp\t%A0";
15523 else
15525 if (output_indirect_p)
15526 xasm = "%0";
15527 else
15528 xasm = "%!jmp\t%A0";
15531 if (output_indirect_p && !direct_p)
15532 ix86_output_indirect_branch (call_op, xasm, true);
15533 else
15534 output_asm_insn (xasm, &call_op);
15535 return "";
15538 /* SEH unwinding can require an extra nop to be emitted in several
15539 circumstances. Determine if we have one of those. */
15540 if (TARGET_SEH)
15542 rtx_insn *i;
15544 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15546 /* Prevent a catch region from being adjacent to a jump that would
15547 be interpreted as an epilogue sequence by the unwinder. */
15548 if (JUMP_P(i) && CROSSING_JUMP_P (i))
15550 seh_nop_p = true;
15551 break;
15554 /* If we get to another real insn, we don't need the nop. */
15555 if (INSN_P (i))
15556 break;
15558 /* If we get to the epilogue note, prevent a catch region from
15559 being adjacent to the standard epilogue sequence. If non-
15560 call-exceptions, we'll have done this during epilogue emission. */
15561 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15562 && !flag_non_call_exceptions
15563 && !can_throw_internal (insn))
15565 seh_nop_p = true;
15566 break;
15570 /* If we didn't find a real insn following the call, prevent the
15571 unwinder from looking into the next function. */
15572 if (i == NULL)
15573 seh_nop_p = true;
15576 if (direct_p)
15578 if (ix86_nopic_noplt_attribute_p (call_op))
15580 direct_p = false;
15581 if (TARGET_64BIT)
15583 if (output_indirect_p)
15584 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15585 else
15586 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15588 else
15590 if (output_indirect_p)
15591 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15592 else
15593 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15596 else
15597 xasm = "%!call\t%P0";
15599 else
15601 if (output_indirect_p)
15602 xasm = "%0";
15603 else
15604 xasm = "%!call\t%A0";
15607 if (output_indirect_p && !direct_p)
15608 ix86_output_indirect_branch (call_op, xasm, false);
15609 else
15610 output_asm_insn (xasm, &call_op);
15612 if (seh_nop_p)
15613 return "nop";
15615 return "";
15618 /* Return a MEM corresponding to a stack slot with mode MODE.
15619 Allocate a new slot if necessary.
15621 The RTL for a function can have several slots available: N is
15622 which slot to use. */
15625 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15627 struct stack_local_entry *s;
15629 gcc_assert (n < MAX_386_STACK_LOCALS);
15631 for (s = ix86_stack_locals; s; s = s->next)
15632 if (s->mode == mode && s->n == n)
15633 return validize_mem (copy_rtx (s->rtl));
15635 s = ggc_alloc<stack_local_entry> ();
15636 s->n = n;
15637 s->mode = mode;
15638 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15640 s->next = ix86_stack_locals;
15641 ix86_stack_locals = s;
15642 return validize_mem (copy_rtx (s->rtl));
15645 static void
15646 ix86_instantiate_decls (void)
15648 struct stack_local_entry *s;
15650 for (s = ix86_stack_locals; s; s = s->next)
15651 if (s->rtl != NULL_RTX)
15652 instantiate_decl_rtl (s->rtl);
15655 /* Check whether x86 address PARTS is a pc-relative address. */
15657 bool
15658 ix86_rip_relative_addr_p (struct ix86_address *parts)
15660 rtx base, index, disp;
15662 base = parts->base;
15663 index = parts->index;
15664 disp = parts->disp;
15666 if (disp && !base && !index)
15668 if (TARGET_64BIT)
15670 rtx symbol = disp;
15672 if (GET_CODE (disp) == CONST)
15673 symbol = XEXP (disp, 0);
15674 if (GET_CODE (symbol) == PLUS
15675 && CONST_INT_P (XEXP (symbol, 1)))
15676 symbol = XEXP (symbol, 0);
15678 if (GET_CODE (symbol) == LABEL_REF
15679 || (GET_CODE (symbol) == SYMBOL_REF
15680 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15681 || (GET_CODE (symbol) == UNSPEC
15682 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15683 || XINT (symbol, 1) == UNSPEC_PCREL
15684 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15685 return true;
15688 return false;
15691 /* Calculate the length of the memory address in the instruction encoding.
15692 Includes addr32 prefix, does not include the one-byte modrm, opcode,
15693 or other prefixes. We never generate addr32 prefix for LEA insn. */
15696 memory_address_length (rtx addr, bool lea)
15698 struct ix86_address parts;
15699 rtx base, index, disp;
15700 int len;
15701 int ok;
15703 if (GET_CODE (addr) == PRE_DEC
15704 || GET_CODE (addr) == POST_INC
15705 || GET_CODE (addr) == PRE_MODIFY
15706 || GET_CODE (addr) == POST_MODIFY)
15707 return 0;
15709 ok = ix86_decompose_address (addr, &parts);
15710 gcc_assert (ok);
15712 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
15714 /* If this is not LEA instruction, add the length of addr32 prefix. */
15715 if (TARGET_64BIT && !lea
15716 && (SImode_address_operand (addr, VOIDmode)
15717 || (parts.base && GET_MODE (parts.base) == SImode)
15718 || (parts.index && GET_MODE (parts.index) == SImode)))
15719 len++;
15721 base = parts.base;
15722 index = parts.index;
15723 disp = parts.disp;
15725 if (base && SUBREG_P (base))
15726 base = SUBREG_REG (base);
15727 if (index && SUBREG_P (index))
15728 index = SUBREG_REG (index);
15730 gcc_assert (base == NULL_RTX || REG_P (base));
15731 gcc_assert (index == NULL_RTX || REG_P (index));
15733 /* Rule of thumb:
15734 - esp as the base always wants an index,
15735 - ebp as the base always wants a displacement,
15736 - r12 as the base always wants an index,
15737 - r13 as the base always wants a displacement. */
15739 /* Register Indirect. */
15740 if (base && !index && !disp)
15742 /* esp (for its index) and ebp (for its displacement) need
15743 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
15744 code. */
15745 if (base == arg_pointer_rtx
15746 || base == frame_pointer_rtx
15747 || REGNO (base) == SP_REG
15748 || REGNO (base) == BP_REG
15749 || REGNO (base) == R12_REG
15750 || REGNO (base) == R13_REG)
15751 len++;
15754 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
15755 is not disp32, but disp32(%rip), so for disp32
15756 SIB byte is needed, unless print_operand_address
15757 optimizes it into disp32(%rip) or (%rip) is implied
15758 by UNSPEC. */
15759 else if (disp && !base && !index)
15761 len += 4;
15762 if (!ix86_rip_relative_addr_p (&parts))
15763 len++;
15765 else
15767 /* Find the length of the displacement constant. */
15768 if (disp)
15770 if (base && satisfies_constraint_K (disp))
15771 len += 1;
15772 else
15773 len += 4;
15775 /* ebp always wants a displacement. Similarly r13. */
15776 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
15777 len++;
15779 /* An index requires the two-byte modrm form.... */
15780 if (index
15781 /* ...like esp (or r12), which always wants an index. */
15782 || base == arg_pointer_rtx
15783 || base == frame_pointer_rtx
15784 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
15785 len++;
15788 return len;
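/* A few examples of the resulting lengths (excluding modrm/opcode):
   "(%rbx)" costs 0; "(%rsp)" or "(%rbp)" costs 1 (forced SIB byte or forced
   disp8); "8(%rbx,%rcx,4)" costs 2 (disp8 plus the SIB byte for the index);
   and a bare 32-bit displacement costs 4, plus 1 more when it cannot be
   expressed %rip-relatively.  */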
15791 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15792 is set, expect that the insn has an 8-bit immediate alternative. */
15794 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
15796 int len = 0;
15797 int i;
15798 extract_insn_cached (insn);
15799 for (i = recog_data.n_operands - 1; i >= 0; --i)
15800 if (CONSTANT_P (recog_data.operand[i]))
15802 enum attr_mode mode = get_attr_mode (insn);
15804 gcc_assert (!len);
15805 if (shortform && CONST_INT_P (recog_data.operand[i]))
15807 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
15808 switch (mode)
15810 case MODE_QI:
15811 len = 1;
15812 continue;
15813 case MODE_HI:
15814 ival = trunc_int_for_mode (ival, HImode);
15815 break;
15816 case MODE_SI:
15817 ival = trunc_int_for_mode (ival, SImode);
15818 break;
15819 default:
15820 break;
15822 if (IN_RANGE (ival, -128, 127))
15824 len = 1;
15825 continue;
15828 switch (mode)
15830 case MODE_QI:
15831 len = 1;
15832 break;
15833 case MODE_HI:
15834 len = 2;
15835 break;
15836 case MODE_SI:
15837 len = 4;
15838 break;
15839 /* Immediates for DImode instructions are encoded
15840 as 32bit sign extended values. */
15841 case MODE_DI:
15842 len = 4;
15843 break;
15844 default:
15845 fatal_insn ("unknown insn mode", insn);
15848 return len;
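/* For instance, "add $5, %eax" in a pattern with an 8-bit alternative
   (SHORTFORM) gets an immediate length of 1, "add $300, %eax" needs the full
   4 bytes, and DImode arithmetic immediates are also counted as 4 since they
   are encoded as sign-extended 32-bit values.  */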
15851 /* Compute default value for "length_address" attribute. */
15853 ix86_attr_length_address_default (rtx_insn *insn)
15855 int i;
15857 if (get_attr_type (insn) == TYPE_LEA)
15859 rtx set = PATTERN (insn), addr;
15861 if (GET_CODE (set) == PARALLEL)
15862 set = XVECEXP (set, 0, 0);
15864 gcc_assert (GET_CODE (set) == SET);
15866 addr = SET_SRC (set);
15868 return memory_address_length (addr, true);
15871 extract_insn_cached (insn);
15872 for (i = recog_data.n_operands - 1; i >= 0; --i)
15874 rtx op = recog_data.operand[i];
15875 if (MEM_P (op))
15877 constrain_operands_cached (insn, reload_completed);
15878 if (which_alternative != -1)
15880 const char *constraints = recog_data.constraints[i];
15881 int alt = which_alternative;
15883 while (*constraints == '=' || *constraints == '+')
15884 constraints++;
15885 while (alt-- > 0)
15886 while (*constraints++ != ',')
15888 /* Skip ignored operands. */
15889 if (*constraints == 'X')
15890 continue;
15893 int len = memory_address_length (XEXP (op, 0), false);
15895 /* Account for segment prefix for non-default addr spaces. */
15896 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
15897 len++;
15899 return len;
15902 return 0;
15905 /* Compute default value for "length_vex" attribute. It includes
15906 2 or 3 byte VEX prefix and 1 opcode byte. */
15909 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
15910 bool has_vex_w)
15912 int i;
15914 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W
15915 bit requires the 3-byte VEX prefix. */
15916 if (!has_0f_opcode || has_vex_w)
15917 return 3 + 1;
15919 /* We can always use 2 byte VEX prefix in 32bit. */
15920 if (!TARGET_64BIT)
15921 return 2 + 1;
15923 extract_insn_cached (insn);
15925 for (i = recog_data.n_operands - 1; i >= 0; --i)
15926 if (REG_P (recog_data.operand[i]))
15928 /* REX.W bit uses 3 byte VEX prefix. */
15929 if (GET_MODE (recog_data.operand[i]) == DImode
15930 && GENERAL_REG_P (recog_data.operand[i]))
15931 return 3 + 1;
15933 else
15935 /* REX.X or REX.B bits use 3 byte VEX prefix. */
15936 if (MEM_P (recog_data.operand[i])
15937 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
15938 return 3 + 1;
15941 return 2 + 1;
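/* For example, "vaddps %xmm1, %xmm2, %xmm0" gets 2 + 1 (2-byte VEX plus
   opcode); using a DImode general register operand (VEX.W), a non-0f opcode
   map, or an extended register in a memory operand (REX.X/REX.B) forces the
   3-byte VEX prefix, giving 3 + 1.  */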
15945 static bool
15946 ix86_class_likely_spilled_p (reg_class_t);
15948 /* Return true if the LHS of INSN is a HW function argument register, and
15949 set *IS_SPILLED to true if it is a likely-spilled HW register. */
15950 static bool
15951 insn_is_function_arg (rtx insn, bool* is_spilled)
15953 rtx dst;
15955 if (!NONDEBUG_INSN_P (insn))
15956 return false;
15957 /* Call instructions are not movable; ignore them. */
15958 if (CALL_P (insn))
15959 return false;
15960 insn = PATTERN (insn);
15961 if (GET_CODE (insn) == PARALLEL)
15962 insn = XVECEXP (insn, 0, 0);
15963 if (GET_CODE (insn) != SET)
15964 return false;
15965 dst = SET_DEST (insn);
15966 if (REG_P (dst) && HARD_REGISTER_P (dst)
15967 && ix86_function_arg_regno_p (REGNO (dst)))
15969 /* Is it a likely-spilled HW register? */
15970 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
15971 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
15972 *is_spilled = true;
15973 return true;
15975 return false;
15978 /* Add output dependencies for a chain of adjacent function arguments, but
15979 only if there is a move to a likely-spilled HW register. Return the first
15980 argument if at least one dependence was added, or NULL otherwise. */
15981 static rtx_insn *
15982 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
15984 rtx_insn *insn;
15985 rtx_insn *last = call;
15986 rtx_insn *first_arg = NULL;
15987 bool is_spilled = false;
15989 head = PREV_INSN (head);
15991 /* Find the argument-passing instruction nearest to the call. */
15992 while (true)
15994 last = PREV_INSN (last);
15995 if (last == head)
15996 return NULL;
15997 if (!NONDEBUG_INSN_P (last))
15998 continue;
15999 if (insn_is_function_arg (last, &is_spilled))
16000 break;
16001 return NULL;
16004 first_arg = last;
16005 while (true)
16007 insn = PREV_INSN (last);
16008 if (!INSN_P (insn))
16009 break;
16010 if (insn == head)
16011 break;
16012 if (!NONDEBUG_INSN_P (insn))
16014 last = insn;
16015 continue;
16017 if (insn_is_function_arg (insn, &is_spilled))
16019 /* Add an output dependence between two function arguments if the chain
16020 of output arguments contains likely-spilled HW registers. */
16021 if (is_spilled)
16022 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16023 first_arg = last = insn;
16025 else
16026 break;
16028 if (!is_spilled)
16029 return NULL;
16030 return first_arg;
16033 /* Add output or anti dependency from insn to first_arg to restrict its code
16034 motion. */
16035 static void
16036 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16038 rtx set;
16039 rtx tmp;
16041 set = single_set (insn);
16042 if (!set)
16043 return;
16044 tmp = SET_DEST (set);
16045 if (REG_P (tmp))
16047 /* Add output dependency to the first function argument. */
16048 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16049 return;
16051 /* Add anti dependency. */
16052 add_dependence (first_arg, insn, REG_DEP_ANTI);
16055 /* Avoid cross block motion of function argument through adding dependency
16056 from the first non-jump instruction in bb. */
16057 static void
16058 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16060 rtx_insn *insn = BB_END (bb);
16062 while (insn)
16064 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16066 rtx set = single_set (insn);
16067 if (set)
16069 avoid_func_arg_motion (arg, insn);
16070 return;
16073 if (insn == BB_HEAD (bb))
16074 return;
16075 insn = PREV_INSN (insn);
16079 /* Hook for pre-reload schedule - avoid motion of function arguments
16080 passed in likely spilled HW registers. */
16081 static void
16082 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16084 rtx_insn *insn;
16085 rtx_insn *first_arg = NULL;
16086 if (reload_completed)
16087 return;
16088 while (head != tail && DEBUG_INSN_P (head))
16089 head = NEXT_INSN (head);
16090 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16091 if (INSN_P (insn) && CALL_P (insn))
16093 first_arg = add_parameter_dependencies (insn, head);
16094 if (first_arg)
16096 /* Add a dependee for the first argument to predecessors, but only if the
16097 region contains more than one block. */
16098 basic_block bb = BLOCK_FOR_INSN (insn);
16099 int rgn = CONTAINING_RGN (bb->index);
16100 int nr_blks = RGN_NR_BLOCKS (rgn);
16101 /* Skip trivial regions and region head blocks that can have
16102 predecessors outside of region. */
16103 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16105 edge e;
16106 edge_iterator ei;
16108 /* Regions are SCCs with the exception of selective
16109 scheduling with pipelining of outer blocks enabled.
16110 So also check that immediate predecessors of a non-head
16111 block are in the same region. */
16112 FOR_EACH_EDGE (e, ei, bb->preds)
16114 /* Avoid creating loop-carried dependencies by using the
16115 topological ordering in the region. */
16116 if (rgn == CONTAINING_RGN (e->src->index)
16117 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16118 add_dependee_for_func_arg (first_arg, e->src);
16121 insn = first_arg;
16122 if (insn == head)
16123 break;
16126 else if (first_arg)
16127 avoid_func_arg_motion (first_arg, insn);
16130 /* Hook for pre-reload schedule - set the priority of moves from likely
16131 spilled HW registers to the maximum, to schedule them as soon as possible.
16132 These are moves from function argument registers at the top of the function
16133 entry and moves from function return value registers after a call. */
16134 static int
16135 ix86_adjust_priority (rtx_insn *insn, int priority)
16137 rtx set;
16139 if (reload_completed)
16140 return priority;
16142 if (!NONDEBUG_INSN_P (insn))
16143 return priority;
16145 set = single_set (insn);
16146 if (set)
16148 rtx tmp = SET_SRC (set);
16149 if (REG_P (tmp)
16150 && HARD_REGISTER_P (tmp)
16151 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16152 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16153 return current_sched_info->sched_max_insns_priority;
16156 return priority;
16159 /* Prepare for scheduling pass. */
16160 static void
16161 ix86_sched_init_global (FILE *, int, int)
16163 /* Install scheduling hooks for current CPU. Some of these hooks are used
16164 in time-critical parts of the scheduler, so we only set them up when
16165 they are actually used. */
16166 switch (ix86_tune)
16168 case PROCESSOR_CORE2:
16169 case PROCESSOR_NEHALEM:
16170 case PROCESSOR_SANDYBRIDGE:
16171 case PROCESSOR_HASWELL:
16172 case PROCESSOR_GENERIC:
16173 /* Do not perform multipass scheduling for pre-reload schedule
16174 to save compile time. */
16175 if (reload_completed)
16177 ix86_core2i7_init_hooks ();
16178 break;
16180 /* Fall through. */
16181 default:
16182 targetm.sched.dfa_post_advance_cycle = NULL;
16183 targetm.sched.first_cycle_multipass_init = NULL;
16184 targetm.sched.first_cycle_multipass_begin = NULL;
16185 targetm.sched.first_cycle_multipass_issue = NULL;
16186 targetm.sched.first_cycle_multipass_backtrack = NULL;
16187 targetm.sched.first_cycle_multipass_end = NULL;
16188 targetm.sched.first_cycle_multipass_fini = NULL;
16189 break;
16194 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16196 static HOST_WIDE_INT
16197 ix86_static_rtx_alignment (machine_mode mode)
16199 if (mode == DFmode)
16200 return 64;
16201 if (ALIGN_MODE_128 (mode))
16202 return MAX (128, GET_MODE_ALIGNMENT (mode));
16203 return GET_MODE_ALIGNMENT (mode);
16206 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16208 static HOST_WIDE_INT
16209 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16211 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16212 || TREE_CODE (exp) == INTEGER_CST)
16214 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16215 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16216 return MAX (mode_align, align);
16218 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16219 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16220 return BITS_PER_WORD;
16222 return align;
16225 /* Implement TARGET_EMPTY_RECORD_P. */
16227 static bool
16228 ix86_is_empty_record (const_tree type)
16230 if (!TARGET_64BIT)
16231 return false;
16232 return default_is_empty_record (type);
16235 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16237 static void
16238 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16240 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16242 if (!cum->warn_empty)
16243 return;
16245 if (!TYPE_EMPTY_P (type))
16246 return;
16248 /* Don't warn if the function isn't visible outside of the TU. */
16249 if (cum->decl && !TREE_PUBLIC (cum->decl))
16250 return;
16252 const_tree ctx = get_ultimate_context (cum->decl);
16253 if (ctx != NULL_TREE
16254 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16255 return;
16257 /* If the actual size of the type is zero, then there is no change
16258 in how objects of this size are passed. */
16259 if (int_size_in_bytes (type) == 0)
16260 return;
16262 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16263 "changes in %<-fabi-version=12%> (GCC 8)", type);
16265 /* Only warn once. */
16266 cum->warn_empty = false;
16269 /* This hook returns the name of the multilib ABI. */
16271 static const char *
16272 ix86_get_multilib_abi_name (void)
16274 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16275 return "i386";
16276 else if (TARGET_X32_P (ix86_isa_flags))
16277 return "x32";
16278 else
16279 return "x86_64";
16282 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16283 the data type, and ALIGN is the alignment that the object would
16284 ordinarily have. */
16286 static int
16287 iamcu_alignment (tree type, int align)
16289 machine_mode mode;
16291 if (align < 32 || TYPE_USER_ALIGN (type))
16292 return align;
16294 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
16295 aligned to 4 bytes. */
16296 mode = TYPE_MODE (strip_array_types (type));
16297 switch (GET_MODE_CLASS (mode))
16299 case MODE_INT:
16300 case MODE_COMPLEX_INT:
16301 case MODE_COMPLEX_FLOAT:
16302 case MODE_FLOAT:
16303 case MODE_DECIMAL_FLOAT:
16304 return 32;
16305 default:
16306 return align;
16310 /* Compute the alignment for a static variable.
16311 TYPE is the data type, and ALIGN is the alignment that
16312 the object would ordinarily have. The value of this function is used
16313 instead of that alignment to align the object. */
16315 unsigned int
16316 ix86_data_alignment (tree type, unsigned int align, bool opt)
16318 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16319 for symbols from other compilation units or symbols that don't need
16320 to bind locally. In order to preserve some ABI compatibility with
16321 those compilers, ensure we don't decrease alignment from what we
16322 used to assume. */
16324 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16326 /* A data structure equal to or greater than the size of a cache line
16327 (64 bytes on the Pentium 4 and other recent Intel processors, including
16328 processors based on the Intel Core microarchitecture) should be aligned
16329 so that its base address is a multiple of the cache line size. */
16331 unsigned int max_align
16332 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
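/* prefetch_block is given in bytes, so e.g. a 64-byte cache line yields
   a 512-bit cap here.  */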
16334 if (max_align < BITS_PER_WORD)
16335 max_align = BITS_PER_WORD;
16337 switch (ix86_align_data_type)
16339 case ix86_align_data_type_abi: opt = false; break;
16340 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16341 case ix86_align_data_type_cacheline: break;
16344 if (TARGET_IAMCU)
16345 align = iamcu_alignment (type, align);
16347 if (opt
16348 && AGGREGATE_TYPE_P (type)
16349 && TYPE_SIZE (type)
16350 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16352 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16353 && align < max_align_compat)
16354 align = max_align_compat;
16355 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16356 && align < max_align)
16357 align = max_align;
16360 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16361 to a 16-byte boundary. */
16362 if (TARGET_64BIT)
16364 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16365 && TYPE_SIZE (type)
16366 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16367 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16368 && align < 128)
16369 return 128;
16372 if (!opt)
16373 return align;
16375 if (TREE_CODE (type) == ARRAY_TYPE)
16377 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16378 return 64;
16379 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16380 return 128;
16382 else if (TREE_CODE (type) == COMPLEX_TYPE)
16385 if (TYPE_MODE (type) == DCmode && align < 64)
16386 return 64;
16387 if ((TYPE_MODE (type) == XCmode
16388 || TYPE_MODE (type) == TCmode) && align < 128)
16389 return 128;
16391 else if ((TREE_CODE (type) == RECORD_TYPE
16392 || TREE_CODE (type) == UNION_TYPE
16393 || TREE_CODE (type) == QUAL_UNION_TYPE)
16394 && TYPE_FIELDS (type))
16396 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16397 return 64;
16398 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16399 return 128;
16401 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16402 || TREE_CODE (type) == INTEGER_TYPE)
16404 if (TYPE_MODE (type) == DFmode && align < 64)
16405 return 64;
16406 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16407 return 128;
16410 return align;
16413 /* Compute the alignment for a local variable or a stack slot. EXP is
16414 the data type or decl itself, MODE is the widest mode available and
16415 ALIGN is the alignment that the object would ordinarily have. The
16416 value of this macro is used instead of that alignment to align the
16417 object. */
16419 unsigned int
16420 ix86_local_alignment (tree exp, machine_mode mode,
16421 unsigned int align)
16423 tree type, decl;
16425 if (exp && DECL_P (exp))
16427 type = TREE_TYPE (exp);
16428 decl = exp;
16430 else
16432 type = exp;
16433 decl = NULL;
16436 /* Don't do dynamic stack realignment for long long objects with
16437 -mpreferred-stack-boundary=2. */
16438 if (!TARGET_64BIT
16439 && align == 64
16440 && ix86_preferred_stack_boundary < 64
16441 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16442 && (!type || !TYPE_USER_ALIGN (type))
16443 && (!decl || !DECL_USER_ALIGN (decl)))
16444 align = 32;
16446 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
16447 register in MODE. We will return the larger of the XF
16448 and DF alignments. */
16449 if (!type)
16451 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16452 align = GET_MODE_ALIGNMENT (DFmode);
16453 return align;
16456 /* Don't increase alignment for Intel MCU psABI. */
16457 if (TARGET_IAMCU)
16458 return align;
16460 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16461 to a 16-byte boundary. The exact wording is:
16463 An array uses the same alignment as its elements, except that a local or
16464 global array variable of length at least 16 bytes or
16465 a C99 variable-length array variable always has alignment of at least 16 bytes.
16467 This was added to allow use of aligned SSE instructions on arrays. The
16468 rule is meant for static storage (where the compiler cannot do the analysis
16469 by itself). We follow it for automatic variables only when convenient.
16470 We fully control everything in the function being compiled, and functions
16471 from other units cannot rely on the alignment.
16473 Exclude the va_list type. It is the common case of a local array where
16474 we cannot benefit from the alignment.
16476 TODO: Probably one should optimize for size only when the variable does not escape. */
16477 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16478 && TARGET_SSE)
16480 if (AGGREGATE_TYPE_P (type)
16481 && (va_list_type_node == NULL_TREE
16482 || (TYPE_MAIN_VARIANT (type)
16483 != TYPE_MAIN_VARIANT (va_list_type_node)))
16484 && TYPE_SIZE (type)
16485 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16486 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16487 && align < 128)
16488 return 128;
16490 if (TREE_CODE (type) == ARRAY_TYPE)
16492 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16493 return 64;
16494 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16495 return 128;
16497 else if (TREE_CODE (type) == COMPLEX_TYPE)
16499 if (TYPE_MODE (type) == DCmode && align < 64)
16500 return 64;
16501 if ((TYPE_MODE (type) == XCmode
16502 || TYPE_MODE (type) == TCmode) && align < 128)
16503 return 128;
16505 else if ((TREE_CODE (type) == RECORD_TYPE
16506 || TREE_CODE (type) == UNION_TYPE
16507 || TREE_CODE (type) == QUAL_UNION_TYPE)
16508 && TYPE_FIELDS (type))
16510 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16511 return 64;
16512 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16513 return 128;
16515 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16516 || TREE_CODE (type) == INTEGER_TYPE)
16519 if (TYPE_MODE (type) == DFmode && align < 64)
16520 return 64;
16521 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16522 return 128;
16524 return align;
16527 /* Compute the minimum required alignment for dynamic stack realignment
16528 purposes for a local variable, parameter or a stack slot. EXP is
16529 the data type or decl itself, MODE is its mode and ALIGN is the
16530 alignment that the object would ordinarily have. */
16532 unsigned int
16533 ix86_minimum_alignment (tree exp, machine_mode mode,
16534 unsigned int align)
16536 tree type, decl;
16538 if (exp && DECL_P (exp))
16540 type = TREE_TYPE (exp);
16541 decl = exp;
16543 else
16545 type = exp;
16546 decl = NULL;
16549 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16550 return align;
16552 /* Don't do dynamic stack realignment for long long objects with
16553 -mpreferred-stack-boundary=2. */
16554 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16555 && (!type || !TYPE_USER_ALIGN (type))
16556 && (!decl || !DECL_USER_ALIGN (decl)))
16558 gcc_checking_assert (!TARGET_STV);
16559 return 32;
16562 return align;
16565 /* Find a location for the static chain incoming to a nested function.
16566 This is a register, unless all free registers are used by arguments. */
16568 static rtx
16569 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16571 unsigned regno;
16573 if (TARGET_64BIT)
16575 /* We always use R10 in 64-bit mode. */
16576 regno = R10_REG;
16578 else
16580 const_tree fntype, fndecl;
16581 unsigned int ccvt;
16583 /* By default in 32-bit mode we use ECX to pass the static chain. */
16584 regno = CX_REG;
16586 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16588 fntype = TREE_TYPE (fndecl_or_type);
16589 fndecl = fndecl_or_type;
16591 else
16593 fntype = fndecl_or_type;
16594 fndecl = NULL;
16597 ccvt = ix86_get_callcvt (fntype);
16598 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16600 /* Fastcall functions use ecx/edx for arguments, which leaves
16601 us with EAX for the static chain.
16602 Thiscall functions use ecx for arguments, which also
16603 leaves us with EAX for the static chain. */
16604 regno = AX_REG;
16606 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16608 /* Thiscall functions use ecx for arguments, which leaves
16609 us with EAX and EDX for the static chain.
16610 For ABI compatibility we use EAX. */
16611 regno = AX_REG;
16613 else if (ix86_function_regparm (fntype, fndecl) == 3)
16615 /* For regparm 3, we have no free call-clobbered registers in
16616 which to store the static chain. In order to implement this,
16617 we have the trampoline push the static chain to the stack.
16618 However, we can't push a value below the return address when
16619 we call the nested function directly, so we have to use an
16620 alternate entry point. For this we use ESI, and have the
16621 alternate entry point push ESI, so that things appear the
16622 same once we're executing the nested function. */
16623 if (incoming_p)
16625 if (fndecl == current_function_decl
16626 && !ix86_static_chain_on_stack)
16628 gcc_assert (!reload_completed);
16629 ix86_static_chain_on_stack = true;
16631 return gen_frame_mem (SImode,
16632 plus_constant (Pmode,
16633 arg_pointer_rtx, -8));
16635 regno = SI_REG;
16639 return gen_rtx_REG (Pmode, regno);
16642 /* Emit RTL insns to initialize the variable parts of a trampoline.
16643 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16644 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16645 to be passed to the target function. */
16647 static void
16648 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16650 rtx mem, fnaddr;
16651 int opcode;
16652 int offset = 0;
16653 bool need_endbr = (flag_cf_protection & CF_BRANCH);
16655 fnaddr = XEXP (DECL_RTL (fndecl), 0);
16657 if (TARGET_64BIT)
16659 int size;
16661 if (need_endbr)
16663 /* Insert ENDBR64. */
16664 mem = adjust_address (m_tramp, SImode, offset);
16665 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16666 offset += 4;
16669 /* Load the function address into r11. Try to load the address using
16670 the shorter movl instead of movabs. We may want to support
16671 movq for kernel mode, but the kernel does not use trampolines at
16672 the moment. FNADDR is a 32-bit address and may not be in
16673 DImode when ptr_mode == SImode. Always use movl in this
16674 case. */
16675 if (ptr_mode == SImode
16676 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16678 fnaddr = copy_addr_to_reg (fnaddr);
16680 mem = adjust_address (m_tramp, HImode, offset);
16681 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16683 mem = adjust_address (m_tramp, SImode, offset + 2);
16684 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16685 offset += 6;
16687 else
16689 mem = adjust_address (m_tramp, HImode, offset);
16690 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16692 mem = adjust_address (m_tramp, DImode, offset + 2);
16693 emit_move_insn (mem, fnaddr);
16694 offset += 10;
16697 /* Load static chain using movabs to r10. Use the shorter movl
16698 instead of movabs when ptr_mode == SImode. */
16699 if (ptr_mode == SImode)
16701 opcode = 0xba41;
16702 size = 6;
16704 else
16706 opcode = 0xba49;
16707 size = 10;
16710 mem = adjust_address (m_tramp, HImode, offset);
16711 emit_move_insn (mem, gen_int_mode (opcode, HImode));
16713 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
16714 emit_move_insn (mem, chain_value);
16715 offset += size;
16717 /* Jump to r11; the last (unused) byte is a nop, only there to
16718 pad the write out to a single 32-bit store. */
16719 mem = adjust_address (m_tramp, SImode, offset);
16720 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
16721 offset += 4;
16723 else
16725 rtx disp, chain;
16727 /* Depending on the static chain location, either load a register
16728 with a constant, or push the constant to the stack. All of the
16729 instructions are the same size. */
16730 chain = ix86_static_chain (fndecl, true);
16731 if (REG_P (chain))
16733 switch (REGNO (chain))
16735 case AX_REG:
16736 opcode = 0xb8; break;
16737 case CX_REG:
16738 opcode = 0xb9; break;
16739 default:
16740 gcc_unreachable ();
16743 else
16744 opcode = 0x68;
16746 if (need_endbr)
16748 /* Insert ENDBR32. */
16749 mem = adjust_address (m_tramp, SImode, offset);
16750 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
16751 offset += 4;
16754 mem = adjust_address (m_tramp, QImode, offset);
16755 emit_move_insn (mem, gen_int_mode (opcode, QImode));
16757 mem = adjust_address (m_tramp, SImode, offset + 1);
16758 emit_move_insn (mem, chain_value);
16759 offset += 5;
16761 mem = adjust_address (m_tramp, QImode, offset);
16762 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
16764 mem = adjust_address (m_tramp, SImode, offset + 1);
16766 /* Compute offset from the end of the jmp to the target function.
16767 In the case in which the trampoline stores the static chain on
16768 the stack, we need to skip the first insn which pushes the
16769 (call-saved) register static chain; this push is 1 byte. */
16770 offset += 5;
16771 disp = expand_binop (SImode, sub_optab, fnaddr,
16772 plus_constant (Pmode, XEXP (m_tramp, 0),
16773 offset - (MEM_P (chain) ? 1 : 0)),
16774 NULL_RTX, 1, OPTAB_DIRECT);
16775 emit_move_insn (mem, disp);
16778 gcc_assert (offset <= TRAMPOLINE_SIZE);
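/* In the worst case above (64-bit with ENDBR64 and two movabs stores)
   OFFSET reaches 4 + 10 + 10 + 4 = 28 bytes, which must fit in
   TRAMPOLINE_SIZE.  */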
16780 #ifdef HAVE_ENABLE_EXECUTE_STACK
16781 #ifdef CHECK_EXECUTE_STACK_ENABLED
16782 if (CHECK_EXECUTE_STACK_ENABLED)
16783 #endif
16784 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16785 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
16786 #endif
16789 static bool
16790 ix86_allocate_stack_slots_for_args (void)
16792 /* Naked functions should not allocate stack slots for arguments. */
16793 return !ix86_function_naked (current_function_decl);
16796 static bool
16797 ix86_warn_func_return (tree decl)
16799 /* Naked functions are implemented entirely in assembly, including the
16800 return sequence, so suppress warnings about this. */
16801 return !ix86_function_naked (decl);
16804 /* Return the shift count of a vector-by-scalar shift builtin given its second
16805 argument ARG1. */
16806 static tree
16807 ix86_vector_shift_count (tree arg1)
16809 if (tree_fits_uhwi_p (arg1))
16810 return arg1;
16811 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
16813 /* The count argument is weird: it is passed in as various 128-bit
16814 (or 64-bit) vectors, and the low 64 bits of it are the count. */
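/* For instance, the count operand of _mm_sll_epi32 is an __m128i of which
   only the low 64 bits are used as the shift count.  */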
16815 unsigned char buf[16];
16816 int len = native_encode_expr (arg1, buf, 16);
16817 if (len == 0)
16818 return NULL_TREE;
16819 tree t = native_interpret_expr (uint64_type_node, buf, len);
16820 if (t && tree_fits_uhwi_p (t))
16821 return t;
16823 return NULL_TREE;
16826 static tree
16827 ix86_fold_builtin (tree fndecl, int n_args,
16828 tree *args, bool ignore ATTRIBUTE_UNUSED)
16830 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16832 enum ix86_builtins fn_code = (enum ix86_builtins)
16833 DECL_FUNCTION_CODE (fndecl);
16834 enum rtx_code rcode;
16835 bool is_vshift;
16836 unsigned HOST_WIDE_INT mask;
16838 switch (fn_code)
16840 case IX86_BUILTIN_CPU_IS:
16841 case IX86_BUILTIN_CPU_SUPPORTS:
16842 gcc_assert (n_args == 1);
16843 return fold_builtin_cpu (fndecl, args);
16845 case IX86_BUILTIN_NANQ:
16846 case IX86_BUILTIN_NANSQ:
16848 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16849 const char *str = c_getstr (*args);
16850 int quiet = fn_code == IX86_BUILTIN_NANQ;
16851 REAL_VALUE_TYPE real;
16853 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16854 return build_real (type, real);
16855 return NULL_TREE;
16858 case IX86_BUILTIN_INFQ:
16859 case IX86_BUILTIN_HUGE_VALQ:
16861 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16862 REAL_VALUE_TYPE inf;
16863 real_inf (&inf);
16864 return build_real (type, inf);
16867 case IX86_BUILTIN_TZCNT16:
16868 case IX86_BUILTIN_CTZS:
16869 case IX86_BUILTIN_TZCNT32:
16870 case IX86_BUILTIN_TZCNT64:
16871 gcc_assert (n_args == 1);
16872 if (TREE_CODE (args[0]) == INTEGER_CST)
16874 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16875 tree arg = args[0];
16876 if (fn_code == IX86_BUILTIN_TZCNT16
16877 || fn_code == IX86_BUILTIN_CTZS)
16878 arg = fold_convert (short_unsigned_type_node, arg);
16879 if (integer_zerop (arg))
16880 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
16881 else
16882 return fold_const_call (CFN_CTZ, type, arg);
16884 break;
16886 case IX86_BUILTIN_LZCNT16:
16887 case IX86_BUILTIN_CLZS:
16888 case IX86_BUILTIN_LZCNT32:
16889 case IX86_BUILTIN_LZCNT64:
16890 gcc_assert (n_args == 1);
16891 if (TREE_CODE (args[0]) == INTEGER_CST)
16893 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16894 tree arg = args[0];
16895 if (fn_code == IX86_BUILTIN_LZCNT16
16896 || fn_code == IX86_BUILTIN_CLZS)
16897 arg = fold_convert (short_unsigned_type_node, arg);
16898 if (integer_zerop (arg))
16899 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
16900 else
16901 return fold_const_call (CFN_CLZ, type, arg);
16903 break;
16905 case IX86_BUILTIN_BEXTR32:
16906 case IX86_BUILTIN_BEXTR64:
16907 case IX86_BUILTIN_BEXTRI32:
16908 case IX86_BUILTIN_BEXTRI64:
16909 gcc_assert (n_args == 2);
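/* The second operand encodes the start bit in bits 0..7 and the field
   length in bits 8..15; e.g. a control of 0x0804 extracts 8 bits starting
   at bit 4, so __builtin_ia32_bextr_u32 (0x12345678, 0x0804) folds to 0x67.  */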
16910 if (tree_fits_uhwi_p (args[1]))
16912 unsigned HOST_WIDE_INT res = 0;
16913 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
16914 unsigned int start = tree_to_uhwi (args[1]);
16915 unsigned int len = (start & 0xff00) >> 8;
16916 start &= 0xff;
16917 if (start >= prec || len == 0)
16918 res = 0;
16919 else if (!tree_fits_uhwi_p (args[0]))
16920 break;
16921 else
16922 res = tree_to_uhwi (args[0]) >> start;
16923 if (len > prec)
16924 len = prec;
16925 if (len < HOST_BITS_PER_WIDE_INT)
16926 res &= (HOST_WIDE_INT_1U << len) - 1;
16927 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16929 break;
16931 case IX86_BUILTIN_BZHI32:
16932 case IX86_BUILTIN_BZHI64:
16933 gcc_assert (n_args == 2);
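/* BZHI keeps the low IDX bits of the first operand and zeroes the rest;
   only the low 8 bits of the second operand are consulted, so e.g.
   __builtin_ia32_bzhi_si (0x12345678, 8) folds to 0x78.  */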
16934 if (tree_fits_uhwi_p (args[1]))
16936 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
16937 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
16938 return args[0];
16939 if (idx == 0)
16940 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
16941 if (!tree_fits_uhwi_p (args[0]))
16942 break;
16943 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
16944 res &= ~(HOST_WIDE_INT_M1U << idx);
16945 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16947 break;
16949 case IX86_BUILTIN_PDEP32:
16950 case IX86_BUILTIN_PDEP64:
16951 gcc_assert (n_args == 2);
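/* PDEP deposits the low-order bits of the first operand into the bit
   positions selected by the mask; e.g. pdep (0b101, 0b11010) = 0b10010.  */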
16952 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
16954 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
16955 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
16956 unsigned HOST_WIDE_INT res = 0;
16957 unsigned HOST_WIDE_INT m, k = 1;
16958 for (m = 1; m; m <<= 1)
16959 if ((mask & m) != 0)
16961 if ((src & k) != 0)
16962 res |= m;
16963 k <<= 1;
16965 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16967 break;
16969 case IX86_BUILTIN_PEXT32:
16970 case IX86_BUILTIN_PEXT64:
16971 gcc_assert (n_args == 2);
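/* PEXT is the inverse gather: it extracts the bits selected by the mask
   into the low-order bits of the result; e.g. pext (0b10010, 0b11010) = 0b101.  */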
16972 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
16974 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
16975 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
16976 unsigned HOST_WIDE_INT res = 0;
16977 unsigned HOST_WIDE_INT m, k = 1;
16978 for (m = 1; m; m <<= 1)
16979 if ((mask & m) != 0)
16981 if ((src & m) != 0)
16982 res |= k;
16983 k <<= 1;
16985 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
16987 break;
16989 case IX86_BUILTIN_MOVMSKPS:
16990 case IX86_BUILTIN_PMOVMSKB:
16991 case IX86_BUILTIN_MOVMSKPD:
16992 case IX86_BUILTIN_PMOVMSKB128:
16993 case IX86_BUILTIN_MOVMSKPD256:
16994 case IX86_BUILTIN_MOVMSKPS256:
16995 case IX86_BUILTIN_PMOVMSKB256:
16996 gcc_assert (n_args == 1);
16997 if (TREE_CODE (args[0]) == VECTOR_CST)
16999 HOST_WIDE_INT res = 0;
17000 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17002 tree e = VECTOR_CST_ELT (args[0], i);
17003 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17005 if (wi::neg_p (wi::to_wide (e)))
17006 res |= HOST_WIDE_INT_1 << i;
17008 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17010 if (TREE_REAL_CST (e).sign)
17011 res |= HOST_WIDE_INT_1 << i;
17013 else
17014 return NULL_TREE;
17016 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17018 break;
17020 case IX86_BUILTIN_PSLLD:
17021 case IX86_BUILTIN_PSLLD128:
17022 case IX86_BUILTIN_PSLLD128_MASK:
17023 case IX86_BUILTIN_PSLLD256:
17024 case IX86_BUILTIN_PSLLD256_MASK:
17025 case IX86_BUILTIN_PSLLD512:
17026 case IX86_BUILTIN_PSLLDI:
17027 case IX86_BUILTIN_PSLLDI128:
17028 case IX86_BUILTIN_PSLLDI128_MASK:
17029 case IX86_BUILTIN_PSLLDI256:
17030 case IX86_BUILTIN_PSLLDI256_MASK:
17031 case IX86_BUILTIN_PSLLDI512:
17032 case IX86_BUILTIN_PSLLQ:
17033 case IX86_BUILTIN_PSLLQ128:
17034 case IX86_BUILTIN_PSLLQ128_MASK:
17035 case IX86_BUILTIN_PSLLQ256:
17036 case IX86_BUILTIN_PSLLQ256_MASK:
17037 case IX86_BUILTIN_PSLLQ512:
17038 case IX86_BUILTIN_PSLLQI:
17039 case IX86_BUILTIN_PSLLQI128:
17040 case IX86_BUILTIN_PSLLQI128_MASK:
17041 case IX86_BUILTIN_PSLLQI256:
17042 case IX86_BUILTIN_PSLLQI256_MASK:
17043 case IX86_BUILTIN_PSLLQI512:
17044 case IX86_BUILTIN_PSLLW:
17045 case IX86_BUILTIN_PSLLW128:
17046 case IX86_BUILTIN_PSLLW128_MASK:
17047 case IX86_BUILTIN_PSLLW256:
17048 case IX86_BUILTIN_PSLLW256_MASK:
17049 case IX86_BUILTIN_PSLLW512_MASK:
17050 case IX86_BUILTIN_PSLLWI:
17051 case IX86_BUILTIN_PSLLWI128:
17052 case IX86_BUILTIN_PSLLWI128_MASK:
17053 case IX86_BUILTIN_PSLLWI256:
17054 case IX86_BUILTIN_PSLLWI256_MASK:
17055 case IX86_BUILTIN_PSLLWI512_MASK:
17056 rcode = ASHIFT;
17057 is_vshift = false;
17058 goto do_shift;
17059 case IX86_BUILTIN_PSRAD:
17060 case IX86_BUILTIN_PSRAD128:
17061 case IX86_BUILTIN_PSRAD128_MASK:
17062 case IX86_BUILTIN_PSRAD256:
17063 case IX86_BUILTIN_PSRAD256_MASK:
17064 case IX86_BUILTIN_PSRAD512:
17065 case IX86_BUILTIN_PSRADI:
17066 case IX86_BUILTIN_PSRADI128:
17067 case IX86_BUILTIN_PSRADI128_MASK:
17068 case IX86_BUILTIN_PSRADI256:
17069 case IX86_BUILTIN_PSRADI256_MASK:
17070 case IX86_BUILTIN_PSRADI512:
17071 case IX86_BUILTIN_PSRAQ128_MASK:
17072 case IX86_BUILTIN_PSRAQ256_MASK:
17073 case IX86_BUILTIN_PSRAQ512:
17074 case IX86_BUILTIN_PSRAQI128_MASK:
17075 case IX86_BUILTIN_PSRAQI256_MASK:
17076 case IX86_BUILTIN_PSRAQI512:
17077 case IX86_BUILTIN_PSRAW:
17078 case IX86_BUILTIN_PSRAW128:
17079 case IX86_BUILTIN_PSRAW128_MASK:
17080 case IX86_BUILTIN_PSRAW256:
17081 case IX86_BUILTIN_PSRAW256_MASK:
17082 case IX86_BUILTIN_PSRAW512:
17083 case IX86_BUILTIN_PSRAWI:
17084 case IX86_BUILTIN_PSRAWI128:
17085 case IX86_BUILTIN_PSRAWI128_MASK:
17086 case IX86_BUILTIN_PSRAWI256:
17087 case IX86_BUILTIN_PSRAWI256_MASK:
17088 case IX86_BUILTIN_PSRAWI512:
17089 rcode = ASHIFTRT;
17090 is_vshift = false;
17091 goto do_shift;
17092 case IX86_BUILTIN_PSRLD:
17093 case IX86_BUILTIN_PSRLD128:
17094 case IX86_BUILTIN_PSRLD128_MASK:
17095 case IX86_BUILTIN_PSRLD256:
17096 case IX86_BUILTIN_PSRLD256_MASK:
17097 case IX86_BUILTIN_PSRLD512:
17098 case IX86_BUILTIN_PSRLDI:
17099 case IX86_BUILTIN_PSRLDI128:
17100 case IX86_BUILTIN_PSRLDI128_MASK:
17101 case IX86_BUILTIN_PSRLDI256:
17102 case IX86_BUILTIN_PSRLDI256_MASK:
17103 case IX86_BUILTIN_PSRLDI512:
17104 case IX86_BUILTIN_PSRLQ:
17105 case IX86_BUILTIN_PSRLQ128:
17106 case IX86_BUILTIN_PSRLQ128_MASK:
17107 case IX86_BUILTIN_PSRLQ256:
17108 case IX86_BUILTIN_PSRLQ256_MASK:
17109 case IX86_BUILTIN_PSRLQ512:
17110 case IX86_BUILTIN_PSRLQI:
17111 case IX86_BUILTIN_PSRLQI128:
17112 case IX86_BUILTIN_PSRLQI128_MASK:
17113 case IX86_BUILTIN_PSRLQI256:
17114 case IX86_BUILTIN_PSRLQI256_MASK:
17115 case IX86_BUILTIN_PSRLQI512:
17116 case IX86_BUILTIN_PSRLW:
17117 case IX86_BUILTIN_PSRLW128:
17118 case IX86_BUILTIN_PSRLW128_MASK:
17119 case IX86_BUILTIN_PSRLW256:
17120 case IX86_BUILTIN_PSRLW256_MASK:
17121 case IX86_BUILTIN_PSRLW512:
17122 case IX86_BUILTIN_PSRLWI:
17123 case IX86_BUILTIN_PSRLWI128:
17124 case IX86_BUILTIN_PSRLWI128_MASK:
17125 case IX86_BUILTIN_PSRLWI256:
17126 case IX86_BUILTIN_PSRLWI256_MASK:
17127 case IX86_BUILTIN_PSRLWI512:
17128 rcode = LSHIFTRT;
17129 is_vshift = false;
17130 goto do_shift;
17131 case IX86_BUILTIN_PSLLVV16HI:
17132 case IX86_BUILTIN_PSLLVV16SI:
17133 case IX86_BUILTIN_PSLLVV2DI:
17134 case IX86_BUILTIN_PSLLVV2DI_MASK:
17135 case IX86_BUILTIN_PSLLVV32HI:
17136 case IX86_BUILTIN_PSLLVV4DI:
17137 case IX86_BUILTIN_PSLLVV4DI_MASK:
17138 case IX86_BUILTIN_PSLLVV4SI:
17139 case IX86_BUILTIN_PSLLVV4SI_MASK:
17140 case IX86_BUILTIN_PSLLVV8DI:
17141 case IX86_BUILTIN_PSLLVV8HI:
17142 case IX86_BUILTIN_PSLLVV8SI:
17143 case IX86_BUILTIN_PSLLVV8SI_MASK:
17144 rcode = ASHIFT;
17145 is_vshift = true;
17146 goto do_shift;
17147 case IX86_BUILTIN_PSRAVQ128:
17148 case IX86_BUILTIN_PSRAVQ256:
17149 case IX86_BUILTIN_PSRAVV16HI:
17150 case IX86_BUILTIN_PSRAVV16SI:
17151 case IX86_BUILTIN_PSRAVV32HI:
17152 case IX86_BUILTIN_PSRAVV4SI:
17153 case IX86_BUILTIN_PSRAVV4SI_MASK:
17154 case IX86_BUILTIN_PSRAVV8DI:
17155 case IX86_BUILTIN_PSRAVV8HI:
17156 case IX86_BUILTIN_PSRAVV8SI:
17157 case IX86_BUILTIN_PSRAVV8SI_MASK:
17158 rcode = ASHIFTRT;
17159 is_vshift = true;
17160 goto do_shift;
17161 case IX86_BUILTIN_PSRLVV16HI:
17162 case IX86_BUILTIN_PSRLVV16SI:
17163 case IX86_BUILTIN_PSRLVV2DI:
17164 case IX86_BUILTIN_PSRLVV2DI_MASK:
17165 case IX86_BUILTIN_PSRLVV32HI:
17166 case IX86_BUILTIN_PSRLVV4DI:
17167 case IX86_BUILTIN_PSRLVV4DI_MASK:
17168 case IX86_BUILTIN_PSRLVV4SI:
17169 case IX86_BUILTIN_PSRLVV4SI_MASK:
17170 case IX86_BUILTIN_PSRLVV8DI:
17171 case IX86_BUILTIN_PSRLVV8HI:
17172 case IX86_BUILTIN_PSRLVV8SI:
17173 case IX86_BUILTIN_PSRLVV8SI_MASK:
17174 rcode = LSHIFTRT;
17175 is_vshift = true;
17176 goto do_shift;
17178 do_shift:
17179 gcc_assert (n_args >= 2);
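/* For the masked variants the last argument is the merge mask and the
   argument before it supplies the element value for lanes whose mask
   bit is clear.  */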
17180 if (TREE_CODE (args[0]) != VECTOR_CST)
17181 break;
17182 mask = HOST_WIDE_INT_M1U;
17183 if (n_args > 2)
17185 /* This is a masked shift. */
17186 if (!tree_fits_uhwi_p (args[n_args - 1])
17187 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17188 break;
17189 mask = tree_to_uhwi (args[n_args - 1]);
17190 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17191 mask |= HOST_WIDE_INT_M1U << elems;
17192 if (mask != HOST_WIDE_INT_M1U
17193 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17194 break;
17195 if (mask == (HOST_WIDE_INT_M1U << elems))
17196 return args[n_args - 2];
17198 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17199 break;
17200 if (tree tem = (is_vshift ? integer_one_node
17201 : ix86_vector_shift_count (args[1])))
17203 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17204 unsigned HOST_WIDE_INT prec
17205 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17206 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17207 return args[0];
17208 if (count >= prec)
17210 if (rcode == ASHIFTRT)
17211 count = prec - 1;
17212 else if (mask == HOST_WIDE_INT_M1U)
17213 return build_zero_cst (TREE_TYPE (args[0]));
17215 tree countt = NULL_TREE;
17216 if (!is_vshift)
17218 if (count >= prec)
17219 countt = integer_zero_node;
17220 else
17221 countt = build_int_cst (integer_type_node, count);
17223 tree_vector_builder builder;
17224 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17225 false);
17226 unsigned int cnt = builder.encoded_nelts ();
17227 for (unsigned int i = 0; i < cnt; ++i)
17229 tree elt = VECTOR_CST_ELT (args[0], i);
17230 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17231 return NULL_TREE;
17232 tree type = TREE_TYPE (elt);
17233 if (rcode == LSHIFTRT)
17234 elt = fold_convert (unsigned_type_for (type), elt);
17235 if (is_vshift)
17237 countt = VECTOR_CST_ELT (args[1], i);
17238 if (TREE_CODE (countt) != INTEGER_CST
17239 || TREE_OVERFLOW (countt))
17240 return NULL_TREE;
17241 if (wi::neg_p (wi::to_wide (countt))
17242 || wi::to_widest (countt) >= prec)
17244 if (rcode == ASHIFTRT)
17245 countt = build_int_cst (TREE_TYPE (countt),
17246 prec - 1);
17247 else
17249 elt = build_zero_cst (TREE_TYPE (elt));
17250 countt = build_zero_cst (TREE_TYPE (countt));
17254 else if (count >= prec)
17255 elt = build_zero_cst (TREE_TYPE (elt));
17256 elt = const_binop (rcode == ASHIFT
17257 ? LSHIFT_EXPR : RSHIFT_EXPR,
17258 TREE_TYPE (elt), elt, countt);
17259 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17260 return NULL_TREE;
17261 if (rcode == LSHIFTRT)
17262 elt = fold_convert (type, elt);
17263 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17265 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17266 if (TREE_CODE (elt) != INTEGER_CST
17267 || TREE_OVERFLOW (elt))
17268 return NULL_TREE;
17270 builder.quick_push (elt);
17272 return builder.build ();
17274 break;
17276 default:
17277 break;
17281 #ifdef SUBTARGET_FOLD_BUILTIN
17282 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17283 #endif
17285 return NULL_TREE;
17288 /* Fold an MD builtin in GIMPLE (use ix86_fold_builtin for folding into
17289 a constant). */
17291 bool
17292 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17294 gimple *stmt = gsi_stmt (*gsi);
17295 tree fndecl = gimple_call_fndecl (stmt);
17296 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17297 int n_args = gimple_call_num_args (stmt);
17298 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
17299 tree decl = NULL_TREE;
17300 tree arg0, arg1, arg2;
17301 enum rtx_code rcode;
17302 unsigned HOST_WIDE_INT count;
17303 bool is_vshift;
17305 switch (fn_code)
17307 case IX86_BUILTIN_TZCNT32:
17308 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17309 goto fold_tzcnt_lzcnt;
17311 case IX86_BUILTIN_TZCNT64:
17312 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17313 goto fold_tzcnt_lzcnt;
17315 case IX86_BUILTIN_LZCNT32:
17316 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17317 goto fold_tzcnt_lzcnt;
17319 case IX86_BUILTIN_LZCNT64:
17320 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17321 goto fold_tzcnt_lzcnt;
17323 fold_tzcnt_lzcnt:
17324 gcc_assert (n_args == 1);
17325 arg0 = gimple_call_arg (stmt, 0);
17326 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17328 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17329 /* If arg0 is provably non-zero, optimize into the generic
17330 __builtin_c[tl]z{,ll} function, which the middle-end handles
17331 better. */
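/* E.g. when value-range information proves ARG0 != 0,
   __builtin_ia32_tzcnt_u32 (ARG0) can safely become __builtin_ctz (ARG0),
   since the two only differ for a zero input.  */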
17332 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17333 return false;
17335 location_t loc = gimple_location (stmt);
17336 gimple *g = gimple_build_call (decl, 1, arg0);
17337 gimple_set_location (g, loc);
17338 tree lhs = make_ssa_name (integer_type_node);
17339 gimple_call_set_lhs (g, lhs);
17340 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17341 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17342 gimple_set_location (g, loc);
17343 gsi_replace (gsi, g, false);
17344 return true;
17346 break;
17348 case IX86_BUILTIN_BZHI32:
17349 case IX86_BUILTIN_BZHI64:
17350 gcc_assert (n_args == 2);
17351 arg1 = gimple_call_arg (stmt, 1);
17352 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17354 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17355 arg0 = gimple_call_arg (stmt, 0);
17356 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17357 break;
17358 location_t loc = gimple_location (stmt);
17359 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17360 gimple_set_location (g, loc);
17361 gsi_replace (gsi, g, false);
17362 return true;
17364 break;
17366 case IX86_BUILTIN_PDEP32:
17367 case IX86_BUILTIN_PDEP64:
17368 case IX86_BUILTIN_PEXT32:
17369 case IX86_BUILTIN_PEXT64:
17370 gcc_assert (n_args == 2);
17371 arg1 = gimple_call_arg (stmt, 1);
17372 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17374 location_t loc = gimple_location (stmt);
17375 arg0 = gimple_call_arg (stmt, 0);
17376 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17377 gimple_set_location (g, loc);
17378 gsi_replace (gsi, g, false);
17379 return true;
17381 break;
17383 case IX86_BUILTIN_PSLLD:
17384 case IX86_BUILTIN_PSLLD128:
17385 case IX86_BUILTIN_PSLLD128_MASK:
17386 case IX86_BUILTIN_PSLLD256:
17387 case IX86_BUILTIN_PSLLD256_MASK:
17388 case IX86_BUILTIN_PSLLD512:
17389 case IX86_BUILTIN_PSLLDI:
17390 case IX86_BUILTIN_PSLLDI128:
17391 case IX86_BUILTIN_PSLLDI128_MASK:
17392 case IX86_BUILTIN_PSLLDI256:
17393 case IX86_BUILTIN_PSLLDI256_MASK:
17394 case IX86_BUILTIN_PSLLDI512:
17395 case IX86_BUILTIN_PSLLQ:
17396 case IX86_BUILTIN_PSLLQ128:
17397 case IX86_BUILTIN_PSLLQ128_MASK:
17398 case IX86_BUILTIN_PSLLQ256:
17399 case IX86_BUILTIN_PSLLQ256_MASK:
17400 case IX86_BUILTIN_PSLLQ512:
17401 case IX86_BUILTIN_PSLLQI:
17402 case IX86_BUILTIN_PSLLQI128:
17403 case IX86_BUILTIN_PSLLQI128_MASK:
17404 case IX86_BUILTIN_PSLLQI256:
17405 case IX86_BUILTIN_PSLLQI256_MASK:
17406 case IX86_BUILTIN_PSLLQI512:
17407 case IX86_BUILTIN_PSLLW:
17408 case IX86_BUILTIN_PSLLW128:
17409 case IX86_BUILTIN_PSLLW128_MASK:
17410 case IX86_BUILTIN_PSLLW256:
17411 case IX86_BUILTIN_PSLLW256_MASK:
17412 case IX86_BUILTIN_PSLLW512_MASK:
17413 case IX86_BUILTIN_PSLLWI:
17414 case IX86_BUILTIN_PSLLWI128:
17415 case IX86_BUILTIN_PSLLWI128_MASK:
17416 case IX86_BUILTIN_PSLLWI256:
17417 case IX86_BUILTIN_PSLLWI256_MASK:
17418 case IX86_BUILTIN_PSLLWI512_MASK:
17419 rcode = ASHIFT;
17420 is_vshift = false;
17421 goto do_shift;
17422 case IX86_BUILTIN_PSRAD:
17423 case IX86_BUILTIN_PSRAD128:
17424 case IX86_BUILTIN_PSRAD128_MASK:
17425 case IX86_BUILTIN_PSRAD256:
17426 case IX86_BUILTIN_PSRAD256_MASK:
17427 case IX86_BUILTIN_PSRAD512:
17428 case IX86_BUILTIN_PSRADI:
17429 case IX86_BUILTIN_PSRADI128:
17430 case IX86_BUILTIN_PSRADI128_MASK:
17431 case IX86_BUILTIN_PSRADI256:
17432 case IX86_BUILTIN_PSRADI256_MASK:
17433 case IX86_BUILTIN_PSRADI512:
17434 case IX86_BUILTIN_PSRAQ128_MASK:
17435 case IX86_BUILTIN_PSRAQ256_MASK:
17436 case IX86_BUILTIN_PSRAQ512:
17437 case IX86_BUILTIN_PSRAQI128_MASK:
17438 case IX86_BUILTIN_PSRAQI256_MASK:
17439 case IX86_BUILTIN_PSRAQI512:
17440 case IX86_BUILTIN_PSRAW:
17441 case IX86_BUILTIN_PSRAW128:
17442 case IX86_BUILTIN_PSRAW128_MASK:
17443 case IX86_BUILTIN_PSRAW256:
17444 case IX86_BUILTIN_PSRAW256_MASK:
17445 case IX86_BUILTIN_PSRAW512:
17446 case IX86_BUILTIN_PSRAWI:
17447 case IX86_BUILTIN_PSRAWI128:
17448 case IX86_BUILTIN_PSRAWI128_MASK:
17449 case IX86_BUILTIN_PSRAWI256:
17450 case IX86_BUILTIN_PSRAWI256_MASK:
17451 case IX86_BUILTIN_PSRAWI512:
17452 rcode = ASHIFTRT;
17453 is_vshift = false;
17454 goto do_shift;
17455 case IX86_BUILTIN_PSRLD:
17456 case IX86_BUILTIN_PSRLD128:
17457 case IX86_BUILTIN_PSRLD128_MASK:
17458 case IX86_BUILTIN_PSRLD256:
17459 case IX86_BUILTIN_PSRLD256_MASK:
17460 case IX86_BUILTIN_PSRLD512:
17461 case IX86_BUILTIN_PSRLDI:
17462 case IX86_BUILTIN_PSRLDI128:
17463 case IX86_BUILTIN_PSRLDI128_MASK:
17464 case IX86_BUILTIN_PSRLDI256:
17465 case IX86_BUILTIN_PSRLDI256_MASK:
17466 case IX86_BUILTIN_PSRLDI512:
17467 case IX86_BUILTIN_PSRLQ:
17468 case IX86_BUILTIN_PSRLQ128:
17469 case IX86_BUILTIN_PSRLQ128_MASK:
17470 case IX86_BUILTIN_PSRLQ256:
17471 case IX86_BUILTIN_PSRLQ256_MASK:
17472 case IX86_BUILTIN_PSRLQ512:
17473 case IX86_BUILTIN_PSRLQI:
17474 case IX86_BUILTIN_PSRLQI128:
17475 case IX86_BUILTIN_PSRLQI128_MASK:
17476 case IX86_BUILTIN_PSRLQI256:
17477 case IX86_BUILTIN_PSRLQI256_MASK:
17478 case IX86_BUILTIN_PSRLQI512:
17479 case IX86_BUILTIN_PSRLW:
17480 case IX86_BUILTIN_PSRLW128:
17481 case IX86_BUILTIN_PSRLW128_MASK:
17482 case IX86_BUILTIN_PSRLW256:
17483 case IX86_BUILTIN_PSRLW256_MASK:
17484 case IX86_BUILTIN_PSRLW512:
17485 case IX86_BUILTIN_PSRLWI:
17486 case IX86_BUILTIN_PSRLWI128:
17487 case IX86_BUILTIN_PSRLWI128_MASK:
17488 case IX86_BUILTIN_PSRLWI256:
17489 case IX86_BUILTIN_PSRLWI256_MASK:
17490 case IX86_BUILTIN_PSRLWI512:
17491 rcode = LSHIFTRT;
17492 is_vshift = false;
17493 goto do_shift;
17494 case IX86_BUILTIN_PSLLVV16HI:
17495 case IX86_BUILTIN_PSLLVV16SI:
17496 case IX86_BUILTIN_PSLLVV2DI:
17497 case IX86_BUILTIN_PSLLVV2DI_MASK:
17498 case IX86_BUILTIN_PSLLVV32HI:
17499 case IX86_BUILTIN_PSLLVV4DI:
17500 case IX86_BUILTIN_PSLLVV4DI_MASK:
17501 case IX86_BUILTIN_PSLLVV4SI:
17502 case IX86_BUILTIN_PSLLVV4SI_MASK:
17503 case IX86_BUILTIN_PSLLVV8DI:
17504 case IX86_BUILTIN_PSLLVV8HI:
17505 case IX86_BUILTIN_PSLLVV8SI:
17506 case IX86_BUILTIN_PSLLVV8SI_MASK:
17507 rcode = ASHIFT;
17508 is_vshift = true;
17509 goto do_shift;
17510 case IX86_BUILTIN_PSRAVQ128:
17511 case IX86_BUILTIN_PSRAVQ256:
17512 case IX86_BUILTIN_PSRAVV16HI:
17513 case IX86_BUILTIN_PSRAVV16SI:
17514 case IX86_BUILTIN_PSRAVV32HI:
17515 case IX86_BUILTIN_PSRAVV4SI:
17516 case IX86_BUILTIN_PSRAVV4SI_MASK:
17517 case IX86_BUILTIN_PSRAVV8DI:
17518 case IX86_BUILTIN_PSRAVV8HI:
17519 case IX86_BUILTIN_PSRAVV8SI:
17520 case IX86_BUILTIN_PSRAVV8SI_MASK:
17521 rcode = ASHIFTRT;
17522 is_vshift = true;
17523 goto do_shift;
17524 case IX86_BUILTIN_PSRLVV16HI:
17525 case IX86_BUILTIN_PSRLVV16SI:
17526 case IX86_BUILTIN_PSRLVV2DI:
17527 case IX86_BUILTIN_PSRLVV2DI_MASK:
17528 case IX86_BUILTIN_PSRLVV32HI:
17529 case IX86_BUILTIN_PSRLVV4DI:
17530 case IX86_BUILTIN_PSRLVV4DI_MASK:
17531 case IX86_BUILTIN_PSRLVV4SI:
17532 case IX86_BUILTIN_PSRLVV4SI_MASK:
17533 case IX86_BUILTIN_PSRLVV8DI:
17534 case IX86_BUILTIN_PSRLVV8HI:
17535 case IX86_BUILTIN_PSRLVV8SI:
17536 case IX86_BUILTIN_PSRLVV8SI_MASK:
17537 rcode = LSHIFTRT;
17538 is_vshift = true;
17539 goto do_shift;
17541 do_shift:
17542 gcc_assert (n_args >= 2);
17543 arg0 = gimple_call_arg (stmt, 0);
17544 arg1 = gimple_call_arg (stmt, 1);
17545 if (n_args > 2)
17547 /* This is a masked shift. Only optimize if the mask is all ones. */
17548 tree argl = gimple_call_arg (stmt, n_args - 1);
17549 if (!tree_fits_uhwi_p (argl))
17550 break;
17551 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17552 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17553 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17554 break;
17556 if (is_vshift)
17558 if (TREE_CODE (arg1) != VECTOR_CST)
17559 break;
17560 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17561 if (integer_zerop (arg1))
17562 count = 0;
17563 else if (rcode == ASHIFTRT)
17564 break;
17565 else
17566 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17568 tree elt = VECTOR_CST_ELT (arg1, i);
17569 if (!wi::neg_p (wi::to_wide (elt))
17570 && wi::to_widest (elt) < count)
17571 return false;
17574 else
17576 arg1 = ix86_vector_shift_count (arg1);
17577 if (!arg1)
17578 break;
17579 count = tree_to_uhwi (arg1);
17581 if (count == 0)
17583 /* Just return the first argument for shift by 0. */
17584 location_t loc = gimple_location (stmt);
17585 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17586 gimple_set_location (g, loc);
17587 gsi_replace (gsi, g, false);
17588 return true;
17590 if (rcode != ASHIFTRT
17591 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17593 /* For shift counts equal to or greater than the precision, the result
17594 is zero, except for arithmetic right shift. */
17595 location_t loc = gimple_location (stmt);
17596 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17597 build_zero_cst (TREE_TYPE (arg0)));
17598 gimple_set_location (g, loc);
17599 gsi_replace (gsi, g, false);
17600 return true;
17602 break;
17604 case IX86_BUILTIN_SHUFPD:
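/* Fold __builtin_ia32_shufpd with a constant selector into a VEC_PERM_EXPR;
   e.g. a selector of 1 picks a[1] and b[0], i.e. a permutation mask of
   { 1, 2 } on the concatenation of the two V2DF operands.  */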
17605 arg2 = gimple_call_arg (stmt, 2);
17606 if (TREE_CODE (arg2) == INTEGER_CST)
17608 location_t loc = gimple_location (stmt);
17609 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17610 arg0 = gimple_call_arg (stmt, 0);
17611 arg1 = gimple_call_arg (stmt, 1);
17612 tree itype = long_long_integer_type_node;
17613 tree vtype = build_vector_type (itype, 2); /* V2DI */
17614 tree_vector_builder elts (vtype, 2, 1);
17615 /* Ignore bits other than the lowest 2. */
17616 elts.quick_push (build_int_cst (itype, imask & 1));
17617 imask >>= 1;
17618 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17619 tree omask = elts.build ();
17620 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17621 VEC_PERM_EXPR,
17622 arg0, arg1, omask);
17623 gimple_set_location (g, loc);
17624 gsi_replace (gsi, g, false);
17625 return true;
17627 // Do not error yet, the constant could be propagated later?
17628 break;
17630 default:
17631 break;
17634 return false;
17637 /* Handler for an SVML-style interface to
17638 a library with vectorized intrinsics. */
17640 tree
17641 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17643 char name[20];
17644 tree fntype, new_fndecl, args;
17645 unsigned arity;
17646 const char *bname;
17647 machine_mode el_mode, in_mode;
17648 int n, in_n;
17650 /* SVML is suitable for unsafe math only. */
17651 if (!flag_unsafe_math_optimizations)
17652 return NULL_TREE;
17654 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17655 n = TYPE_VECTOR_SUBPARTS (type_out);
17656 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17657 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17658 if (el_mode != in_mode
17659 || n != in_n)
17660 return NULL_TREE;
17662 switch (fn)
17664 CASE_CFN_EXP:
17665 CASE_CFN_LOG:
17666 CASE_CFN_LOG10:
17667 CASE_CFN_POW:
17668 CASE_CFN_TANH:
17669 CASE_CFN_TAN:
17670 CASE_CFN_ATAN:
17671 CASE_CFN_ATAN2:
17672 CASE_CFN_ATANH:
17673 CASE_CFN_CBRT:
17674 CASE_CFN_SINH:
17675 CASE_CFN_SIN:
17676 CASE_CFN_ASINH:
17677 CASE_CFN_ASIN:
17678 CASE_CFN_COSH:
17679 CASE_CFN_COS:
17680 CASE_CFN_ACOSH:
17681 CASE_CFN_ACOS:
17682 if ((el_mode != DFmode || n != 2)
17683 && (el_mode != SFmode || n != 4))
17684 return NULL_TREE;
17685 break;
17687 default:
17688 return NULL_TREE;
17691 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17692 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17694 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
17695 strcpy (name, "vmlsLn4");
17696 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
17697 strcpy (name, "vmldLn2");
17698 else if (n == 4)
17700 sprintf (name, "vmls%s", bname+10);
17701 name[strlen (name)-1] = '4';
17703 else
17704 sprintf (name, "vmld%s2", bname+10);
17706 /* Convert to uppercase. */
17707 name[4] &= ~0x20;
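/* The resulting names look like vmlsSin4 (4x float) or vmldSin2
   (2x double), following the SVML naming convention.  */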
17709 arity = 0;
17710 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17711 arity++;
17713 if (arity == 1)
17714 fntype = build_function_type_list (type_out, type_in, NULL);
17715 else
17716 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
17718 /* Build a function declaration for the vectorized function. */
17719 new_fndecl = build_decl (BUILTINS_LOCATION,
17720 FUNCTION_DECL, get_identifier (name), fntype);
17721 TREE_PUBLIC (new_fndecl) = 1;
17722 DECL_EXTERNAL (new_fndecl) = 1;
17723 DECL_IS_NOVOPS (new_fndecl) = 1;
17724 TREE_READONLY (new_fndecl) = 1;
17726 return new_fndecl;
17729 /* Handler for an ACML-style interface to
17730 a library with vectorized intrinsics. */
17732 tree
17733 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
17735 char name[20] = "__vr.._";
17736 tree fntype, new_fndecl, args;
17737 unsigned arity;
17738 const char *bname;
17739 machine_mode el_mode, in_mode;
17740 int n, in_n;
17742 /* ACML is 64-bit only and suitable for unsafe math only, as
17743 it does not correctly support parts of IEEE (such as denormals)
17744 with the required precision. */
17745 if (!TARGET_64BIT
17746 || !flag_unsafe_math_optimizations)
17747 return NULL_TREE;
17749 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17750 n = TYPE_VECTOR_SUBPARTS (type_out);
17751 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17752 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17753 if (el_mode != in_mode
17754 || n != in_n)
17755 return NULL_TREE;
17757 switch (fn)
17759 CASE_CFN_SIN:
17760 CASE_CFN_COS:
17761 CASE_CFN_EXP:
17762 CASE_CFN_LOG:
17763 CASE_CFN_LOG2:
17764 CASE_CFN_LOG10:
17765 if (el_mode == DFmode && n == 2)
17767 name[4] = 'd';
17768 name[5] = '2';
17770 else if (el_mode == SFmode && n == 4)
17772 name[4] = 's';
17773 name[5] = '4';
17775 else
17776 return NULL_TREE;
17777 break;
17779 default:
17780 return NULL_TREE;
17783 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17784 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17785 sprintf (name + 7, "%s", bname+10);
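/* E.g. sin over 2 doubles maps to __vrd2_sin and sinf over 4 floats
   to __vrs4_sinf.  */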
17787 arity = 0;
17788 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17789 arity++;
17791 if (arity == 1)
17792 fntype = build_function_type_list (type_out, type_in, NULL);
17793 else
17794 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
17796 /* Build a function declaration for the vectorized function. */
17797 new_fndecl = build_decl (BUILTINS_LOCATION,
17798 FUNCTION_DECL, get_identifier (name), fntype);
17799 TREE_PUBLIC (new_fndecl) = 1;
17800 DECL_EXTERNAL (new_fndecl) = 1;
17801 DECL_IS_NOVOPS (new_fndecl) = 1;
17802 TREE_READONLY (new_fndecl) = 1;
17804 return new_fndecl;
17807 /* Returns a decl of a function that implements scatter store with
17808 register type VECTYPE and index type INDEX_TYPE and SCALE.
17809 Return NULL_TREE if it is not available. */
17811 static tree
17812 ix86_vectorize_builtin_scatter (const_tree vectype,
17813 const_tree index_type, int scale)
17815 bool si;
17816 enum ix86_builtins code;
17818 if (!TARGET_AVX512F)
17819 return NULL_TREE;
17821 if ((TREE_CODE (index_type) != INTEGER_TYPE
17822 && !POINTER_TYPE_P (index_type))
17823 || (TYPE_MODE (index_type) != SImode
17824 && TYPE_MODE (index_type) != DImode))
17825 return NULL_TREE;
17827 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
17828 return NULL_TREE;
17830 /* The v*scatter* insns sign-extend the index to pointer mode. */
17831 if (TYPE_PRECISION (index_type) < POINTER_SIZE
17832 && TYPE_UNSIGNED (index_type))
17833 return NULL_TREE;
17835 /* Scale can be 1, 2, 4 or 8. */
17836 if (scale <= 0
17837 || scale > 8
17838 || (scale & (scale - 1)) != 0)
17839 return NULL_TREE;
17841 si = TYPE_MODE (index_type) == SImode;
17842 switch (TYPE_MODE (vectype))
17844 case E_V8DFmode:
17845 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
17846 break;
17847 case E_V8DImode:
17848 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
17849 break;
17850 case E_V16SFmode:
17851 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
17852 break;
17853 case E_V16SImode:
17854 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
17855 break;
17856 case E_V4DFmode:
17857 if (TARGET_AVX512VL)
17858 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
17859 else
17860 return NULL_TREE;
17861 break;
17862 case E_V4DImode:
17863 if (TARGET_AVX512VL)
17864 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
17865 else
17866 return NULL_TREE;
17867 break;
17868 case E_V8SFmode:
17869 if (TARGET_AVX512VL)
17870 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
17871 else
17872 return NULL_TREE;
17873 break;
17874 case E_V8SImode:
17875 if (TARGET_AVX512VL)
17876 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
17877 else
17878 return NULL_TREE;
17879 break;
17880 case E_V2DFmode:
17881 if (TARGET_AVX512VL)
17882 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
17883 else
17884 return NULL_TREE;
17885 break;
17886 case E_V2DImode:
17887 if (TARGET_AVX512VL)
17888 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
17889 else
17890 return NULL_TREE;
17891 break;
17892 case E_V4SFmode:
17893 if (TARGET_AVX512VL)
17894 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
17895 else
17896 return NULL_TREE;
17897 break;
17898 case E_V4SImode:
17899 if (TARGET_AVX512VL)
17900 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
17901 else
17902 return NULL_TREE;
17903 break;
17904 default:
17905 return NULL_TREE;
17908 return get_ix86_builtin (code);
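/* Illustrative sketch (assumption, not part of the original source): a
   store loop such as

       void f (double *p, int *idx, double *v, int n)
       {
         for (int i = 0; i < n; i++)
           p[idx[i]] = v[i];
       }

   compiled with AVX-512F enabled may be vectorized using the V8DF entry
   above with a SImode index, i.e. IX86_BUILTIN_SCATTERALTSIV8DF, provided
   the index-precision and scale checks are satisfied.  */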
17911 /* Return true if it is safe to use the rsqrt optabs to optimize
17912 1.0/sqrt. */
17914 static bool
17915 use_rsqrt_p ()
17917 return (TARGET_SSE && TARGET_SSE_MATH
17918 && flag_finite_math_only
17919 && !flag_trapping_math
17920 && flag_unsafe_math_optimizations);
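/* Note (illustrative): all of the flags above are implied by -ffast-math,
   so a typical way to satisfy this predicate on an SSE target is

       gcc -O2 -msse2 -mfpmath=sse -ffast-math ...

   Whether a reciprocal-square-root approximation is actually emitted may
   additionally depend on other options (for example the -mrecip family);
   this predicate only checks that the math-safety flags permit it.  */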
17923 /* Helper for avx_vpermilps256_operand et al. This is also used by
17924 the expansion functions to turn the parallel back into a mask.
17925 The return value is 0 for no match and the imm8+1 for a match. */
17928 avx_vpermilp_parallel (rtx par, machine_mode mode)
17930 unsigned i, nelt = GET_MODE_NUNITS (mode);
17931 unsigned mask = 0;
17932 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
17934 if (XVECLEN (par, 0) != (int) nelt)
17935 return 0;
17937 /* Validate that all of the elements are constants, and not totally
17938 out of range. Copy the data into an integral array to make the
17939 subsequent checks easier. */
17940 for (i = 0; i < nelt; ++i)
17942 rtx er = XVECEXP (par, 0, i);
17943 unsigned HOST_WIDE_INT ei;
17945 if (!CONST_INT_P (er))
17946 return 0;
17947 ei = INTVAL (er);
17948 if (ei >= nelt)
17949 return 0;
17950 ipar[i] = ei;
17953 switch (mode)
17955 case E_V8DFmode:
17956 /* In the 512-bit DFmode case, we can only move elements within
17957 a 128-bit lane. First fill the second part of the mask,
17958 then fallthru. */
17959 for (i = 4; i < 6; ++i)
17961 if (ipar[i] < 4 || ipar[i] >= 6)
17962 return 0;
17963 mask |= (ipar[i] - 4) << i;
17965 for (i = 6; i < 8; ++i)
17967 if (ipar[i] < 6)
17968 return 0;
17969 mask |= (ipar[i] - 6) << i;
17971 /* FALLTHRU */
17973 case E_V4DFmode:
17974 /* In the 256-bit DFmode case, we can only move elements within
17975 a 128-bit lane. */
17976 for (i = 0; i < 2; ++i)
17978 if (ipar[i] >= 2)
17979 return 0;
17980 mask |= ipar[i] << i;
17982 for (i = 2; i < 4; ++i)
17984 if (ipar[i] < 2)
17985 return 0;
17986 mask |= (ipar[i] - 2) << i;
17988 break;
17990 case E_V16SFmode:
17991 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
17992 must mirror the permutation in the lower 256 bits. */
17993 for (i = 0; i < 8; ++i)
17994 if (ipar[i] + 8 != ipar[i + 8])
17995 return 0;
17996 /* FALLTHRU */
17998 case E_V8SFmode:
17999 /* In the 256-bit SFmode case, we have full freedom of
18000 movement within the low 128-bit lane, but the high 128-bit
18001 lane must mirror the exact same pattern. */
18002 for (i = 0; i < 4; ++i)
18003 if (ipar[i] + 4 != ipar[i + 4])
18004 return 0;
18005 nelt = 4;
18006 /* FALLTHRU */
18008 case E_V2DFmode:
18009 case E_V4SFmode:
18010 /* In the 128-bit case, we have full freedom in the placement of
18011 the elements from the source operand. */
18012 for (i = 0; i < nelt; ++i)
18013 mask |= ipar[i] << (i * (nelt / 2));
18014 break;
18016 default:
18017 gcc_unreachable ();
18020 /* Make sure success has a non-zero value by adding one. */
18021 return mask + 1;
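/* Worked example (illustrative): for V4SF and the parallel (2 3 0 1),
   nelt is 4 and each element contributes ipar[i] << (i * 2), i.e.
   2 | (3 << 2) | (0 << 4) | (1 << 6) = 0x4e, so the function returns
   0x4f (the imm8 0x4e plus one to signal a match).  */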
18024 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18025 the expansion functions to turn the parallel back into a mask.
18026 The return value is 0 for no match and the imm8+1 for a match. */
18029 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18031 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18032 unsigned mask = 0;
18033 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
18035 if (XVECLEN (par, 0) != (int) nelt)
18036 return 0;
18038 /* Validate that all of the elements are constants, and not totally
18039 out of range. Copy the data into an integral array to make the
18040 subsequent checks easier. */
18041 for (i = 0; i < nelt; ++i)
18043 rtx er = XVECEXP (par, 0, i);
18044 unsigned HOST_WIDE_INT ei;
18046 if (!CONST_INT_P (er))
18047 return 0;
18048 ei = INTVAL (er);
18049 if (ei >= 2 * nelt)
18050 return 0;
18051 ipar[i] = ei;
18054 /* Validate that the elements within each half of the permute are consecutive. */
18055 for (i = 0; i < nelt2 - 1; ++i)
18056 if (ipar[i] + 1 != ipar[i + 1])
18057 return 0;
18058 for (i = nelt2; i < nelt - 1; ++i)
18059 if (ipar[i] + 1 != ipar[i + 1])
18060 return 0;
18062 /* Reconstruct the mask. */
18063 for (i = 0; i < 2; ++i)
18065 unsigned e = ipar[i * nelt2];
18066 if (e % nelt2)
18067 return 0;
18068 e /= nelt2;
18069 mask |= e << (i * 4);
18072 /* Make sure success has a non-zero value by adding one. */
18073 return mask + 1;
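/* Worked example (illustrative): for V4DF (nelt = 4, nelt2 = 2) and the
   parallel (2 3 4 5), both halves are consecutive runs, and the two lane
   selectors are ipar[0] / 2 = 1 and ipar[2] / 2 = 2, giving
   mask = 1 | (2 << 4) = 0x21; the function returns 0x22 (imm8 + 1).  */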
18076 /* Return a register priority for hard reg REGNO. */
18077 static int
18078 ix86_register_priority (int hard_regno)
18080 /* ebp and r13 as a base always want a displacement, and r12 as a
18081 base always wants an index. So discourage their use in an
18082 address. */
18083 if (hard_regno == R12_REG || hard_regno == R13_REG)
18084 return 0;
18085 if (hard_regno == BP_REG)
18086 return 1;
18087 /* New x86-64 int registers result in bigger code size. Discourage
18088 them. */
18089 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18090 return 2;
18091 /* New x86-64 SSE registers result in bigger code size. Discourage
18092 them. */
18093 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18094 return 2;
18095 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18096 return 1;
18097 /* Usage of AX register results in smaller code. Prefer it. */
18098 if (hard_regno == AX_REG)
18099 return 4;
18100 return 3;
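/* Summary of the priorities above (a sketch; higher values are preferred
   by the register allocator): eax = 4; most legacy registers = 3; r8-r11,
   r14, r15 and xmm8-xmm15 = 2; ebp and xmm16-xmm31 = 1; r12 and r13 = 0.  */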
18103 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18105 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18106 QImode must go into class Q_REGS.
18107 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18108 movdf to do mem-to-mem moves through integer regs. */
18110 static reg_class_t
18111 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18113 machine_mode mode = GET_MODE (x);
18115 /* We're only allowed to return a subclass of CLASS. Many of the
18116 following checks fail for NO_REGS, so eliminate that early. */
18117 if (regclass == NO_REGS)
18118 return NO_REGS;
18120 /* All classes can load zeros. */
18121 if (x == CONST0_RTX (mode))
18122 return regclass;
18124 /* Force constants into memory if we are loading a (nonzero) constant into
18125 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18126 instructions to load from a constant. */
18127 if (CONSTANT_P (x)
18128 && (MAYBE_MMX_CLASS_P (regclass)
18129 || MAYBE_SSE_CLASS_P (regclass)
18130 || MAYBE_MASK_CLASS_P (regclass)))
18131 return NO_REGS;
18133 /* Floating-point constants need more complex checks. */
18134 if (CONST_DOUBLE_P (x))
18136 /* General regs can load everything. */
18137 if (INTEGER_CLASS_P (regclass))
18138 return regclass;
18140 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18141 zero above. We only want to wind up preferring 80387 registers if
18142 we plan on doing computation with them. */
18143 if (IS_STACK_MODE (mode)
18144 && standard_80387_constant_p (x) > 0)
18146 /* Limit class to FP regs. */
18147 if (FLOAT_CLASS_P (regclass))
18148 return FLOAT_REGS;
18151 return NO_REGS;
18154 /* Prefer SSE regs only, if we can use them for math. */
18155 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18156 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18158 /* Generally when we see PLUS here, it's the function invariant
18159 (plus soft-fp const_int), which can only be computed into general
18160 regs. */
18161 if (GET_CODE (x) == PLUS)
18162 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18164 /* QImode constants are easy to load, but non-constant QImode data
18165 must go into Q_REGS. */
18166 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18168 if (Q_CLASS_P (regclass))
18169 return regclass;
18170 else if (reg_class_subset_p (Q_REGS, regclass))
18171 return Q_REGS;
18172 else
18173 return NO_REGS;
18176 return regclass;
18179 /* Discourage putting floating-point values in SSE registers unless
18180 SSE math is being used, and likewise for the 387 registers. */
18181 static reg_class_t
18182 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18184 machine_mode mode = GET_MODE (x);
18186 /* Restrict the output reload class to the register bank that we are doing
18187 math on. If we would like not to return a subset of CLASS, reject this
18188 alternative: if reload cannot do this, it will still use its choice. */
18189 mode = GET_MODE (x);
18190 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18191 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18193 if (IS_STACK_MODE (mode))
18194 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18196 return regclass;
18199 static reg_class_t
18200 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18201 machine_mode mode, secondary_reload_info *sri)
18203 /* Double-word spills from general registers to non-offsettable memory
18204 references (zero-extended addresses) require special handling. */
18205 if (TARGET_64BIT
18206 && MEM_P (x)
18207 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18208 && INTEGER_CLASS_P (rclass)
18209 && !offsettable_memref_p (x))
18211 sri->icode = (in_p
18212 ? CODE_FOR_reload_noff_load
18213 : CODE_FOR_reload_noff_store);
18214 /* Add the cost of moving address to a temporary. */
18215 sri->extra_cost = 1;
18217 return NO_REGS;
18220 /* QImode spills from non-QI registers require an
18221 intermediate register on 32-bit targets. */
18222 if (mode == QImode
18223 && ((!TARGET_64BIT && !in_p
18224 && INTEGER_CLASS_P (rclass)
18225 && MAYBE_NON_Q_CLASS_P (rclass))
18226 || (!TARGET_AVX512DQ
18227 && MAYBE_MASK_CLASS_P (rclass))))
18229 int regno = true_regnum (x);
18231 /* Return Q_REGS if the operand is in memory. */
18232 if (regno == -1)
18233 return Q_REGS;
18235 return NO_REGS;
18238 /* This condition handles corner case where an expression involving
18239 pointers gets vectorized. We're trying to use the address of a
18240 stack slot as a vector initializer.
18242 (set (reg:V2DI 74 [ vect_cst_.2 ])
18243 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18245 Eventually frame gets turned into sp+offset like this:
18247 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18248 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18249 (const_int 392 [0x188]))))
18251 That later gets turned into:
18253 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18254 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18255 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18257 We'll have the following reload recorded:
18259 Reload 0: reload_in (DI) =
18260 (plus:DI (reg/f:DI 7 sp)
18261 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18262 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18263 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18264 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18265 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18266 reload_reg_rtx: (reg:V2DI 22 xmm1)
18268 Which isn't going to work since SSE instructions can't handle scalar
18269 additions. Returning GENERAL_REGS forces the addition into integer
18270 register and reload can handle subsequent reloads without problems. */
18272 if (in_p && GET_CODE (x) == PLUS
18273 && SSE_CLASS_P (rclass)
18274 && SCALAR_INT_MODE_P (mode))
18275 return GENERAL_REGS;
18277 return NO_REGS;
18280 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18282 static bool
18283 ix86_class_likely_spilled_p (reg_class_t rclass)
18285 switch (rclass)
18287 case AREG:
18288 case DREG:
18289 case CREG:
18290 case BREG:
18291 case AD_REGS:
18292 case SIREG:
18293 case DIREG:
18294 case SSE_FIRST_REG:
18295 case FP_TOP_REG:
18296 case FP_SECOND_REG:
18297 return true;
18299 default:
18300 break;
18303 return false;
18306 /* If we are copying between registers from different register sets
18307 (e.g. FP and integer), we may need a memory location.
18309 The function can't work reliably when one of the CLASSES is a class
18310 containing registers from multiple sets. We avoid this by never combining
18311 different sets in a single alternative in the machine description.
18312 Ensure that this constraint holds to avoid unexpected surprises.
18314 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18315 so do not enforce these sanity checks.
18317 To optimize register_move_cost performance, define inline variant. */
18319 static inline bool
18320 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18321 reg_class_t class2, int strict)
18323 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18324 return false;
18326 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18327 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18328 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18329 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18330 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18331 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18332 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18333 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18335 gcc_assert (!strict || lra_in_progress);
18336 return true;
18339 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18340 return true;
18342 /* Between mask and general, we have moves no larger than word size. */
18343 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18344 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
18345 return true;
18347 /* ??? This is a lie. We do have moves between mmx/general, and
18348 between mmx/sse2. But by saying we need secondary memory we discourage
18349 the register allocator from using the mmx registers unless needed. */
18350 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18351 return true;
18353 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18355 /* SSE1 doesn't have any direct moves from other classes. */
18356 if (!TARGET_SSE2)
18357 return true;
18359 /* If the target says that inter-unit moves are more expensive
18360 than moving through memory, then don't generate them. */
18361 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18362 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18363 return true;
18365 /* Between SSE and general, we have moves no larger than word size. */
18366 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18367 return true;
18370 return false;
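/* Worked example (illustrative): on a 32-bit target, moving a DImode value
   between GENERAL_REGS and SSE_REGS reports secondary memory, since
   GET_MODE_SIZE (DImode) = 8 exceeds UNITS_PER_WORD = 4; the same move can
   be done directly on x86-64 when the inter-unit move tuning flags
   permit it.  */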
18373 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18375 static bool
18376 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18377 reg_class_t class2)
18379 return inline_secondary_memory_needed (mode, class1, class2, true);
18382 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18384 get_secondary_mem widens integral modes to BITS_PER_WORD.
18385 There is no need to emit full 64 bit move on 64 bit targets
18386 for integral modes that can be moved using 32 bit move. */
18388 static machine_mode
18389 ix86_secondary_memory_needed_mode (machine_mode mode)
18391 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18392 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18393 return mode;
18396 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18398 On the 80386, this is the size of MODE in words,
18399 except in the FP regs, where a single reg is always enough. */
18401 static unsigned char
18402 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18404 if (MAYBE_INTEGER_CLASS_P (rclass))
18406 if (mode == XFmode)
18407 return (TARGET_64BIT ? 2 : 3);
18408 else if (mode == XCmode)
18409 return (TARGET_64BIT ? 4 : 6);
18410 else
18411 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18413 else
18415 if (COMPLEX_MODE_P (mode))
18416 return 2;
18417 else
18418 return 1;
18422 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18424 static bool
18425 ix86_can_change_mode_class (machine_mode from, machine_mode to,
18426 reg_class_t regclass)
18428 if (from == to)
18429 return true;
18431 /* x87 registers can't do subreg at all, as all values are reformatted
18432 to extended precision. */
18433 if (MAYBE_FLOAT_CLASS_P (regclass))
18434 return false;
18436 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18438 /* Vector registers do not support QI or HImode loads. If we don't
18439 disallow a change to these modes, reload will assume it's ok to
18440 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18441 the vec_dupv4hi pattern. */
18442 if (GET_MODE_SIZE (from) < 4)
18443 return false;
18446 return true;
18449 /* Return index of MODE in the sse load/store tables. */
18451 static inline int
18452 sse_store_index (machine_mode mode)
18454 switch (GET_MODE_SIZE (mode))
18456 case 4:
18457 return 0;
18458 case 8:
18459 return 1;
18460 case 16:
18461 return 2;
18462 case 32:
18463 return 3;
18464 case 64:
18465 return 4;
18466 default:
18467 return -1;
18471 /* Return the cost of moving data of mode M between a
18472 register and memory. A value of 2 is the default; this cost is
18473 relative to those in `REGISTER_MOVE_COST'.
18475 This function is used extensively by register_move_cost, which is used
18476 to build tables at startup. Make it inline in this case.
18477 When IN is 2, return the maximum of the in and out move costs.
18479 If moving between registers and memory is more expensive than
18480 between two registers, you should define this macro to express the
18481 relative cost.
18483 Also model the increased cost of moving QImode registers in
18484 non-Q_REGS classes. */
18486 static inline int
18487 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18489 int cost;
18490 if (FLOAT_CLASS_P (regclass))
18492 int index;
18493 switch (mode)
18495 case E_SFmode:
18496 index = 0;
18497 break;
18498 case E_DFmode:
18499 index = 1;
18500 break;
18501 case E_XFmode:
18502 index = 2;
18503 break;
18504 default:
18505 return 100;
18507 if (in == 2)
18508 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
18509 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18511 if (SSE_CLASS_P (regclass))
18513 int index = sse_store_index (mode);
18514 if (index == -1)
18515 return 100;
18516 if (in == 2)
18517 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
18518 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18520 if (MMX_CLASS_P (regclass))
18522 int index;
18523 switch (GET_MODE_SIZE (mode))
18525 case 4:
18526 index = 0;
18527 break;
18528 case 8:
18529 index = 1;
18530 break;
18531 default:
18532 return 100;
18534 if (in == 2)
18535 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
18536 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18538 switch (GET_MODE_SIZE (mode))
18540 case 1:
18541 if (Q_CLASS_P (regclass) || TARGET_64BIT)
18543 if (!in)
18544 return ix86_cost->int_store[0];
18545 if (TARGET_PARTIAL_REG_DEPENDENCY
18546 && optimize_function_for_speed_p (cfun))
18547 cost = ix86_cost->movzbl_load;
18548 else
18549 cost = ix86_cost->int_load[0];
18550 if (in == 2)
18551 return MAX (cost, ix86_cost->int_store[0]);
18552 return cost;
18554 else
18556 if (in == 2)
18557 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
18558 if (in)
18559 return ix86_cost->movzbl_load;
18560 else
18561 return ix86_cost->int_store[0] + 4;
18563 break;
18564 case 2:
18565 if (in == 2)
18566 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
18567 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18568 default:
18569 if (in == 2)
18570 cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
18571 else if (in)
18572 cost = ix86_cost->int_load[2];
18573 else
18574 cost = ix86_cost->int_store[2];
18575 /* Multiply with the number of GPR moves needed. */
18576 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
18580 static int
18581 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18583 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18587 /* Return the cost of moving data from a register in class CLASS1 to
18588 one in class CLASS2.
18590 It is not required that the cost always equal 2 when FROM is the same as TO;
18591 on some machines it is expensive to move between registers if they are not
18592 general registers. */
18594 static int
18595 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18596 reg_class_t class2_i)
18598 enum reg_class class1 = (enum reg_class) class1_i;
18599 enum reg_class class2 = (enum reg_class) class2_i;
18601 /* In case we require secondary memory, compute cost of the store followed
18602 by load. In order to avoid bad register allocation choices, we need
18603 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18605 if (inline_secondary_memory_needed (mode, class1, class2, false))
18607 int cost = 1;
18609 cost += inline_memory_move_cost (mode, class1, 2);
18610 cost += inline_memory_move_cost (mode, class2, 2);
18612 /* In case of copying from general-purpose registers we may emit
18613 multiple stores followed by a single load, causing a memory-size
18614 mismatch stall. Count this as an arbitrarily high cost of 20. */
18615 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18616 && TARGET_MEMORY_MISMATCH_STALL
18617 && targetm.class_max_nregs (class1, mode)
18618 > targetm.class_max_nregs (class2, mode))
18619 cost += 20;
18621 /* In the case of FP/MMX moves, the registers actually overlap, and we
18622 have to switch modes in order to treat them differently. */
18623 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18624 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18625 cost += 20;
18627 return cost;
18630 /* Moves between SSE/MMX and integer unit are expensive. */
18631 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18632 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18634 /* ??? By keeping the returned value relatively high, we limit the number
18635 of moves between integer and MMX/SSE registers for all targets.
18636 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
18637 where integer modes in MMX/SSE registers are not tieable
18638 because of missing QImode and HImode moves to, from or between
18639 MMX/SSE registers. */
18640 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
18641 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
18643 if (MAYBE_FLOAT_CLASS_P (class1))
18644 return ix86_cost->fp_move;
18645 if (MAYBE_SSE_CLASS_P (class1))
18647 if (GET_MODE_BITSIZE (mode) <= 128)
18648 return ix86_cost->xmm_move;
18649 if (GET_MODE_BITSIZE (mode) <= 256)
18650 return ix86_cost->ymm_move;
18651 return ix86_cost->zmm_move;
18653 if (MAYBE_MMX_CLASS_P (class1))
18654 return ix86_cost->mmx_move;
18655 return 2;
18658 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18659 words of a value of mode MODE but can be less for certain modes in
18660 special long registers.
18662 Actually there are no two-word move instructions for consecutive
18663 registers, and only registers 0-3 may have mov byte instructions
18664 applied to them. */
18666 static unsigned int
18667 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18669 if (GENERAL_REGNO_P (regno))
18671 if (mode == XFmode)
18672 return TARGET_64BIT ? 2 : 3;
18673 if (mode == XCmode)
18674 return TARGET_64BIT ? 4 : 6;
18675 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18677 if (COMPLEX_MODE_P (mode))
18678 return 2;
18679 if (mode == V64SFmode || mode == V64SImode)
18680 return 4;
18681 return 1;
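/* Worked examples (illustrative): XFmode in a general register takes
   3 registers on a 32-bit target (12 bytes / 4) and 2 on x86-64; a DImode
   value takes 2 general registers on a 32-bit target; most non-complex
   values held in an SSE, MMX or x87 register take 1.  */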
18684 /* Implement TARGET_HARD_REGNO_MODE_OK. */
18686 static bool
18687 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
18689 /* The flags register, and only the flags register, can hold CCmode values. */
18690 if (CC_REGNO_P (regno))
18691 return GET_MODE_CLASS (mode) == MODE_CC;
18692 if (GET_MODE_CLASS (mode) == MODE_CC
18693 || GET_MODE_CLASS (mode) == MODE_RANDOM
18694 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18695 return false;
18696 if (STACK_REGNO_P (regno))
18697 return VALID_FP_MODE_P (mode);
18698 if (MASK_REGNO_P (regno))
18699 return (VALID_MASK_REG_MODE (mode)
18700 || (TARGET_AVX512BW
18701 && VALID_MASK_AVX512BW_MODE (mode)));
18702 if (SSE_REGNO_P (regno))
18704 /* We implement the move patterns for all vector modes into and
18705 out of SSE registers, even when no operation instructions
18706 are available. */
18708 /* For AVX-512 we allow, regardless of regno:
18709 - XI mode
18710 - any 512-bit wide vector mode
18711 - any scalar mode. */
18712 if (TARGET_AVX512F
18713 && (mode == XImode
18714 || VALID_AVX512F_REG_MODE (mode)
18715 || VALID_AVX512F_SCALAR_MODE (mode)))
18716 return true;
18718 /* For AVX-5124FMAPS or AVX-5124VNNIW
18719 allow V64SF and V64SI modes for special regnos. */
18720 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
18721 && (mode == V64SFmode || mode == V64SImode)
18722 && MOD4_SSE_REGNO_P (regno))
18723 return true;
18725 /* TODO check for QI/HI scalars. */
18726 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
18727 if (TARGET_AVX512VL
18728 && (mode == OImode
18729 || mode == TImode
18730 || VALID_AVX256_REG_MODE (mode)
18731 || VALID_AVX512VL_128_REG_MODE (mode)))
18732 return true;
18734 /* xmm16-xmm31 are only available for AVX-512. */
18735 if (EXT_REX_SSE_REGNO_P (regno))
18736 return false;
18738 /* OImode and AVX modes are available only when AVX is enabled. */
18739 return ((TARGET_AVX
18740 && VALID_AVX256_REG_OR_OI_MODE (mode))
18741 || VALID_SSE_REG_MODE (mode)
18742 || VALID_SSE2_REG_MODE (mode)
18743 || VALID_MMX_REG_MODE (mode)
18744 || VALID_MMX_REG_MODE_3DNOW (mode));
18746 if (MMX_REGNO_P (regno))
18748 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18749 so if the register is available at all, then we can move data of
18750 the given mode into or out of it. */
18751 return (VALID_MMX_REG_MODE (mode)
18752 || VALID_MMX_REG_MODE_3DNOW (mode));
18755 if (mode == QImode)
18757 /* Take care for QImode values - they can be in non-QI regs,
18758 but then they do cause partial register stalls. */
18759 if (ANY_QI_REGNO_P (regno))
18760 return true;
18761 if (!TARGET_PARTIAL_REG_STALL)
18762 return true;
18763 /* LRA checks if the hard register is OK for the given mode.
18764 QImode values can live in non-QI regs, so we allow all
18765 registers here. */
18766 if (lra_in_progress)
18767 return true;
18768 return !can_create_pseudo_p ();
18770 /* We handle both integer and floats in the general purpose registers. */
18771 else if (VALID_INT_MODE_P (mode))
18772 return true;
18773 else if (VALID_FP_MODE_P (mode))
18774 return true;
18775 else if (VALID_DFP_MODE_P (mode))
18776 return true;
18777 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18778 on to use that value in smaller contexts, this can easily force a
18779 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18780 supporting DImode, allow it. */
18781 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18782 return true;
18784 return false;
18787 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
18788 saves SSE registers across calls is Win64 (thus no need to check the
18789 current ABI here), and with AVX enabled Win64 only guarantees that
18790 the low 16 bytes are saved. */
18792 static bool
18793 ix86_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
18794 unsigned int regno, machine_mode mode)
18796 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
18799 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18800 tieable integer mode. */
18802 static bool
18803 ix86_tieable_integer_mode_p (machine_mode mode)
18805 switch (mode)
18807 case E_HImode:
18808 case E_SImode:
18809 return true;
18811 case E_QImode:
18812 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18814 case E_DImode:
18815 return TARGET_64BIT;
18817 default:
18818 return false;
18822 /* Implement TARGET_MODES_TIEABLE_P.
18824 Return true if MODE1 is accessible in a register that can hold MODE2
18825 without copying. That is, all register classes that can hold MODE2
18826 can also hold MODE1. */
18828 static bool
18829 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
18831 if (mode1 == mode2)
18832 return true;
18834 if (ix86_tieable_integer_mode_p (mode1)
18835 && ix86_tieable_integer_mode_p (mode2))
18836 return true;
18838 /* MODE2 being XFmode implies fp stack or general regs, which means we
18839 can tie any smaller floating point modes to it. Note that we do not
18840 tie this with TFmode. */
18841 if (mode2 == XFmode)
18842 return mode1 == SFmode || mode1 == DFmode;
18844 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18845 that we can tie it with SFmode. */
18846 if (mode2 == DFmode)
18847 return mode1 == SFmode;
18849 /* If MODE2 is only appropriate for an SSE register, then tie with
18850 any other mode acceptable to SSE registers. */
18851 if (GET_MODE_SIZE (mode2) == 64
18852 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18853 return (GET_MODE_SIZE (mode1) == 64
18854 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18855 if (GET_MODE_SIZE (mode2) == 32
18856 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18857 return (GET_MODE_SIZE (mode1) == 32
18858 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18859 if (GET_MODE_SIZE (mode2) == 16
18860 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18861 return (GET_MODE_SIZE (mode1) == 16
18862 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18864 /* If MODE2 is appropriate for an MMX register, then tie
18865 with any other mode acceptable to MMX registers. */
18866 if (GET_MODE_SIZE (mode2) == 8
18867 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18868 return (GET_MODE_SIZE (mode1) == 8
18869 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18871 return false;
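/* Examples (illustrative): SFmode ties with DFmode and XFmode; V4SFmode
   and V2DFmode tie with each other (both are 16-byte SSE modes); SImode
   and HImode tie, while DImode ties with the other integer modes only on
   x86-64.  */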
18874 /* Return the cost of moving between two registers of mode MODE. */
18876 static int
18877 ix86_set_reg_reg_cost (machine_mode mode)
18879 unsigned int units = UNITS_PER_WORD;
18881 switch (GET_MODE_CLASS (mode))
18883 default:
18884 break;
18886 case MODE_CC:
18887 units = GET_MODE_SIZE (CCmode);
18888 break;
18890 case MODE_FLOAT:
18891 if ((TARGET_SSE && mode == TFmode)
18892 || (TARGET_80387 && mode == XFmode)
18893 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
18894 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
18895 units = GET_MODE_SIZE (mode);
18896 break;
18898 case MODE_COMPLEX_FLOAT:
18899 if ((TARGET_SSE && mode == TCmode)
18900 || (TARGET_80387 && mode == XCmode)
18901 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
18902 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
18903 units = GET_MODE_SIZE (mode);
18904 break;
18906 case MODE_VECTOR_INT:
18907 case MODE_VECTOR_FLOAT:
18908 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
18909 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
18910 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18911 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18912 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
18913 && VALID_MMX_REG_MODE (mode)))
18914 units = GET_MODE_SIZE (mode);
18917 /* Return the cost of moving between two registers of mode MODE,
18918 assuming that the move will be in pieces of at most UNITS bytes. */
18919 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
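/* Worked example (illustrative): a DFmode register copy is priced as a
   single insn (COSTS_N_INSNS (1)) when either the 80387 or SSE2 is
   available, since UNITS then equals the 8-byte mode size; on a
   hypothetical ia32 target with neither, it would be priced as two
   word-sized moves.  */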
18922 /* Return cost of vector operation in MODE given that scalar version has
18923 COST. */
18925 static int
18926 ix86_vec_cost (machine_mode mode, int cost)
18928 if (!VECTOR_MODE_P (mode))
18929 return cost;
18931 if (GET_MODE_BITSIZE (mode) == 128
18932 && TARGET_SSE_SPLIT_REGS)
18933 return cost * 2;
18934 if (GET_MODE_BITSIZE (mode) > 128
18935 && TARGET_AVX128_OPTIMAL)
18936 return cost * GET_MODE_BITSIZE (mode) / 128;
18937 return cost;
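/* Example (illustrative): with a tuning that sets TARGET_AVX128_OPTIMAL
   (i.e. one that prefers splitting wide AVX operations into 128-bit
   halves), a 256-bit vector operation is charged twice the derived cost
   and a 512-bit one four times, reflecting the expected split.  */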
18940 /* Return cost of multiplication in MODE. */
18942 static int
18943 ix86_multiplication_cost (const struct processor_costs *cost,
18944 enum machine_mode mode)
18946 machine_mode inner_mode = mode;
18947 if (VECTOR_MODE_P (mode))
18948 inner_mode = GET_MODE_INNER (mode);
18950 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18951 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
18952 else if (X87_FLOAT_MODE_P (mode))
18953 return cost->fmul;
18954 else if (FLOAT_MODE_P (mode))
18955 return ix86_vec_cost (mode,
18956 inner_mode == DFmode ? cost->mulsd : cost->mulss);
18957 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18959 /* vpmullq is used in this case. No emulation is needed. */
18960 if (TARGET_AVX512DQ)
18961 return ix86_vec_cost (mode, cost->mulss);
18963 /* V*QImode is emulated with 7-13 insns. */
18964 if (mode == V16QImode || mode == V32QImode)
18966 int extra = 11;
18967 if (TARGET_XOP && mode == V16QImode)
18968 extra = 5;
18969 else if (TARGET_SSSE3)
18970 extra = 6;
18971 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
18973 /* V*DImode is emulated with 5-8 insns. */
18974 else if (mode == V2DImode || mode == V4DImode)
18976 if (TARGET_XOP && mode == V2DImode)
18977 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
18978 else
18979 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
18981 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
18982 insns, including two PMULUDQ. */
18983 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
18984 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
18985 else
18986 return ix86_vec_cost (mode, cost->mulss);
18988 else
18989 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
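/* Worked example (illustrative): a V16QImode multiply without XOP or SSSE3
   is priced as 2 * mulss + 11 * sse_op (before vector scaling), matching
   the 7-13 insn emulation noted above; with SSSE3 the extra-insn factor
   drops to 6, and with XOP to 5.  */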
18992 /* Return cost of division in MODE. */
18994 static int
18995 ix86_division_cost (const struct processor_costs *cost,
18996 enum machine_mode mode)
18998 machine_mode inner_mode = mode;
18999 if (VECTOR_MODE_P (mode))
19000 inner_mode = GET_MODE_INNER (mode);
19002 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19003 return inner_mode == DFmode ? cost->divsd : cost->divss;
19004 else if (X87_FLOAT_MODE_P (mode))
19005 return cost->fdiv;
19006 else if (FLOAT_MODE_P (mode))
19007 return ix86_vec_cost (mode,
19008 inner_mode == DFmode ? cost->divsd : cost->divss);
19009 else
19010 return cost->divide[MODE_INDEX (mode)];
19013 #define COSTS_N_BYTES(N) ((N) * 2)
19015 /* Return cost of shift in MODE.
19016 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19017 AND_IN_OP1 specifies whether op1 is the result of an AND, and
19018 SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
19020 SKIP_OP0/1 are set to true if the cost of OP0/1 should be ignored. */
19022 static int
19023 ix86_shift_rotate_cost (const struct processor_costs *cost,
19024 enum machine_mode mode, bool constant_op1,
19025 HOST_WIDE_INT op1_val,
19026 bool speed,
19027 bool and_in_op1,
19028 bool shift_and_truncate,
19029 bool *skip_op0, bool *skip_op1)
19031 if (skip_op0)
19032 *skip_op0 = *skip_op1 = false;
19033 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19035 /* V*QImode is emulated with 1-11 insns. */
19036 if (mode == V16QImode || mode == V32QImode)
19038 int count = 11;
19039 if (TARGET_XOP && mode == V16QImode)
19041 /* For XOP we use vpshab, which requires a broadcast of the
19042 value to the variable shift insn. For constants this
19043 means a V16QI const in mem; even when we can perform the
19044 shift with one insn, set the cost so as to prefer paddb. */
19045 if (constant_op1)
19047 if (skip_op1)
19048 *skip_op1 = true;
19049 return ix86_vec_cost (mode,
19050 cost->sse_op
19051 + (speed
19053 : COSTS_N_BYTES
19054 (GET_MODE_UNIT_SIZE (mode))));
19056 count = 3;
19058 else if (TARGET_SSSE3)
19059 count = 7;
19060 return ix86_vec_cost (mode, cost->sse_op * count);
19062 else
19063 return ix86_vec_cost (mode, cost->sse_op);
19065 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19067 if (constant_op1)
19069 if (op1_val > 32)
19070 return cost->shift_const + COSTS_N_INSNS (2);
19071 else
19072 return cost->shift_const * 2;
19074 else
19076 if (and_in_op1)
19077 return cost->shift_var * 2;
19078 else
19079 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19082 else
19084 if (constant_op1)
19085 return cost->shift_const;
19086 else if (shift_and_truncate)
19088 if (skip_op0)
19089 *skip_op0 = *skip_op1 = true;
19090 /* Return the cost after shift-and truncation. */
19091 return cost->shift_var;
19093 else
19094 return cost->shift_var;
19096 return cost->shift_const;
19099 /* Compute a (partial) cost for rtx X. Return true if the complete
19100 cost has been computed, and false if subexpressions should be
19101 scanned. In either case, *TOTAL contains the cost result. */
19103 static bool
19104 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19105 int *total, bool speed)
19107 rtx mask;
19108 enum rtx_code code = GET_CODE (x);
19109 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19110 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
19111 int src_cost;
19113 switch (code)
19115 case SET:
19116 if (register_operand (SET_DEST (x), VOIDmode)
19117 && register_operand (SET_SRC (x), VOIDmode))
19119 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19120 return true;
19123 if (register_operand (SET_SRC (x), VOIDmode))
19124 /* Avoid potentially incorrect high cost from rtx_costs
19125 for non-tieable SUBREGs. */
19126 src_cost = 0;
19127 else
19129 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19131 if (CONSTANT_P (SET_SRC (x)))
19132 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19133 a small value, possibly zero for cheap constants. */
19134 src_cost += COSTS_N_INSNS (1);
19137 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19138 return true;
19140 case CONST_INT:
19141 case CONST:
19142 case LABEL_REF:
19143 case SYMBOL_REF:
19144 if (x86_64_immediate_operand (x, VOIDmode))
19145 *total = 0;
19146 else
19147 *total = 1;
19148 return true;
19150 case CONST_DOUBLE:
19151 if (IS_STACK_MODE (mode))
19152 switch (standard_80387_constant_p (x))
19154 case -1:
19155 case 0:
19156 break;
19157 case 1: /* 0.0 */
19158 *total = 1;
19159 return true;
19160 default: /* Other constants */
19161 *total = 2;
19162 return true;
19164 /* FALLTHRU */
19166 case CONST_VECTOR:
19167 switch (standard_sse_constant_p (x, mode))
19169 case 0:
19170 break;
19171 case 1: /* 0: xor eliminates false dependency */
19172 *total = 0;
19173 return true;
19174 default: /* -1: cmp contains false dependency */
19175 *total = 1;
19176 return true;
19178 /* FALLTHRU */
19180 case CONST_WIDE_INT:
19181 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19182 it'll probably end up. Add a penalty for size. */
19183 *total = (COSTS_N_INSNS (1)
19184 + (!TARGET_64BIT && flag_pic)
19185 + (GET_MODE_SIZE (mode) <= 4
19186 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19187 return true;
19189 case ZERO_EXTEND:
19190 /* The zero extension is often completely free on x86_64, so make
19191 it as cheap as possible. */
19192 if (TARGET_64BIT && mode == DImode
19193 && GET_MODE (XEXP (x, 0)) == SImode)
19194 *total = 1;
19195 else if (TARGET_ZERO_EXTEND_WITH_AND)
19196 *total = cost->add;
19197 else
19198 *total = cost->movzx;
19199 return false;
19201 case SIGN_EXTEND:
19202 *total = cost->movsx;
19203 return false;
19205 case ASHIFT:
19206 if (SCALAR_INT_MODE_P (mode)
19207 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19208 && CONST_INT_P (XEXP (x, 1)))
19210 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19211 if (value == 1)
19213 *total = cost->add;
19214 return false;
19216 if ((value == 2 || value == 3)
19217 && cost->lea <= cost->shift_const)
19219 *total = cost->lea;
19220 return false;
19223 /* FALLTHRU */
19225 case ROTATE:
19226 case ASHIFTRT:
19227 case LSHIFTRT:
19228 case ROTATERT:
19229 bool skip_op0, skip_op1;
19230 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19231 CONST_INT_P (XEXP (x, 1))
19232 ? INTVAL (XEXP (x, 1)) : -1,
19233 speed,
19234 GET_CODE (XEXP (x, 1)) == AND,
19235 SUBREG_P (XEXP (x, 1))
19236 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19237 &skip_op0, &skip_op1);
19238 if (skip_op0 || skip_op1)
19240 if (!skip_op0)
19241 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19242 if (!skip_op1)
19243 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19244 return true;
19246 return false;
19248 case FMA:
19250 rtx sub;
19252 gcc_assert (FLOAT_MODE_P (mode));
19253 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19255 *total = ix86_vec_cost (mode,
19256 GET_MODE_INNER (mode) == SFmode
19257 ? cost->fmass : cost->fmasd);
19258 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19260 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19261 sub = XEXP (x, 0);
19262 if (GET_CODE (sub) == NEG)
19263 sub = XEXP (sub, 0);
19264 *total += rtx_cost (sub, mode, FMA, 0, speed);
19266 sub = XEXP (x, 2);
19267 if (GET_CODE (sub) == NEG)
19268 sub = XEXP (sub, 0);
19269 *total += rtx_cost (sub, mode, FMA, 2, speed);
19270 return true;
19273 case MULT:
19274 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19276 rtx op0 = XEXP (x, 0);
19277 rtx op1 = XEXP (x, 1);
19278 int nbits;
19279 if (CONST_INT_P (XEXP (x, 1)))
19281 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19282 for (nbits = 0; value != 0; value &= value - 1)
19283 nbits++;
19285 else
19286 /* This is arbitrary. */
19287 nbits = 7;
19289 /* Compute costs correctly for widening multiplication. */
19290 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19291 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19292 == GET_MODE_SIZE (mode))
19294 int is_mulwiden = 0;
19295 machine_mode inner_mode = GET_MODE (op0);
19297 if (GET_CODE (op0) == GET_CODE (op1))
19298 is_mulwiden = 1, op1 = XEXP (op1, 0);
19299 else if (CONST_INT_P (op1))
19301 if (GET_CODE (op0) == SIGN_EXTEND)
19302 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19303 == INTVAL (op1);
19304 else
19305 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19308 if (is_mulwiden)
19309 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19312 *total = (cost->mult_init[MODE_INDEX (mode)]
19313 + nbits * cost->mult_bit
19314 + rtx_cost (op0, mode, outer_code, opno, speed)
19315 + rtx_cost (op1, mode, outer_code, opno, speed));
19317 return true;
19319 *total = ix86_multiplication_cost (cost, mode);
19320 return false;
19322 case DIV:
19323 case UDIV:
19324 case MOD:
19325 case UMOD:
19326 *total = ix86_division_cost (cost, mode);
19327 return false;
19329 case PLUS:
19330 if (GET_MODE_CLASS (mode) == MODE_INT
19331 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19333 if (GET_CODE (XEXP (x, 0)) == PLUS
19334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19335 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19336 && CONSTANT_P (XEXP (x, 1)))
19338 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19339 if (val == 2 || val == 4 || val == 8)
19341 *total = cost->lea;
19342 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19343 outer_code, opno, speed);
19344 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19345 outer_code, opno, speed);
19346 *total += rtx_cost (XEXP (x, 1), mode,
19347 outer_code, opno, speed);
19348 return true;
19351 else if (GET_CODE (XEXP (x, 0)) == MULT
19352 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19354 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19355 if (val == 2 || val == 4 || val == 8)
19357 *total = cost->lea;
19358 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19359 outer_code, opno, speed);
19360 *total += rtx_cost (XEXP (x, 1), mode,
19361 outer_code, opno, speed);
19362 return true;
19365 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19367 /* Add with carry, ignore the cost of adding a carry flag. */
19368 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19369 *total = cost->add;
19370 else
19372 *total = cost->lea;
19373 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19374 outer_code, opno, speed);
19377 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19378 outer_code, opno, speed);
19379 *total += rtx_cost (XEXP (x, 1), mode,
19380 outer_code, opno, speed);
19381 return true;
19384 /* FALLTHRU */
19386 case MINUS:
19387 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19388 if (GET_MODE_CLASS (mode) == MODE_INT
19389 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19390 && GET_CODE (XEXP (x, 0)) == MINUS
19391 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19393 *total = cost->add;
19394 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19395 outer_code, opno, speed);
19396 *total += rtx_cost (XEXP (x, 1), mode,
19397 outer_code, opno, speed);
19398 return true;
19401 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19403 *total = cost->addss;
19404 return false;
19406 else if (X87_FLOAT_MODE_P (mode))
19408 *total = cost->fadd;
19409 return false;
19411 else if (FLOAT_MODE_P (mode))
19413 *total = ix86_vec_cost (mode, cost->addss);
19414 return false;
19416 /* FALLTHRU */
19418 case AND:
19419 case IOR:
19420 case XOR:
19421 if (GET_MODE_CLASS (mode) == MODE_INT
19422 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19424 *total = (cost->add * 2
19425 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19426 << (GET_MODE (XEXP (x, 0)) != DImode))
19427 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19428 << (GET_MODE (XEXP (x, 1)) != DImode)));
19429 return true;
19431 /* FALLTHRU */
19433 case NEG:
19434 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19436 *total = cost->sse_op;
19437 return false;
19439 else if (X87_FLOAT_MODE_P (mode))
19441 *total = cost->fchs;
19442 return false;
19444 else if (FLOAT_MODE_P (mode))
19446 *total = ix86_vec_cost (mode, cost->sse_op);
19447 return false;
19449 /* FALLTHRU */
19451 case NOT:
19452 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19453 *total = ix86_vec_cost (mode, cost->sse_op);
19454 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19455 *total = cost->add * 2;
19456 else
19457 *total = cost->add;
19458 return false;
19460 case COMPARE:
19461 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19462 && XEXP (XEXP (x, 0), 1) == const1_rtx
19463 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19464 && XEXP (x, 1) == const0_rtx)
19466 /* This kind of construct is implemented using test[bwl].
19467 Treat it as if we had an AND. */
19468 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19469 *total = (cost->add
19470 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19471 opno, speed)
19472 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19473 return true;
19476 /* The embedded comparison operand is completely free. */
19477 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19478 && XEXP (x, 1) == const0_rtx)
19479 *total = 0;
19481 return false;
19483 case FLOAT_EXTEND:
19484 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19485 *total = 0;
19486 else
19487 *total = ix86_vec_cost (mode, cost->addss);
19488 return false;
19490 case FLOAT_TRUNCATE:
19491 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19492 *total = cost->fadd;
19493 else
19494 *total = ix86_vec_cost (mode, cost->addss);
19495 return false;
19497 case ABS:
19498 /* SSE requires memory load for the constant operand. It may make
19499 sense to account for this. Of course the constant operand may or
19500 may not be reused. */
19501 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19502 *total = cost->sse_op;
19503 else if (X87_FLOAT_MODE_P (mode))
19504 *total = cost->fabs;
19505 else if (FLOAT_MODE_P (mode))
19506 *total = ix86_vec_cost (mode, cost->sse_op);
19507 return false;
19509 case SQRT:
19510 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19511 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19512 else if (X87_FLOAT_MODE_P (mode))
19513 *total = cost->fsqrt;
19514 else if (FLOAT_MODE_P (mode))
19515 *total = ix86_vec_cost (mode,
19516 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19517 return false;
19519 case UNSPEC:
19520 if (XINT (x, 1) == UNSPEC_TP)
19521 *total = 0;
19522 return false;
19524 case VEC_SELECT:
19525 case VEC_CONCAT:
19526 case VEC_DUPLICATE:
19527 /* ??? Assume all of these vector manipulation patterns are
19528 recognizable, in which case they all pretty much have the
19529 same cost. */
19530 *total = cost->sse_op;
19531 return true;
19532 case VEC_MERGE:
19533 mask = XEXP (x, 2);
19534 /* This is a masked instruction; assume the same cost
19535 as the non-masked variant. */
19536 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19537 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19538 else
19539 *total = cost->sse_op;
19540 return true;
19542 default:
19543 return false;
19547 #if TARGET_MACHO
19549 static int current_machopic_label_num;
19551 /* Given a symbol name and its associated stub, write out the
19552 definition of the stub. */
19554 void
19555 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19557 unsigned int length;
19558 char *binder_name, *symbol_name, lazy_ptr_name[32];
19559 int label = ++current_machopic_label_num;
19561 /* For 64-bit we shouldn't get here. */
19562 gcc_assert (!TARGET_64BIT);
19564 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19565 symb = targetm.strip_name_encoding (symb);
19567 length = strlen (stub);
19568 binder_name = XALLOCAVEC (char, length + 32);
19569 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19571 length = strlen (symb);
19572 symbol_name = XALLOCAVEC (char, length + 32);
19573 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19575 sprintf (lazy_ptr_name, "L%d$lz", label);
19577 if (MACHOPIC_ATT_STUB)
19578 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19579 else if (MACHOPIC_PURE)
19580 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19581 else
19582 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19584 fprintf (file, "%s:\n", stub);
19585 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19587 if (MACHOPIC_ATT_STUB)
19589 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19591 else if (MACHOPIC_PURE)
19593 /* PIC stub. */
19594 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19595 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19596 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19597 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19598 label, lazy_ptr_name, label);
19599 fprintf (file, "\tjmp\t*%%ecx\n");
19601 else
19602 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19604 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19605 it needs no stub-binding-helper. */
19606 if (MACHOPIC_ATT_STUB)
19607 return;
19609 fprintf (file, "%s:\n", binder_name);
19611 if (MACHOPIC_PURE)
19613 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19614 fprintf (file, "\tpushl\t%%ecx\n");
19616 else
19617 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19619 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19621 /* N.B. Keep the correspondence of these
19622 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19623 old-pic/new-pic/non-pic stubs; altering this will break
19624 compatibility with existing dylibs. */
19625 if (MACHOPIC_PURE)
19627 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19628 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19630 else
19631 /* 16-byte -mdynamic-no-pic stub. */
19632 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
19634 fprintf (file, "%s:\n", lazy_ptr_name);
19635 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19636 fprintf (file, ASM_LONG "%s\n", binder_name);
19638 #endif /* TARGET_MACHO */
19640 /* Order the registers for register allocator. */
19642 void
19643 x86_order_regs_for_local_alloc (void)
19645 int pos = 0;
19646 int i;
19648 /* First allocate the local general purpose registers. */
19649 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19650 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19651 reg_alloc_order [pos++] = i;
19653 /* Global general purpose registers. */
19654 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19655 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19656 reg_alloc_order [pos++] = i;
19658 /* x87 registers come first in case we are doing FP math
19659 using them. */
19660 if (!TARGET_SSE_MATH)
19661 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19662 reg_alloc_order [pos++] = i;
19664 /* SSE registers. */
19665 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19666 reg_alloc_order [pos++] = i;
19667 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19668 reg_alloc_order [pos++] = i;
19670 /* Extended REX SSE registers. */
19671 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
19672 reg_alloc_order [pos++] = i;
19674 /* Mask register. */
19675 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
19676 reg_alloc_order [pos++] = i;
19678 /* x87 registers. */
19679 if (TARGET_SSE_MATH)
19680 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19681 reg_alloc_order [pos++] = i;
19683 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19684 reg_alloc_order [pos++] = i;
19686 /* Initialize the rest of the array, as we do not allocate some
19687 registers at all. */
19688 while (pos < FIRST_PSEUDO_REGISTER)
19689 reg_alloc_order [pos++] = 0;
19692 static bool
19693 ix86_ms_bitfield_layout_p (const_tree record_type)
19695 return ((TARGET_MS_BITFIELD_LAYOUT
19696 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19697 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19700 /* Returns an expression indicating where the this parameter is
19701 located on entry to the FUNCTION. */
19703 static rtx
19704 x86_this_parameter (tree function)
19706 tree type = TREE_TYPE (function);
19707 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19708 int nregs;
19710 if (TARGET_64BIT)
19712 const int *parm_regs;
19714 if (ix86_function_type_abi (type) == MS_ABI)
19715 parm_regs = x86_64_ms_abi_int_parameter_registers;
19716 else
19717 parm_regs = x86_64_int_parameter_registers;
19718 return gen_rtx_REG (Pmode, parm_regs[aggr]);
19721 nregs = ix86_function_regparm (type, function);
19723 if (nregs > 0 && !stdarg_p (type))
19725 int regno;
19726 unsigned int ccvt = ix86_get_callcvt (type);
19728 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
19729 regno = aggr ? DX_REG : CX_REG;
19730 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
19732 regno = CX_REG;
19733 if (aggr)
19734 return gen_rtx_MEM (SImode,
19735 plus_constant (Pmode, stack_pointer_rtx, 4));
19737 else
19739 regno = AX_REG;
19740 if (aggr)
19742 regno = DX_REG;
19743 if (nregs == 1)
19744 return gen_rtx_MEM (SImode,
19745 plus_constant (Pmode,
19746 stack_pointer_rtx, 4));
19749 return gen_rtx_REG (SImode, regno);
19752 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
19753 aggr ? 8 : 4));
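/* Example (illustrative): for a 64-bit SysV method, THIS arrives in %rdi
   (parm_regs[0]); if the function returns its aggregate value in memory,
   the hidden return pointer occupies %rdi and THIS moves to %rsi
   (parm_regs[1]).  Under the MS ABI the corresponding registers are %rcx
   and %rdx.  */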
19756 /* Determine whether x86_output_mi_thunk can succeed. */
19758 static bool
19759 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
19760 const_tree function)
19762 /* 64-bit can handle anything. */
19763 if (TARGET_64BIT)
19764 return true;
19766 /* For 32-bit, everything's fine if we have one free register. */
19767 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19768 return true;
19770 /* Need a free register for vcall_offset. */
19771 if (vcall_offset)
19772 return false;
19774 /* Need a free register for GOT references. */
19775 if (flag_pic && !targetm.binds_local_p (function))
19776 return false;
19778 /* Otherwise ok. */
19779 return true;
19782 /* Output the assembler code for a thunk function. THUNK_DECL is the
19783 declaration for the thunk function itself, FUNCTION is the decl for
19784 the target function. DELTA is an immediate constant offset to be
19785 added to THIS. If VCALL_OFFSET is nonzero, the word at
19786 *(*this + vcall_offset) should be added to THIS. */
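/* Illustrative note, not part of the original sources: such thunks typically
   come from C++ multiple inheritance, e.g.

     struct A { virtual void f (); long a; };
     struct B { virtual void g (); long b; };
     struct C : A, B { void g (); };

   Calling g through a B* that points into a C object enters a thunk that
   adjusts THIS by the constant DELTA (the offset of the B subobject within
   C) and, for virtual bases, by a value loaded from the vtable via
   VCALL_OFFSET, before transferring control to the real C::g.  */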
19788 static void
19789 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
19790 HOST_WIDE_INT vcall_offset, tree function)
19792 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
19793 rtx this_param = x86_this_parameter (function);
19794 rtx this_reg, tmp, fnaddr;
19795 unsigned int tmp_regno;
19796 rtx_insn *insn;
19798 if (TARGET_64BIT)
19799 tmp_regno = R10_REG;
19800 else
19802 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
19803 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
19804 tmp_regno = AX_REG;
19805 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
19806 tmp_regno = DX_REG;
19807 else
19808 tmp_regno = CX_REG;
19811 emit_note (NOTE_INSN_PROLOGUE_END);
19813 /* If CET is enabled, insert an ENDBR instruction. */
19814 if ((flag_cf_protection & CF_BRANCH))
19815 emit_insn (gen_nop_endbr ());
19817 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19818 pull it in now and let DELTA benefit. */
19819 if (REG_P (this_param))
19820 this_reg = this_param;
19821 else if (vcall_offset)
19823 /* Put the this parameter into %eax. */
19824 this_reg = gen_rtx_REG (Pmode, AX_REG);
19825 emit_move_insn (this_reg, this_param);
19827 else
19828 this_reg = NULL_RTX;
19830 /* Adjust the this parameter by a fixed constant. */
19831 if (delta)
19833 rtx delta_rtx = GEN_INT (delta);
19834 rtx delta_dst = this_reg ? this_reg : this_param;
19836 if (TARGET_64BIT)
19838 if (!x86_64_general_operand (delta_rtx, Pmode))
19840 tmp = gen_rtx_REG (Pmode, tmp_regno);
19841 emit_move_insn (tmp, delta_rtx);
19842 delta_rtx = tmp;
19846 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
19849 /* Adjust the this parameter by a value stored in the vtable. */
19850 if (vcall_offset)
19852 rtx vcall_addr, vcall_mem, this_mem;
19854 tmp = gen_rtx_REG (Pmode, tmp_regno);
19856 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
19857 if (Pmode != ptr_mode)
19858 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
19859 emit_move_insn (tmp, this_mem);
19861 /* Adjust the this parameter. */
19862 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
19863 if (TARGET_64BIT
19864 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
19866 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
19867 emit_move_insn (tmp2, GEN_INT (vcall_offset));
19868 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
19871 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
19872 if (Pmode != ptr_mode)
19873 emit_insn (gen_addsi_1_zext (this_reg,
19874 gen_rtx_REG (ptr_mode,
19875 REGNO (this_reg)),
19876 vcall_mem));
19877 else
19878 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
19881 /* If necessary, drop THIS back to its stack slot. */
19882 if (this_reg && this_reg != this_param)
19883 emit_move_insn (this_param, this_reg);
19885 fnaddr = XEXP (DECL_RTL (function), 0);
19886 if (TARGET_64BIT)
19888 if (!flag_pic || targetm.binds_local_p (function)
19889 || TARGET_PECOFF)
19891 else
19893 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
19894 tmp = gen_rtx_CONST (Pmode, tmp);
19895 fnaddr = gen_const_mem (Pmode, tmp);
19898 else
19900 if (!flag_pic || targetm.binds_local_p (function))
19902 #if TARGET_MACHO
19903 else if (TARGET_MACHO)
19905 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
19906 fnaddr = XEXP (fnaddr, 0);
19908 #endif /* TARGET_MACHO */
19909 else
19911 tmp = gen_rtx_REG (Pmode, CX_REG);
19912 output_set_got (tmp, NULL_RTX);
19914 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
19915 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
19916 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
19917 fnaddr = gen_const_mem (Pmode, fnaddr);
19921 /* Our sibling call patterns do not allow memories, because we have no
19922 predicate that can distinguish between frame and non-frame memory.
19923 For our purposes here, we can get away with (ab)using a jump pattern,
19924 because we're going to do no optimization. */
19925 if (MEM_P (fnaddr))
19927 if (sibcall_insn_operand (fnaddr, word_mode))
19929 fnaddr = XEXP (DECL_RTL (function), 0);
19930 tmp = gen_rtx_MEM (QImode, fnaddr);
19931 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
19932 tmp = emit_call_insn (tmp);
19933 SIBLING_CALL_P (tmp) = 1;
19935 else
19936 emit_jump_insn (gen_indirect_jump (fnaddr));
19938 else
19940 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
19942 // CM_LARGE_PIC always uses a pseudo PIC register which is
19943 // uninitialized. Since FUNCTION is local and calling it
19944 // doesn't go through the PLT, we use scratch register %r11 as
19945 // the PIC register and initialize it here.
19946 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
19947 ix86_init_large_pic_reg (tmp_regno);
19948 fnaddr = legitimize_pic_address (fnaddr,
19949 gen_rtx_REG (Pmode, tmp_regno));
19952 if (!sibcall_insn_operand (fnaddr, word_mode))
19954 tmp = gen_rtx_REG (word_mode, tmp_regno);
19955 if (GET_MODE (fnaddr) != word_mode)
19956 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
19957 emit_move_insn (tmp, fnaddr);
19958 fnaddr = tmp;
19961 tmp = gen_rtx_MEM (QImode, fnaddr);
19962 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
19963 tmp = emit_call_insn (tmp);
19964 SIBLING_CALL_P (tmp) = 1;
19966 emit_barrier ();
19968 /* Emit just enough of rest_of_compilation to get the insns emitted.
19969 Note that use_thunk calls assemble_start_function et al. */
19970 insn = get_insns ();
19971 shorten_branches (insn);
19972 assemble_start_function (thunk_fndecl, fnname);
19973 final_start_function (insn, file, 1);
19974 final (insn, file, 1);
19975 final_end_function ();
19976 assemble_end_function (thunk_fndecl, fnname);
19979 static void
19980 x86_file_start (void)
19982 default_file_start ();
19983 if (TARGET_16BIT)
19984 fputs ("\t.code16gcc\n", asm_out_file);
19985 #if TARGET_MACHO
19986 darwin_file_start ();
19987 #endif
19988 if (X86_FILE_START_VERSION_DIRECTIVE)
19989 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19990 if (X86_FILE_START_FLTUSED)
19991 fputs ("\t.global\t__fltused\n", asm_out_file);
19992 if (ix86_asm_dialect == ASM_INTEL)
19993 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
19997 x86_field_alignment (tree type, int computed)
19999 machine_mode mode;
20001 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20002 return computed;
20003 if (TARGET_IAMCU)
20004 return iamcu_alignment (type, computed);
20005 mode = TYPE_MODE (strip_array_types (type));
20006 if (mode == DFmode || mode == DCmode
20007 || GET_MODE_CLASS (mode) == MODE_INT
20008 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20009 return MIN (32, computed);
20010 return computed;
20013 /* Print call to TARGET to FILE. */
20015 static void
20016 x86_print_call_or_nop (FILE *file, const char *target)
20018 if (flag_nop_mcount || !strcmp (target, "nop"))
20019 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20020 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20021 else
20022 fprintf (file, "1:\tcall\t%s\n", target);
20025 static bool
20026 current_fentry_name (const char **name)
20028 tree attr = lookup_attribute ("fentry_name",
20029 DECL_ATTRIBUTES (current_function_decl));
20030 if (!attr)
20031 return false;
20032 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20033 return true;
20036 static bool
20037 current_fentry_section (const char **name)
20039 tree attr = lookup_attribute ("fentry_section",
20040 DECL_ATTRIBUTES (current_function_decl));
20041 if (!attr)
20042 return false;
20043 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20044 return true;
20047 /* Output assembler code to FILE to increment profiler label # LABELNO
20048 for profiling a function entry. */
20049 void
20050 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20052 if (cfun->machine->endbr_queued_at_entrance)
20053 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20055 const char *mcount_name = MCOUNT_NAME;
20057 if (current_fentry_name (&mcount_name))
20059 else if (fentry_name)
20060 mcount_name = fentry_name;
20061 else if (flag_fentry)
20062 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20064 if (TARGET_64BIT)
20066 #ifndef NO_PROFILE_COUNTERS
20067 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20068 #endif
20070 if (!TARGET_PECOFF && flag_pic)
20071 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20072 else
20073 x86_print_call_or_nop (file, mcount_name);
20075 else if (flag_pic)
20077 #ifndef NO_PROFILE_COUNTERS
20078 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20079 LPREFIX, labelno);
20080 #endif
20081 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20083 else
20085 #ifndef NO_PROFILE_COUNTERS
20086 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20087 LPREFIX, labelno);
20088 #endif
20089 x86_print_call_or_nop (file, mcount_name);
20092 if (flag_record_mcount
20093 || lookup_attribute ("fentry_section",
20094 DECL_ATTRIBUTES (current_function_decl)))
20096 const char *sname = "__mcount_loc";
20098 if (current_fentry_section (&sname))
20100 else if (fentry_section)
20101 sname = fentry_section;
20103 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20104 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20105 fprintf (file, "\t.previous\n");
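/* Usage sketch, illustrative only: the fentry_name and fentry_section
   attributes checked above let a single function override the profiling
   call target and the record section, e.g.

     __attribute__((fentry_name ("my_tracer"),
                    fentry_section ("__my_trace_loc")))
     void traced (void) { }

   in combination with -pg -mfentry -mrecord-mcount on the command line.  */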
20109 /* We don't have exact information about the insn sizes, but we can quite
20110 safely assume that we are informed about all 1-byte insns and memory
20111 address sizes. This is enough to eliminate unnecessary padding in
20112 99% of cases. */
20115 ix86_min_insn_size (rtx_insn *insn)
20117 int l = 0, len;
20119 if (!INSN_P (insn) || !active_insn_p (insn))
20120 return 0;
20122 /* Discard alignments we've emitted, and jump instructions. */
20123 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20124 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20125 return 0;
20127 /* Important case - calls are always 5 bytes.
20128 It is common to have many calls in a row. */
20129 if (CALL_P (insn)
20130 && symbolic_reference_mentioned_p (PATTERN (insn))
20131 && !SIBLING_CALL_P (insn))
20132 return 5;
20133 len = get_attr_length (insn);
20134 if (len <= 1)
20135 return 1;
20137 /* For normal instructions we rely on get_attr_length being exact,
20138 with a few exceptions. */
20139 if (!JUMP_P (insn))
20141 enum attr_type type = get_attr_type (insn);
20143 switch (type)
20145 case TYPE_MULTI:
20146 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20147 || asm_noperands (PATTERN (insn)) >= 0)
20148 return 0;
20149 break;
20150 case TYPE_OTHER:
20151 case TYPE_FCMP:
20152 break;
20153 default:
20154 /* Otherwise trust get_attr_length. */
20155 return len;
20158 l = get_attr_length_address (insn);
20159 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20160 l = 4;
20162 if (l)
20163 return 1+l;
20164 else
20165 return 2;
20168 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20170 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
20171 16 byte window. */
20173 static void
20174 ix86_avoid_jump_mispredicts (void)
20176 rtx_insn *insn, *start = get_insns ();
20177 int nbytes = 0, njumps = 0;
20178 bool isjump = false;
20180 /* Look for all minimal intervals of instructions containing 4 jumps.
20181 The intervals are bounded by START and INSN. NBYTES is the total
20182 size of instructions in the interval including INSN and not including
20183 START. When NBYTES is smaller than 16 bytes, it is possible
20184 that the end of START and INSN ends up in the same 16 byte page.
20186 The smallest offset in the page INSN can start is the case where START
20187 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20188 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20190 Don't consider an asm goto as a jump: while it can contain a jump, it
20191 doesn't have to, since control transfer to its label(s) can be performed
20192 through other means; also, we estimate the minimum length of all asm stmts as 0. */
20193 for (insn = start; insn; insn = NEXT_INSN (insn))
20195 int min_size;
20197 if (LABEL_P (insn))
20199 align_flags alignment = label_to_alignment (insn);
20200 int align = alignment.levels[0].log;
20201 int max_skip = alignment.levels[0].maxskip;
20203 if (max_skip > 15)
20204 max_skip = 15;
20205 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20206 already in the current 16 byte page, because otherwise
20207 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20208 bytes to reach 16 byte boundary. */
20209 if (align <= 0
20210 || (align <= 3 && max_skip != (1 << align) - 1))
20211 max_skip = 0;
20212 if (dump_file)
20213 fprintf (dump_file, "Label %i with max_skip %i\n",
20214 INSN_UID (insn), max_skip);
20215 if (max_skip)
20217 while (nbytes + max_skip >= 16)
20219 start = NEXT_INSN (start);
20220 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20221 || CALL_P (start))
20222 njumps--, isjump = true;
20223 else
20224 isjump = false;
20225 nbytes -= ix86_min_insn_size (start);
20228 continue;
20231 min_size = ix86_min_insn_size (insn);
20232 nbytes += min_size;
20233 if (dump_file)
20234 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20235 INSN_UID (insn), min_size);
20236 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20237 || CALL_P (insn))
20238 njumps++;
20239 else
20240 continue;
20242 while (njumps > 3)
20244 start = NEXT_INSN (start);
20245 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20246 || CALL_P (start))
20247 njumps--, isjump = true;
20248 else
20249 isjump = false;
20250 nbytes -= ix86_min_insn_size (start);
20252 gcc_assert (njumps >= 0);
20253 if (dump_file)
20254 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20255 INSN_UID (start), INSN_UID (insn), nbytes);
20257 if (njumps == 3 && isjump && nbytes < 16)
20259 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20261 if (dump_file)
20262 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20263 INSN_UID (insn), padsize);
20264 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20268 #endif
20270 /* AMD Athlon works faster
20271 when RET is not the destination of a conditional jump or directly preceded
20272 by another jump instruction. We avoid the penalty by inserting a NOP just
20273 before the RET instruction in such cases. */
20274 static void
20275 ix86_pad_returns (void)
20277 edge e;
20278 edge_iterator ei;
20280 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20282 basic_block bb = e->src;
20283 rtx_insn *ret = BB_END (bb);
20284 rtx_insn *prev;
20285 bool replace = false;
20287 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20288 || optimize_bb_for_size_p (bb))
20289 continue;
20290 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20291 if (active_insn_p (prev) || LABEL_P (prev))
20292 break;
20293 if (prev && LABEL_P (prev))
20295 edge e;
20296 edge_iterator ei;
20298 FOR_EACH_EDGE (e, ei, bb->preds)
20299 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20300 && !(e->flags & EDGE_FALLTHRU))
20302 replace = true;
20303 break;
20306 if (!replace)
20308 prev = prev_active_insn (ret);
20309 if (prev
20310 && ((JUMP_P (prev) && any_condjump_p (prev))
20311 || CALL_P (prev)))
20312 replace = true;
20313 /* Empty functions get a branch mispredict even when
20314 the jump destination is not visible to us. */
20315 if (!prev && !optimize_function_for_size_p (cfun))
20316 replace = true;
20318 if (replace)
20320 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20321 delete_insn (ret);
20326 /* Count the minimum number of instructions in BB. Return 4 if the
20327 number of instructions >= 4. */
20329 static int
20330 ix86_count_insn_bb (basic_block bb)
20332 rtx_insn *insn;
20333 int insn_count = 0;
20335 /* Count number of instructions in this block. Return 4 if the number
20336 of instructions >= 4. */
20337 FOR_BB_INSNS (bb, insn)
20339 /* This only happens in exit blocks. */
20340 if (JUMP_P (insn)
20341 && ANY_RETURN_P (PATTERN (insn)))
20342 break;
20344 if (NONDEBUG_INSN_P (insn)
20345 && GET_CODE (PATTERN (insn)) != USE
20346 && GET_CODE (PATTERN (insn)) != CLOBBER)
20348 insn_count++;
20349 if (insn_count >= 4)
20350 return insn_count;
20354 return insn_count;
20358 /* Count the minimum number of instructions in code path in BB.
20359 Return 4 if the number of instructions >= 4. */
20361 static int
20362 ix86_count_insn (basic_block bb)
20364 edge e;
20365 edge_iterator ei;
20366 int min_prev_count;
20368 /* Only bother counting instructions along paths with no
20369 more than 2 basic blocks between entry and exit. Given
20370 that BB has an edge to exit, determine if a predecessor
20371 of BB has an edge from entry. If so, compute the number
20372 of instructions in the predecessor block. If there
20373 happen to be multiple such blocks, compute the minimum. */
20374 min_prev_count = 4;
20375 FOR_EACH_EDGE (e, ei, bb->preds)
20377 edge prev_e;
20378 edge_iterator prev_ei;
20380 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20382 min_prev_count = 0;
20383 break;
20385 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20387 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20389 int count = ix86_count_insn_bb (e->src);
20390 if (count < min_prev_count)
20391 min_prev_count = count;
20392 break;
20397 if (min_prev_count < 4)
20398 min_prev_count += ix86_count_insn_bb (bb);
20400 return min_prev_count;
20403 /* Pad short function to 4 instructions. */
20405 static void
20406 ix86_pad_short_function (void)
20408 edge e;
20409 edge_iterator ei;
20411 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20413 rtx_insn *ret = BB_END (e->src);
20414 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20416 int insn_count = ix86_count_insn (e->src);
20418 /* Pad short function. */
20419 if (insn_count < 4)
20421 rtx_insn *insn = ret;
20423 /* Find epilogue. */
20424 while (insn
20425 && (!NOTE_P (insn)
20426 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20427 insn = PREV_INSN (insn);
20429 if (!insn)
20430 insn = ret;
20432 /* Two NOPs count as one instruction. */
20433 insn_count = 2 * (4 - insn_count);
20434 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20440 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20441 the epilogue, the Windows system unwinder will apply epilogue logic and
20442 produce incorrect offsets. This can be avoided by adding a nop between
20443 the last insn that can throw and the first insn of the epilogue. */
20445 static void
20446 ix86_seh_fixup_eh_fallthru (void)
20448 edge e;
20449 edge_iterator ei;
20451 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20453 rtx_insn *insn, *next;
20455 /* Find the beginning of the epilogue. */
20456 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20457 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20458 break;
20459 if (insn == NULL)
20460 continue;
20462 /* We only care about preceding insns that can throw. */
20463 insn = prev_active_insn (insn);
20464 if (insn == NULL || !can_throw_internal (insn))
20465 continue;
20467 /* Do not separate calls from their debug information. */
20468 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20469 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20470 insn = next;
20471 else
20472 break;
20474 emit_insn_after (gen_nops (const1_rtx), insn);
20478 /* Implement machine specific optimizations. We implement padding of returns
20479 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
20480 static void
20481 ix86_reorg (void)
20483 /* We are freeing block_for_insn in the toplev to keep compatibility
20484 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20485 compute_bb_for_insn ();
20487 if (TARGET_SEH && current_function_has_exception_handlers ())
20488 ix86_seh_fixup_eh_fallthru ();
20490 if (optimize && optimize_function_for_speed_p (cfun))
20492 if (TARGET_PAD_SHORT_FUNCTION)
20493 ix86_pad_short_function ();
20494 else if (TARGET_PAD_RETURNS)
20495 ix86_pad_returns ();
20496 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20497 if (TARGET_FOUR_JUMP_LIMIT)
20498 ix86_avoid_jump_mispredicts ();
20499 #endif
20503 /* Return nonzero when a QImode register that must be represented via a REX
20504 prefix is used. */
20505 bool
20506 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20508 int i;
20509 extract_insn_cached (insn);
20510 for (i = 0; i < recog_data.n_operands; i++)
20511 if (GENERAL_REG_P (recog_data.operand[i])
20512 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
20513 return true;
20514 return false;
20517 /* Return true when INSN mentions a register that must be encoded using a
20518 REX prefix. */
20519 bool
20520 x86_extended_reg_mentioned_p (rtx insn)
20522 subrtx_iterator::array_type array;
20523 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20525 const_rtx x = *iter;
20526 if (REG_P (x)
20527 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20528 return true;
20530 return false;
20533 /* If profitable, negate (without causing overflow) the integer constant
20534 of mode MODE at location LOC. Return true in that case. */
20535 bool
20536 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20538 HOST_WIDE_INT val;
20540 if (!CONST_INT_P (*loc))
20541 return false;
20543 switch (mode)
20545 case E_DImode:
20546 /* DImode x86_64 constants must fit in 32 bits. */
20547 gcc_assert (x86_64_immediate_operand (*loc, mode));
20549 mode = SImode;
20550 break;
20552 case E_SImode:
20553 case E_HImode:
20554 case E_QImode:
20555 break;
20557 default:
20558 gcc_unreachable ();
20561 /* Avoid overflows. */
20562 if (mode_signbit_p (mode, *loc))
20563 return false;
20565 val = INTVAL (*loc);
20567 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
20568 Exception: -128 encodes smaller than 128, so swap the sign and operation. */
20569 if ((val < 0 && val != -128)
20570 || val == 128)
20572 *loc = GEN_INT (-val);
20573 return true;
20576 return false;
20579 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20580 optabs would emit if we didn't have TFmode patterns. */
20582 void
20583 x86_emit_floatuns (rtx operands[2])
20585 rtx_code_label *neglab, *donelab;
20586 rtx i0, i1, f0, in, out;
20587 machine_mode mode, inmode;
20589 inmode = GET_MODE (operands[1]);
20590 gcc_assert (inmode == SImode || inmode == DImode);
20592 out = operands[0];
20593 in = force_reg (inmode, operands[1]);
20594 mode = GET_MODE (out);
20595 neglab = gen_label_rtx ();
20596 donelab = gen_label_rtx ();
20597 f0 = gen_reg_rtx (mode);
20599 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20601 expand_float (out, in, 0);
20603 emit_jump_insn (gen_jump (donelab));
20604 emit_barrier ();
20606 emit_label (neglab);
20608 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20609 1, OPTAB_DIRECT);
20610 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20611 1, OPTAB_DIRECT);
20612 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20614 expand_float (f0, i0, 0);
20616 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20618 emit_label (donelab);
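/* Illustrative sketch, not part of the original sources: for a 64-bit input
   the expansion above is equivalent to the following scalar C, assuming
   IEEE double:

     double
     u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       return 2.0 * (double) (long long) half;
     }

   ORing the shifted-out low bit back in preserves correct rounding of the
   halved value, so doubling the converted result gives the properly rounded
   conversion of X.  */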
20621 /* Target hook for scalar_mode_supported_p. */
20622 static bool
20623 ix86_scalar_mode_supported_p (scalar_mode mode)
20625 if (DECIMAL_FLOAT_MODE_P (mode))
20626 return default_decimal_float_supported_p ();
20627 else if (mode == TFmode)
20628 return true;
20629 else
20630 return default_scalar_mode_supported_p (mode);
20633 /* Implements target hook vector_mode_supported_p. */
20634 static bool
20635 ix86_vector_mode_supported_p (machine_mode mode)
20637 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20638 return true;
20639 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20640 return true;
20641 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20642 return true;
20643 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20644 return true;
20645 if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
20646 return true;
20647 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20648 return true;
20649 return false;
20652 /* Target hook for c_mode_for_suffix. */
20653 static machine_mode
20654 ix86_c_mode_for_suffix (char suffix)
20656 if (suffix == 'q')
20657 return TFmode;
20658 if (suffix == 'w')
20659 return XFmode;
20661 return VOIDmode;
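/* Illustrative note, not part of the original sources: these map GCC's
   floating constant suffixes to machine modes, e.g.

     __float128 q = 1.1q;    selects TFmode
     __float80  w = 1.1w;    selects XFmode, the mode of long double

   so 'q' picks the 128-bit and 'w' the 80-bit extended type.  */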
20664 /* Worker function for TARGET_MD_ASM_ADJUST.
20666 We implement asm flag outputs, and maintain source compatibility
20667 with the old cc0-based compiler. */
20669 static rtx_insn *
20670 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
20671 vec<const char *> &constraints,
20672 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
20674 bool saw_asm_flag = false;
20676 start_sequence ();
20677 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
20679 const char *con = constraints[i];
20680 if (strncmp (con, "=@cc", 4) != 0)
20681 continue;
20682 con += 4;
20683 if (strchr (con, ',') != NULL)
20685 error ("alternatives not allowed in %<asm%> flag output");
20686 continue;
20689 bool invert = false;
20690 if (con[0] == 'n')
20691 invert = true, con++;
20693 machine_mode mode = CCmode;
20694 rtx_code code = UNKNOWN;
20696 switch (con[0])
20698 case 'a':
20699 if (con[1] == 0)
20700 mode = CCAmode, code = EQ;
20701 else if (con[1] == 'e' && con[2] == 0)
20702 mode = CCCmode, code = NE;
20703 break;
20704 case 'b':
20705 if (con[1] == 0)
20706 mode = CCCmode, code = EQ;
20707 else if (con[1] == 'e' && con[2] == 0)
20708 mode = CCAmode, code = NE;
20709 break;
20710 case 'c':
20711 if (con[1] == 0)
20712 mode = CCCmode, code = EQ;
20713 break;
20714 case 'e':
20715 if (con[1] == 0)
20716 mode = CCZmode, code = EQ;
20717 break;
20718 case 'g':
20719 if (con[1] == 0)
20720 mode = CCGCmode, code = GT;
20721 else if (con[1] == 'e' && con[2] == 0)
20722 mode = CCGCmode, code = GE;
20723 break;
20724 case 'l':
20725 if (con[1] == 0)
20726 mode = CCGCmode, code = LT;
20727 else if (con[1] == 'e' && con[2] == 0)
20728 mode = CCGCmode, code = LE;
20729 break;
20730 case 'o':
20731 if (con[1] == 0)
20732 mode = CCOmode, code = EQ;
20733 break;
20734 case 'p':
20735 if (con[1] == 0)
20736 mode = CCPmode, code = EQ;
20737 break;
20738 case 's':
20739 if (con[1] == 0)
20740 mode = CCSmode, code = EQ;
20741 break;
20742 case 'z':
20743 if (con[1] == 0)
20744 mode = CCZmode, code = EQ;
20745 break;
20747 if (code == UNKNOWN)
20749 error ("unknown %<asm%> flag output %qs", constraints[i]);
20750 continue;
20752 if (invert)
20753 code = reverse_condition (code);
20755 rtx dest = outputs[i];
20756 if (!saw_asm_flag)
20758 /* This is the first asm flag output. Here we put the flags
20759 register in as the real output and adjust the condition to
20760 allow it. */
20761 constraints[i] = "=Bf";
20762 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
20763 saw_asm_flag = true;
20765 else
20767 /* We don't need the flags register as output twice. */
20768 constraints[i] = "=X";
20769 outputs[i] = gen_rtx_SCRATCH (SImode);
20772 rtx x = gen_rtx_REG (mode, FLAGS_REG);
20773 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
20775 machine_mode dest_mode = GET_MODE (dest);
20776 if (!SCALAR_INT_MODE_P (dest_mode))
20778 error ("invalid type for %<asm%> flag output");
20779 continue;
20782 if (dest_mode == DImode && !TARGET_64BIT)
20783 dest_mode = SImode;
20785 if (dest_mode != QImode)
20787 rtx destqi = gen_reg_rtx (QImode);
20788 emit_insn (gen_rtx_SET (destqi, x));
20790 if (TARGET_ZERO_EXTEND_WITH_AND
20791 && optimize_function_for_speed_p (cfun))
20793 x = force_reg (dest_mode, const0_rtx);
20795 emit_insn (gen_movstrictqi
20796 (gen_lowpart (QImode, x), destqi));
20798 else
20799 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
20802 if (dest_mode != GET_MODE (dest))
20804 rtx tmp = gen_reg_rtx (SImode);
20806 emit_insn (gen_rtx_SET (tmp, x));
20807 emit_insn (gen_zero_extendsidi2 (dest, tmp));
20809 else
20810 emit_insn (gen_rtx_SET (dest, x));
20812 rtx_insn *seq = get_insns ();
20813 end_sequence ();
20815 if (saw_asm_flag)
20816 return seq;
20817 else
20819 /* If we had no asm flag outputs, clobber the flags. */
20820 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
20821 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
20822 return NULL;
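/* Usage sketch, illustrative only: with asm flag outputs the condition codes
   of an instruction can be captured directly into a C variable, e.g.

     int less;
     asm ("cmpl %2, %1" : "=@ccl" (less) : "r" (a), "r" (b));

   The "=@ccl" constraint is rewritten above into a FLAGS_REG output plus a
   setcc-style extraction of the LT condition into LESS.  */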
20826 /* Implements the target hook targetm.asm.encode_section_info. */
20828 static void ATTRIBUTE_UNUSED
20829 ix86_encode_section_info (tree decl, rtx rtl, int first)
20831 default_encode_section_info (decl, rtl, first);
20833 if (ix86_in_large_data_p (decl))
20834 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20837 /* Worker function for REVERSE_CONDITION. */
20839 enum rtx_code
20840 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
20842 return (mode == CCFPmode
20843 ? reverse_condition_maybe_unordered (code)
20844 : reverse_condition (code));
20847 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20848 to OPERANDS[0]. */
20850 const char *
20851 output_387_reg_move (rtx_insn *insn, rtx *operands)
20853 if (REG_P (operands[0]))
20855 if (REG_P (operands[1])
20856 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20858 if (REGNO (operands[0]) == FIRST_STACK_REG)
20859 return output_387_ffreep (operands, 0);
20860 return "fstp\t%y0";
20862 if (STACK_TOP_P (operands[0]))
20863 return "fld%Z1\t%y1";
20864 return "fst\t%y0";
20866 else if (MEM_P (operands[0]))
20868 gcc_assert (REG_P (operands[1]));
20869 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20870 return "fstp%Z0\t%y0";
20871 else
20873 /* There is no non-popping store to memory for XFmode.
20874 So if we need one, follow the store with a load. */
20875 if (GET_MODE (operands[0]) == XFmode)
20876 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
20877 else
20878 return "fst%Z0\t%y0";
20881 else
20882 gcc_unreachable();
20884 #ifdef TARGET_SOLARIS
20885 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20887 static void
20888 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20889 tree decl)
20891 /* With Binutils 2.15, the "@unwind" marker must be specified on
20892 every occurrence of the ".eh_frame" section, not just the first
20893 one. */
20894 if (TARGET_64BIT
20895 && strcmp (name, ".eh_frame") == 0)
20897 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20898 flags & SECTION_WRITE ? "aw" : "a");
20899 return;
20902 #ifndef USE_GAS
20903 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
20905 solaris_elf_asm_comdat_section (name, flags, decl);
20906 return;
20909 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
20910 SPARC assembler. One cannot mix single-letter flags and #exclude, so
20911 only emit the latter here. */
20912 if (flags & SECTION_EXCLUDE)
20914 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
20915 return;
20917 #endif
20919 default_elf_asm_named_section (name, flags, decl);
20921 #endif /* TARGET_SOLARIS */
20923 /* Return the mangling of TYPE if it is an extended fundamental type. */
20925 static const char *
20926 ix86_mangle_type (const_tree type)
20928 type = TYPE_MAIN_VARIANT (type);
20930 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20931 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
20932 return NULL;
20934 switch (TYPE_MODE (type))
20936 case E_TFmode:
20937 /* __float128 is "g". */
20938 return "g";
20939 case E_XFmode:
20940 /* "long double" or __float80 is "e". */
20941 return "e";
20942 default:
20943 return NULL;
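/* Illustrative note, not part of the original sources: under the Itanium C++
   ABI these manglings mean, e.g.

     void f (__float128);     mangles as _Z1fg
     void f (long double);    mangles as _Z1fe
   */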
20947 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
20949 static tree
20950 ix86_stack_protect_guard (void)
20952 if (TARGET_SSP_TLS_GUARD)
20954 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
20955 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
20956 tree type = build_qualified_type (type_node, qual);
20957 tree t;
20959 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
20961 t = ix86_tls_stack_chk_guard_decl;
20963 if (t == NULL)
20965 rtx x;
20967 t = build_decl
20968 (UNKNOWN_LOCATION, VAR_DECL,
20969 get_identifier (ix86_stack_protector_guard_symbol_str),
20970 type);
20971 TREE_STATIC (t) = 1;
20972 TREE_PUBLIC (t) = 1;
20973 DECL_EXTERNAL (t) = 1;
20974 TREE_USED (t) = 1;
20975 TREE_THIS_VOLATILE (t) = 1;
20976 DECL_ARTIFICIAL (t) = 1;
20977 DECL_IGNORED_P (t) = 1;
20979 /* Do not share RTL as the declaration is visible outside of
20980 current function. */
20981 x = DECL_RTL (t);
20982 RTX_FLAG (x, used) = 1;
20984 ix86_tls_stack_chk_guard_decl = t;
20987 else
20989 tree asptrtype = build_pointer_type (type);
20991 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
20992 t = build2 (MEM_REF, asptrtype, t,
20993 build_int_cst (asptrtype, 0));
20994 TREE_THIS_VOLATILE (t) = 1;
20997 return t;
21000 return default_stack_protect_guard ();
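/* Usage sketch, illustrative only: the TLS guard defaults to %fs:0x28 for
   64-bit and %gs:0x14 for 32-bit code; the options handled above allow the
   location to be overridden, e.g.

     -mstack-protector-guard=tls
     -mstack-protector-guard-reg=gs
     -mstack-protector-guard-symbol=__stack_chk_guard

   which is how kernels typically redirect the guard to a per-CPU symbol.  */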
21003 /* For 32-bit code we can save PIC register setup by using
21004 the hidden function __stack_chk_fail_local instead of calling
21005 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21006 register, so it is better to call __stack_chk_fail directly. */
21008 static tree ATTRIBUTE_UNUSED
21009 ix86_stack_protect_fail (void)
21011 return TARGET_64BIT
21012 ? default_external_stack_protect_fail ()
21013 : default_hidden_stack_protect_fail ();
21016 /* Select a format to encode pointers in exception handling data. CODE
21017 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21018 true if the symbol may be affected by dynamic relocations.
21020 ??? All x86 object file formats are capable of representing this.
21021 After all, the relocation needed is the same as for the call insn.
21022 Whether or not a particular assembler allows us to enter such, I
21023 guess we'll have to see. */
21025 asm_preferred_eh_data_format (int code, int global)
21027 if (flag_pic)
21029 int type = DW_EH_PE_sdata8;
21030 if (!TARGET_64BIT
21031 || ix86_cmodel == CM_SMALL_PIC
21032 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21033 type = DW_EH_PE_sdata4;
21034 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21036 if (ix86_cmodel == CM_SMALL
21037 || (ix86_cmodel == CM_MEDIUM && code))
21038 return DW_EH_PE_udata4;
21039 return DW_EH_PE_absptr;
21042 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21043 static int
21044 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21045 tree vectype, int)
21047 bool fp = false;
21048 machine_mode mode = TImode;
21049 int index;
21050 if (vectype != NULL)
21052 fp = FLOAT_TYPE_P (vectype);
21053 mode = TYPE_MODE (vectype);
21056 switch (type_of_cost)
21058 case scalar_stmt:
21059 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21061 case scalar_load:
21062 /* load/store costs are relative to a register move, which is 2. Recompute
21063 them to COSTS_N_INSNS so everything has the same base. */
21064 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21065 : ix86_cost->int_load [2]) / 2;
21067 case scalar_store:
21068 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21069 : ix86_cost->int_store [2]) / 2;
21071 case vector_stmt:
21072 return ix86_vec_cost (mode,
21073 fp ? ix86_cost->addss : ix86_cost->sse_op);
21075 case vector_load:
21076 index = sse_store_index (mode);
21077 /* See PR82713 - we may end up being called on non-vector type. */
21078 if (index < 0)
21079 index = 2;
21080 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21082 case vector_store:
21083 index = sse_store_index (mode);
21084 /* See PR82713 - we may end up being called on non-vector type. */
21085 if (index < 0)
21086 index = 2;
21087 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21089 case vec_to_scalar:
21090 case scalar_to_vec:
21091 return ix86_vec_cost (mode, ix86_cost->sse_op);
21093 /* We should have separate costs for unaligned loads and gather/scatter.
21094 Do that incrementally. */
21095 case unaligned_load:
21096 index = sse_store_index (mode);
21097 /* See PR82713 - we may end up being called on non-vector type. */
21098 if (index < 0)
21099 index = 2;
21100 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21102 case unaligned_store:
21103 index = sse_store_index (mode);
21104 /* See PR82713 - we may end up being called on non-vector type. */
21105 if (index < 0)
21106 index = 2;
21107 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21109 case vector_gather_load:
21110 return ix86_vec_cost (mode,
21111 COSTS_N_INSNS
21112 (ix86_cost->gather_static
21113 + ix86_cost->gather_per_elt
21114 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21116 case vector_scatter_store:
21117 return ix86_vec_cost (mode,
21118 COSTS_N_INSNS
21119 (ix86_cost->scatter_static
21120 + ix86_cost->scatter_per_elt
21121 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21123 case cond_branch_taken:
21124 return ix86_cost->cond_taken_branch_cost;
21126 case cond_branch_not_taken:
21127 return ix86_cost->cond_not_taken_branch_cost;
21129 case vec_perm:
21130 case vec_promote_demote:
21131 return ix86_vec_cost (mode, ix86_cost->sse_op);
21133 case vec_construct:
21135 /* N element inserts into SSE vectors. */
21136 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21137 /* One vinserti128 for combining two SSE vectors for AVX256. */
21138 if (GET_MODE_BITSIZE (mode) == 256)
21139 cost += ix86_vec_cost (mode, ix86_cost->addss);
21140 /* One vinserti64x4 and two vinserti128 for combining SSE
21141 and AVX256 vectors to AVX512. */
21142 else if (GET_MODE_BITSIZE (mode) == 512)
21143 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21144 return cost;
21147 default:
21148 gcc_unreachable ();
21153 /* This function returns the calling-ABI-specific va_list type node,
21154 i.e. the va_list type specific to FNDECL. */
21156 static tree
21157 ix86_fn_abi_va_list (tree fndecl)
21159 if (!TARGET_64BIT)
21160 return va_list_type_node;
21161 gcc_assert (fndecl != NULL_TREE);
21163 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21164 return ms_va_list_type_node;
21165 else
21166 return sysv_va_list_type_node;
21169 /* Returns the canonical va_list type specified by TYPE. If there
21170 is no valid TYPE provided, it returns NULL_TREE. */
21172 static tree
21173 ix86_canonical_va_list_type (tree type)
21175 if (TARGET_64BIT)
21177 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21178 return ms_va_list_type_node;
21180 if ((TREE_CODE (type) == ARRAY_TYPE
21181 && integer_zerop (array_type_nelts (type)))
21182 || POINTER_TYPE_P (type))
21184 tree elem_type = TREE_TYPE (type);
21185 if (TREE_CODE (elem_type) == RECORD_TYPE
21186 && lookup_attribute ("sysv_abi va_list",
21187 TYPE_ATTRIBUTES (elem_type)))
21188 return sysv_va_list_type_node;
21191 return NULL_TREE;
21194 return std_canonical_va_list_type (type);
21197 /* Iterate through the target-specific builtin types for va_list.
21198 IDX denotes the iterator, *PTREE is set to the result type of
21199 the va_list builtin, and *PNAME to its internal name.
21200 Returns zero if there is no element for this index, otherwise
21201 IDX should be increased upon the next call.
21202 Note, do not iterate a base builtin's name like __builtin_va_list.
21203 Used from c_common_nodes_and_builtins. */
21205 static int
21206 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21208 if (TARGET_64BIT)
21210 switch (idx)
21212 default:
21213 break;
21215 case 0:
21216 *ptree = ms_va_list_type_node;
21217 *pname = "__builtin_ms_va_list";
21218 return 1;
21220 case 1:
21221 *ptree = sysv_va_list_type_node;
21222 *pname = "__builtin_sysv_va_list";
21223 return 1;
21227 return 0;
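/* Usage sketch, illustrative only: on 64-bit targets the extra va_list
   builtins are usable directly from C, e.g.

     void __attribute__((ms_abi))
     f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int first = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       (void) first;
     }
   */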
21230 #undef TARGET_SCHED_DISPATCH
21231 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21232 #undef TARGET_SCHED_DISPATCH_DO
21233 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21234 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21235 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21236 #undef TARGET_SCHED_REORDER
21237 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21238 #undef TARGET_SCHED_ADJUST_PRIORITY
21239 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21240 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21241 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21242 ix86_dependencies_evaluation_hook
21245 /* Implementation of the reassociation_width target hook, used by the
21246 reassoc phase to identify the parallelism level in a reassociated
21247 tree. The statement's tree code is passed in OP and the arguments'
21248 type is passed in MODE. */
21250 static int
21251 ix86_reassociation_width (unsigned int op, machine_mode mode)
21253 int width = 1;
21254 /* Vector part. */
21255 if (VECTOR_MODE_P (mode))
21257 int div = 1;
21258 if (INTEGRAL_MODE_P (mode))
21259 width = ix86_cost->reassoc_vec_int;
21260 else if (FLOAT_MODE_P (mode))
21261 width = ix86_cost->reassoc_vec_fp;
21263 if (width == 1)
21264 return 1;
21266 /* Integer vector instructions execute in FP unit
21267 and can execute 3 additions and one multiplication per cycle. */
21268 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
21269 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21270 return 1;
21272 /* Account for targets that split wide vectors into multiple parts. */
21273 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
21274 div = GET_MODE_BITSIZE (mode) / 128;
21275 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21276 div = GET_MODE_BITSIZE (mode) / 64;
21277 width = (width + div - 1) / div;
21279 /* Scalar part. */
21280 else if (INTEGRAL_MODE_P (mode))
21281 width = ix86_cost->reassoc_int;
21282 else if (FLOAT_MODE_P (mode))
21283 width = ix86_cost->reassoc_fp;
21285 /* Avoid using too many registers in 32bit mode. */
21286 if (!TARGET_64BIT && width > 2)
21287 width = 2;
21288 return width;
21291 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21292 place emms and femms instructions. */
21294 static machine_mode
21295 ix86_preferred_simd_mode (scalar_mode mode)
21297 if (!TARGET_SSE)
21298 return word_mode;
21300 switch (mode)
21302 case E_QImode:
21303 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21304 return V64QImode;
21305 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21306 return V32QImode;
21307 else
21308 return V16QImode;
21310 case E_HImode:
21311 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21312 return V32HImode;
21313 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21314 return V16HImode;
21315 else
21316 return V8HImode;
21318 case E_SImode:
21319 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21320 return V16SImode;
21321 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21322 return V8SImode;
21323 else
21324 return V4SImode;
21326 case E_DImode:
21327 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21328 return V8DImode;
21329 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21330 return V4DImode;
21331 else
21332 return V2DImode;
21334 case E_SFmode:
21335 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21336 return V16SFmode;
21337 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21338 return V8SFmode;
21339 else
21340 return V4SFmode;
21342 case E_DFmode:
21343 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21344 return V8DFmode;
21345 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21346 return V4DFmode;
21347 else if (TARGET_SSE2)
21348 return V2DFmode;
21349 /* FALLTHRU */
21351 default:
21352 return word_mode;
21356 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21357 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21358 256bit and 128bit vectors. */
21360 static void
21361 ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
21363 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21365 sizes->safe_push (64);
21366 sizes->safe_push (32);
21367 sizes->safe_push (16);
21369 else if (TARGET_AVX512F && all)
21371 sizes->safe_push (32);
21372 sizes->safe_push (16);
21373 sizes->safe_push (64);
21375 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21377 sizes->safe_push (32);
21378 sizes->safe_push (16);
21380 else if (TARGET_AVX && all)
21382 sizes->safe_push (16);
21383 sizes->safe_push (32);
21387 /* Implementation of targetm.vectorize.get_mask_mode. */
21389 static opt_machine_mode
21390 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
21392 unsigned elem_size = vector_size / nunits;
21394 /* Scalar mask case. */
21395 if ((TARGET_AVX512F && vector_size == 64)
21396 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21398 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
21399 return smallest_int_mode_for_size (nunits);
21402 scalar_int_mode elem_mode
21403 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21405 gcc_assert (elem_size * nunits == vector_size);
21407 return mode_for_vector (elem_mode, nunits);
21412 /* Return the class of registers which could be used for a pseudo of MODE
21413 and of class RCLASS for spilling instead of memory. Return NO_REGS
21414 if it is not possible or not profitable. */
21416 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21418 static reg_class_t
21419 ix86_spill_class (reg_class_t rclass, machine_mode mode)
21421 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21422 && TARGET_SSE2
21423 && TARGET_INTER_UNIT_MOVES_TO_VEC
21424 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21425 && (mode == SImode || (TARGET_64BIT && mode == DImode))
21426 && INTEGER_CLASS_P (rclass))
21427 return ALL_SSE_REGS;
21428 return NO_REGS;
21431 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21432 but returns a lower bound. */
21434 static unsigned int
21435 ix86_max_noce_ifcvt_seq_cost (edge e)
21437 bool predictable_p = predictable_edge_p (e);
21439 enum compiler_param param
21440 = (predictable_p
21441 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
21442 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
21444 /* If we have a parameter set, use that, otherwise take a guess using
21445 BRANCH_COST. */
21446 if (global_options_set.x_param_values[param])
21447 return PARAM_VALUE (param);
21448 else
21449 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21452 /* Return true if SEQ is a good candidate as a replacement for the
21453 if-convertible sequence described in IF_INFO. */
21455 static bool
21456 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21458 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21460 int cmov_cnt = 0;
21461 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21462 Maybe we should allow even more conditional moves as long as they
21463 are used far enough not to stall the CPU, or also consider
21464 IF_INFO->TEST_BB succ edge probabilities. */
21465 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21467 rtx set = single_set (insn);
21468 if (!set)
21469 continue;
21470 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21471 continue;
21472 rtx src = SET_SRC (set);
21473 machine_mode mode = GET_MODE (src);
21474 if (GET_MODE_CLASS (mode) != MODE_INT
21475 && GET_MODE_CLASS (mode) != MODE_FLOAT)
21476 continue;
21477 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21478 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21479 continue;
21480 /* insn is CMOV or FCMOV. */
21481 if (++cmov_cnt > 1)
21482 return false;
21485 return default_noce_conversion_profitable_p (seq, if_info);
21488 /* Implement targetm.vectorize.init_cost. */
21490 static void *
21491 ix86_init_cost (struct loop *)
21493 unsigned *cost = XNEWVEC (unsigned, 3);
21494 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21495 return cost;
21498 /* Implement targetm.vectorize.add_stmt_cost. */
21500 static unsigned
21501 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
21502 struct _stmt_vec_info *stmt_info, int misalign,
21503 enum vect_cost_model_location where)
21505 unsigned *cost = (unsigned *) data;
21506 unsigned retval = 0;
21507 bool scalar_p
21508 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21510 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
21511 int stmt_cost = - 1;
21513 bool fp = false;
21514 machine_mode mode = scalar_p ? SImode : TImode;
21516 if (vectype != NULL)
21518 fp = FLOAT_TYPE_P (vectype);
21519 mode = TYPE_MODE (vectype);
21520 if (scalar_p)
21521 mode = TYPE_MODE (TREE_TYPE (vectype));
21524 if ((kind == vector_stmt || kind == scalar_stmt)
21525 && stmt_info
21526 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21528 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21529 /*machine_mode inner_mode = mode;
21530 if (VECTOR_MODE_P (mode))
21531 inner_mode = GET_MODE_INNER (mode);*/
21533 switch (subcode)
21535 case PLUS_EXPR:
21536 case POINTER_PLUS_EXPR:
21537 case MINUS_EXPR:
21538 if (kind == scalar_stmt)
21540 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21541 stmt_cost = ix86_cost->addss;
21542 else if (X87_FLOAT_MODE_P (mode))
21543 stmt_cost = ix86_cost->fadd;
21544 else
21545 stmt_cost = ix86_cost->add;
21547 else
21548 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21549 : ix86_cost->sse_op);
21550 break;
21552 case MULT_EXPR:
21553 case WIDEN_MULT_EXPR:
21554 case MULT_HIGHPART_EXPR:
21555 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21556 break;
21557 case NEGATE_EXPR:
21558 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21559 stmt_cost = ix86_cost->sse_op;
21560 else if (X87_FLOAT_MODE_P (mode))
21561 stmt_cost = ix86_cost->fchs;
21562 else if (VECTOR_MODE_P (mode))
21563 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21564 else
21565 stmt_cost = ix86_cost->add;
21566 break;
21567 case TRUNC_DIV_EXPR:
21568 case CEIL_DIV_EXPR:
21569 case FLOOR_DIV_EXPR:
21570 case ROUND_DIV_EXPR:
21571 case TRUNC_MOD_EXPR:
21572 case CEIL_MOD_EXPR:
21573 case FLOOR_MOD_EXPR:
21574 case RDIV_EXPR:
21575 case ROUND_MOD_EXPR:
21576 case EXACT_DIV_EXPR:
21577 stmt_cost = ix86_division_cost (ix86_cost, mode);
21578 break;
21580 case RSHIFT_EXPR:
21581 case LSHIFT_EXPR:
21582 case LROTATE_EXPR:
21583 case RROTATE_EXPR:
21585 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21586 stmt_cost = ix86_shift_rotate_cost
21587 (ix86_cost, mode,
21588 TREE_CODE (op2) == INTEGER_CST,
21589 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21590 true, false, false, NULL, NULL);
21592 break;
21593 case NOP_EXPR:
21594 /* Only sign-conversions are free. */
21595 if (tree_nop_conversion_p
21596 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21597 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21598 stmt_cost = 0;
21599 break;
21601 case BIT_IOR_EXPR:
21602 case ABS_EXPR:
21603 case ABSU_EXPR:
21604 case MIN_EXPR:
21605 case MAX_EXPR:
21606 case BIT_XOR_EXPR:
21607 case BIT_AND_EXPR:
21608 case BIT_NOT_EXPR:
21609 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21610 stmt_cost = ix86_cost->sse_op;
21611 else if (VECTOR_MODE_P (mode))
21612 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21613 else
21614 stmt_cost = ix86_cost->add;
21615 break;
21616 default:
21617 break;
21621 combined_fn cfn;
21622 if ((kind == vector_stmt || kind == scalar_stmt)
21623 && stmt_info
21624 && stmt_info->stmt
21625 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21626 switch (cfn)
21628 case CFN_FMA:
21629 stmt_cost = ix86_vec_cost (mode,
21630 mode == SFmode ? ix86_cost->fmass
21631 : ix86_cost->fmasd);
21632 break;
21633 default:
21634 break;
21637 /* If we do elementwise loads into a vector then we are bound by
21638 latency and execution resources for the many scalar loads
21639 (AGU and load ports). Try to account for this by scaling the
21640 construction cost by the number of elements involved. */
21641 if ((kind == vec_construct || kind == vec_to_scalar)
21642 && stmt_info
21643 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21644 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21645 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21646 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21648 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21649 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21651 if (stmt_cost == -1)
21652 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21654 /* Penalize DFmode vector operations for Bonnell. */
21655 if (TARGET_BONNELL && kind == vector_stmt
21656 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
21657 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
21659 /* Statements in an inner loop relative to the loop being
21660 vectorized are weighted more heavily. The value here is
21661 arbitrary and could potentially be improved with analysis. */
21662 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
21663 count *= 50; /* FIXME. */
21665 retval = (unsigned) (count * stmt_cost);
21667 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
21668 for Silvermont, as it has an out-of-order integer pipeline and can execute
21669 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
21670 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
21671 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
21673 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
21674 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
21675 retval = (retval * 17) / 10;
21678 cost[where] += retval;
21680 return retval;
21683 /* Implement targetm.vectorize.finish_cost. */
21685 static void
21686 ix86_finish_cost (void *data, unsigned *prologue_cost,
21687 unsigned *body_cost, unsigned *epilogue_cost)
21689 unsigned *cost = (unsigned *) data;
21690 *prologue_cost = cost[vect_prologue];
21691 *body_cost = cost[vect_body];
21692 *epilogue_cost = cost[vect_epilogue];
21695 /* Implement targetm.vectorize.destroy_cost_data. */
21697 static void
21698 ix86_destroy_cost_data (void *data)
21700 free (data);
21703 /* Validate target specific memory model bits in VAL. */
21705 static unsigned HOST_WIDE_INT
21706 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
21708 enum memmodel model = memmodel_from_int (val);
21709 bool strong;
21711 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
21712 |MEMMODEL_MASK)
21713 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
21715 warning (OPT_Winvalid_memory_model,
21716 "unknown architecture specific memory model");
21717 return MEMMODEL_SEQ_CST;
21719 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
21720 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
21722 warning (OPT_Winvalid_memory_model,
21723 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
21724 "memory model");
21725 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
21727 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
21729 warning (OPT_Winvalid_memory_model,
21730 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
21731 "memory model");
21732 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
21734 return val;
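/* Usage sketch, illustrative only: the HLE bits validated above are ORed
   into the standard memory models by user code, e.g.

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Pairing HLE_ACQUIRE with a weaker model than ACQUIRE is diagnosed and
   demoted to SEQ_CST above.  */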
21737 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
21738 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
21739 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
21740 or number of vecsize_mangle variants that should be emitted. */
21742 static int
21743 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
21744 struct cgraph_simd_clone *clonei,
21745 tree base_type, int num)
21747 int ret = 1;
21749 if (clonei->simdlen
21750 && (clonei->simdlen < 2
21751 || clonei->simdlen > 1024
21752 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
21754 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21755 "unsupported simdlen %d", clonei->simdlen);
21756 return 0;
21759 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
21760 if (TREE_CODE (ret_type) != VOID_TYPE)
21761 switch (TYPE_MODE (ret_type))
21763 case E_QImode:
21764 case E_HImode:
21765 case E_SImode:
21766 case E_DImode:
21767 case E_SFmode:
21768 case E_DFmode:
21769 /* case E_SCmode: */
21770 /* case E_DCmode: */
21771 if (!AGGREGATE_TYPE_P (ret_type))
21772 break;
21773 /* FALLTHRU */
21774 default:
21775 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21776 "unsupported return type %qT for simd", ret_type);
21777 return 0;
21780 tree t;
21781 int i;
21782 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
21783 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
21785 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
21786 t && t != void_list_node; t = TREE_CHAIN (t), i++)
21788 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
21789 switch (TYPE_MODE (arg_type))
21791 case E_QImode:
21792 case E_HImode:
21793 case E_SImode:
21794 case E_DImode:
21795 case E_SFmode:
21796 case E_DFmode:
21797 /* case E_SCmode: */
21798 /* case E_DCmode: */
21799 if (!AGGREGATE_TYPE_P (arg_type))
21800 break;
21801 /* FALLTHRU */
21802 default:
21803 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
21804 break;
21805 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21806 "unsupported argument type %qT for simd", arg_type);
21807 return 0;
21811 if (!TREE_PUBLIC (node->decl))
21813 /* If the function isn't exported, we can pick up just one ISA
21814 for the clones. */
21815 if (TARGET_AVX512F)
21816 clonei->vecsize_mangle = 'e';
21817 else if (TARGET_AVX2)
21818 clonei->vecsize_mangle = 'd';
21819 else if (TARGET_AVX)
21820 clonei->vecsize_mangle = 'c';
21821 else
21822 clonei->vecsize_mangle = 'b';
21823 ret = 1;
21825 else
21827 clonei->vecsize_mangle = "bcde"[num];
21828 ret = 4;
21830 clonei->mask_mode = VOIDmode;
21831 switch (clonei->vecsize_mangle)
21833 case 'b':
21834 clonei->vecsize_int = 128;
21835 clonei->vecsize_float = 128;
21836 break;
21837 case 'c':
21838 clonei->vecsize_int = 128;
21839 clonei->vecsize_float = 256;
21840 break;
21841 case 'd':
21842 clonei->vecsize_int = 256;
21843 clonei->vecsize_float = 256;
21844 break;
21845 case 'e':
21846 clonei->vecsize_int = 512;
21847 clonei->vecsize_float = 512;
21848 if (TYPE_MODE (base_type) == QImode)
21849 clonei->mask_mode = DImode;
21850 else
21851 clonei->mask_mode = SImode;
21852 break;
21854 if (clonei->simdlen == 0)
21856 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
21857 clonei->simdlen = clonei->vecsize_int;
21858 else
21859 clonei->simdlen = clonei->vecsize_float;
21860 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
21862 else if (clonei->simdlen > 16)
21864 /* For compatibility with ICC, use the same upper bounds
21865 for simdlen. In particular, for CTYPE below, use the return type,
21866 unless the function returns void, in which case use the characteristic
21867 type. If it is possible for the given SIMDLEN to pass a CTYPE value
21868 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
21869 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
21870 emit corresponding clone. */
21871 tree ctype = ret_type;
21872 if (TREE_CODE (ret_type) == VOID_TYPE)
21873 ctype = base_type;
21874 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
21875 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
21876 cnt /= clonei->vecsize_int;
21877 else
21878 cnt /= clonei->vecsize_float;
21879 if (cnt > (TARGET_64BIT ? 16 : 8))
21881 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
21882 "unsupported simdlen %d", clonei->simdlen);
21883 return 0;
21886 return ret;
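/* A worked example of the computation above: for a non-exported clone of
"double f (double x)" compiled with -mavx2 (and without AVX-512),
vecsize_mangle is 'd', so vecsize_float is 256; with simdlen not given it
becomes 256 / GET_MODE_BITSIZE (DFmode) = 256 / 64 = 4.  */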
21889 /* If SIMD clone NODE can't be used in a vectorized loop
21890 in the current function, return -1; otherwise return the badness of using it
21891 (0 if it is most desirable from the vecsize_mangle point of view, 1
21892 slightly less desirable, etc.). */
21894 static int
21895 ix86_simd_clone_usable (struct cgraph_node *node)
21897 switch (node->simdclone->vecsize_mangle)
21899 case 'b':
21900 if (!TARGET_SSE2)
21901 return -1;
21902 if (!TARGET_AVX)
21903 return 0;
21904 return TARGET_AVX2 ? 2 : 1;
21905 case 'c':
21906 if (!TARGET_AVX)
21907 return -1;
21908 return TARGET_AVX2 ? 1 : 0;
21909 case 'd':
21910 if (!TARGET_AVX2)
21911 return -1;
21912 return 0;
21913 case 'e':
21914 if (!TARGET_AVX512F)
21915 return -1;
21916 return 0;
21917 default:
21918 gcc_unreachable ();
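/* For example, when compiling with -mavx2 an SSE ('b') clone is usable but
has badness 2, an AVX ('c') clone badness 1, and an AVX2 ('d') clone
badness 0, so the vectorizer prefers the widest clone the current ISA can
execute; an AVX-512 ('e') clone is rejected outright.  */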
21922 /* This function adjusts the unroll factor based on
21923 the hardware capabilities. For example, bdver3 has
21924 a loop buffer which makes unrolling of smaller
21925 loops less important. This function decides the
21926 unroll factor using the number of memory references
21927 (the value 32 is used) as a heuristic. */
21929 static unsigned
21930 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
21932 basic_block *bbs;
21933 rtx_insn *insn;
21934 unsigned i;
21935 unsigned mem_count = 0;
21937 if (!TARGET_ADJUST_UNROLL)
21938 return nunroll;
21940 /* Count the number of memory references within the loop body.
21941 This value determines the unrolling factor for bdver3 and bdver4
21942 architectures. */
21943 subrtx_iterator::array_type array;
21944 bbs = get_loop_body (loop);
21945 for (i = 0; i < loop->num_nodes; i++)
21946 FOR_BB_INSNS (bbs[i], insn)
21947 if (NONDEBUG_INSN_P (insn))
21948 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
21949 if (const_rtx x = *iter)
21950 if (MEM_P (x))
21952 machine_mode mode = GET_MODE (x);
21953 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21954 if (n_words > 4)
21955 mem_count += 2;
21956 else
21957 mem_count += 1;
21959 free (bbs);
21961 if (mem_count && mem_count <= 32)
21962 return MIN (nunroll, 32 / mem_count);
21964 return nunroll;
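/* For example, a loop body with 8 word-sized memory references gives
mem_count == 8, so the unroll factor is capped at 32 / 8 = 4; references
wider than four words count double.  */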
21968 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
21970 static bool
21971 ix86_float_exceptions_rounding_supported_p (void)
21973 /* For x87 floating point with standard excess precision handling,
21974 there is no adddf3 pattern (since x87 floating point only has
21975 XFmode operations) so the default hook implementation gets this
21976 wrong. */
21977 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
21980 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
21982 static void
21983 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
21985 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
21986 return;
21987 tree exceptions_var = create_tmp_var_raw (integer_type_node);
21988 if (TARGET_80387)
21990 tree fenv_index_type = build_index_type (size_int (6));
21991 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
21992 tree fenv_var = create_tmp_var_raw (fenv_type);
21993 TREE_ADDRESSABLE (fenv_var) = 1;
21994 tree fenv_ptr = build_pointer_type (fenv_type);
21995 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
21996 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
21997 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
21998 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
21999 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22000 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22001 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22002 tree hold_fnclex = build_call_expr (fnclex, 0);
22003 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22004 NULL_TREE, NULL_TREE);
22005 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22006 hold_fnclex);
22007 *clear = build_call_expr (fnclex, 0);
22008 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22009 tree fnstsw_call = build_call_expr (fnstsw, 0);
22010 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
22011 sw_var, fnstsw_call);
22012 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22013 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
22014 exceptions_var, exceptions_x87);
22015 *update = build2 (COMPOUND_EXPR, integer_type_node,
22016 sw_mod, update_mod);
22017 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22018 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22020 if (TARGET_SSE && TARGET_SSE_MATH)
22022 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22023 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22024 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22025 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22026 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22027 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
22028 mxcsr_orig_var, stmxcsr_hold_call);
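/* The value installed while exceptions are "held" is
(mxcsr_orig | 0x1f80) & 0xffffffc0: 0x1f80 sets the six exception mask
bits (MXCSR bits 7-12) and the 0xffffffc0 mask clears the sticky
exception flags in bits 0-5.  */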
22029 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22030 mxcsr_orig_var,
22031 build_int_cst (unsigned_type_node, 0x1f80));
22032 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22033 build_int_cst (unsigned_type_node, 0xffffffc0));
22034 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
22035 mxcsr_mod_var, hold_mod_val);
22036 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22037 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22038 hold_assign_orig, hold_assign_mod);
22039 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22040 ldmxcsr_hold_call);
22041 if (*hold)
22042 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22043 else
22044 *hold = hold_all;
22045 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22046 if (*clear)
22047 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22048 ldmxcsr_clear_call);
22049 else
22050 *clear = ldmxcsr_clear_call;
22051 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
22052 tree exceptions_sse = fold_convert (integer_type_node,
22053 stmxcsr_update_call);
22054 if (*update)
22056 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22057 exceptions_var, exceptions_sse);
22058 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22059 exceptions_var, exceptions_mod);
22060 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22061 exceptions_assign);
22063 else
22064 *update = build2 (MODIFY_EXPR, integer_type_node,
22065 exceptions_var, exceptions_sse);
22066 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22067 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22068 ldmxcsr_update_call);
22070 tree atomic_feraiseexcept
22071 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22072 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22073 1, exceptions_var);
22074 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22075 atomic_feraiseexcept_call);
22078 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22079 /* For i386, a common symbol is local only for non-PIE binaries. For
22080 x86-64, a common symbol is local only for non-PIE binaries, or when the
22081 linker supports copy relocations in PIE binaries. */
22083 static bool
22084 ix86_binds_local_p (const_tree exp)
22086 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22087 (!flag_pic
22088 || (TARGET_64BIT
22089 && HAVE_LD_PIE_COPYRELOC != 0)));
22091 #endif
22093 /* If MEM is in the form of [base+offset], extract the two parts
22094 of the address into BASE and OFFSET, otherwise return false. */
22096 static bool
22097 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22099 rtx addr;
22101 gcc_assert (MEM_P (mem));
22103 addr = XEXP (mem, 0);
22105 if (GET_CODE (addr) == CONST)
22106 addr = XEXP (addr, 0);
22108 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22110 *base = addr;
22111 *offset = const0_rtx;
22112 return true;
22115 if (GET_CODE (addr) == PLUS
22116 && (REG_P (XEXP (addr, 0))
22117 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22118 && CONST_INT_P (XEXP (addr, 1)))
22120 *base = XEXP (addr, 0);
22121 *offset = XEXP (addr, 1);
22122 return true;
22125 return false;
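/* For example, an address of the form (plus (reg R) (const_int 16)) yields
BASE = (reg R) and OFFSET = 16, while a bare register or SYMBOL_REF is
returned as BASE with OFFSET = 0.  */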
22128 /* Given OPERANDS of consecutive load/store, check if we can merge
22129 them into a move multiple. LOAD is true if they are load instructions.
22130 MODE is the mode of the memory operands. */
22132 bool
22133 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22134 machine_mode mode)
22136 HOST_WIDE_INT offval_1, offval_2, msize;
22137 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22139 if (load)
22141 mem_1 = operands[1];
22142 mem_2 = operands[3];
22143 reg_1 = operands[0];
22144 reg_2 = operands[2];
22146 else
22148 mem_1 = operands[0];
22149 mem_2 = operands[2];
22150 reg_1 = operands[1];
22151 reg_2 = operands[3];
22154 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22156 if (REGNO (reg_1) != REGNO (reg_2))
22157 return false;
22159 /* Check if the addresses are in the form of [base+offset]. */
22160 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22161 return false;
22162 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22163 return false;
22165 /* Check if the bases are the same. */
22166 if (!rtx_equal_p (base_1, base_2))
22167 return false;
22169 offval_1 = INTVAL (offset_1);
22170 offval_2 = INTVAL (offset_2);
22171 msize = GET_MODE_SIZE (mode);
22172 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22173 if (offval_1 + msize != offval_2)
22174 return false;
22176 return true;
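/* For example, two DImode stores of the same register to 16(%rsp) and
24(%rsp) satisfy this check: identical source register, identical base,
and offsets that differ by exactly GET_MODE_SIZE (DImode) with the first
store at the lower address.  */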
22179 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22181 static bool
22182 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22183 optimization_type opt_type)
22185 switch (op)
22187 case asin_optab:
22188 case acos_optab:
22189 case log1p_optab:
22190 case exp_optab:
22191 case exp10_optab:
22192 case exp2_optab:
22193 case expm1_optab:
22194 case ldexp_optab:
22195 case scalb_optab:
22196 case round_optab:
22197 return opt_type == OPTIMIZE_FOR_SPEED;
22199 case rint_optab:
22200 if (SSE_FLOAT_MODE_P (mode1)
22201 && TARGET_SSE_MATH
22202 && !flag_trapping_math
22203 && !TARGET_SSE4_1)
22204 return opt_type == OPTIMIZE_FOR_SPEED;
22205 return true;
22207 case floor_optab:
22208 case ceil_optab:
22209 case btrunc_optab:
22210 if (SSE_FLOAT_MODE_P (mode1)
22211 && TARGET_SSE_MATH
22212 && !flag_trapping_math
22213 && TARGET_SSE4_1)
22214 return true;
22215 return opt_type == OPTIMIZE_FOR_SPEED;
22217 case rsqrt_optab:
22218 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22220 default:
22221 return true;
22225 /* Address space support.
22227 This is not "far pointers" in the 16-bit sense, but an easy way
22228 to use %fs and %gs segment prefixes. Therefore:
22230 (a) All address spaces have the same modes,
22231 (b) All address spaces have the same address forms,
22232 (c) While %fs and %gs are technically subsets of the generic
22233 address space, they are probably not subsets of each other.
22234 (d) Since we have no access to the segment base register values
22235 without resorting to a system call, we cannot convert a
22236 non-default address space to a default address space.
22237 Therefore we do not claim %fs or %gs are subsets of generic.
22239 Therefore we can (mostly) use the default hooks. */
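/* These address spaces surface to user code through the __seg_fs and
__seg_gs qualifiers (a minimal sketch, assuming GCC's x86 named address
space support; the type and function names below are hypothetical):

typedef struct { int counter; } percpu_t;

int
read_counter (unsigned long off)
{
const percpu_t __seg_gs *p = (const percpu_t __seg_gs *) off;
return p->counter;
}

The resulting load is emitted with a %gs segment prefix.  */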
22241 /* All use of segmentation is assumed to make address 0 valid. */
22243 static bool
22244 ix86_addr_space_zero_address_valid (addr_space_t as)
22246 return as != ADDR_SPACE_GENERIC;
22249 static void
22250 ix86_init_libfuncs (void)
22252 if (TARGET_64BIT)
22254 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22255 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22257 else
22259 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22260 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22263 #if TARGET_MACHO
22264 darwin_rename_builtins ();
22265 #endif
22268 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22269 FPU, assume that the fpcw is set to extended precision; when using
22270 only SSE, rounding is correct; when using both SSE and the FPU,
22271 the rounding precision is indeterminate, since either may be chosen
22272 apparently at random. */
22274 static enum flt_eval_method
22275 ix86_excess_precision (enum excess_precision_type type)
22277 switch (type)
22279 case EXCESS_PRECISION_TYPE_FAST:
22280 /* The fastest type to promote to will always be the native type,
22281 whether that occurs with implicit excess precision or
22282 otherwise. */
22283 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22284 case EXCESS_PRECISION_TYPE_STANDARD:
22285 case EXCESS_PRECISION_TYPE_IMPLICIT:
22286 /* Otherwise, the excess precision we want when we are
22287 in a standards-compliant mode and the implicit precision we
22288 would provide are identical, were it not for the unpredictable
22289 cases. */
22290 if (!TARGET_80387)
22291 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22292 else if (!TARGET_MIX_SSE_I387)
22294 if (!(TARGET_SSE && TARGET_SSE_MATH))
22295 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22296 else if (TARGET_SSE2)
22297 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22300 /* If we are in standards compliant mode, but we know we will
22301 calculate in unpredictable precision, return
22302 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22303 excess precision if the target can't guarantee it will honor
22304 it. */
22305 return (type == EXCESS_PRECISION_TYPE_STANDARD
22306 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22307 : FLT_EVAL_METHOD_UNPREDICTABLE);
22308 default:
22309 gcc_unreachable ();
22312 return FLT_EVAL_METHOD_UNPREDICTABLE;
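/* Concretely: with x87-only math this returns
FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE (float and double arithmetic is
carried out in long double), with SSE2 math it returns
FLT_EVAL_METHOD_PROMOTE_TO_FLOAT (no excess precision), and when x87 and
SSE math are mixed the method is unpredictable except that
standards-compliant mode still requests float.  */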
22315 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
22316 decrements by exactly 2 no matter what the position was; there is no pushb.
22318 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
22319 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
22320 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
22322 poly_int64
22323 ix86_push_rounding (poly_int64 bytes)
22325 return ROUND_UP (bytes, UNITS_PER_WORD);
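/* For example, pushing a 2-byte operand reserves 4 bytes with -m32 and
8 bytes with -m64, so every stack pointer adjustment stays a multiple of
UNITS_PER_WORD as required by the CIE data alignment factor above.  */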
22328 /* Target-specific selftests. */
22330 #if CHECKING_P
22332 namespace selftest {
22334 /* Verify that hard regs are dumped as expected (in compact mode). */
22336 static void
22337 ix86_test_dumping_hard_regs ()
22339 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22340 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22343 /* Test dumping an insn with repeated references to the same SCRATCH,
22344 to verify the rtx_reuse code. */
22346 static void
22347 ix86_test_dumping_memory_blockage ()
22349 set_new_first_and_last_insn (NULL, NULL);
22351 rtx pat = gen_memory_blockage ();
22352 rtx_reuse_manager r;
22353 r.preprocess (pat);
22355 /* Verify that the repeated references to the SCRATCH show use of
22356 reuse IDs. The first should be prefixed with a reuse ID,
22357 and the second should be dumped as a "reuse_rtx" of that ID.
22358 The expected string assumes Pmode == DImode. */
22359 if (Pmode == DImode)
22360 ASSERT_RTL_DUMP_EQ_WITH_REUSE
22361 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
22362 " (unspec:BLK [\n"
22363 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
22364 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22367 /* Verify loading an RTL dump; specifically a dump of copying
22368 a param on x86_64 from a hard reg into the frame.
22369 This test is target-specific since the dump contains target-specific
22370 hard reg names. */
22372 static void
22373 ix86_test_loading_dump_fragment_1 ()
22375 rtl_dump_test t (SELFTEST_LOCATION,
22376 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22378 rtx_insn *insn = get_insn_by_uid (1);
22380 /* The block structure and indentation here are purely for
22381 readability; they mirror the structure of the rtx. */
22382 tree mem_expr;
22384 rtx pat = PATTERN (insn);
22385 ASSERT_EQ (SET, GET_CODE (pat));
22387 rtx dest = SET_DEST (pat);
22388 ASSERT_EQ (MEM, GET_CODE (dest));
22389 /* Verify the "/c" was parsed. */
22390 ASSERT_TRUE (RTX_FLAG (dest, call));
22391 ASSERT_EQ (SImode, GET_MODE (dest));
22393 rtx addr = XEXP (dest, 0);
22394 ASSERT_EQ (PLUS, GET_CODE (addr));
22395 ASSERT_EQ (DImode, GET_MODE (addr));
22397 rtx lhs = XEXP (addr, 0);
22398 /* Verify that the "frame" REG was consolidated. */
22399 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22402 rtx rhs = XEXP (addr, 1);
22403 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22404 ASSERT_EQ (-4, INTVAL (rhs));
22407 /* Verify the "[1 i+0 S4 A32]" was parsed. */
22408 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22409 /* "i" should have been handled by synthesizing a global int
22410 variable named "i". */
22411 mem_expr = MEM_EXPR (dest);
22412 ASSERT_NE (mem_expr, NULL);
22413 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22414 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22415 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22416 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22417 /* "+0". */
22418 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22419 ASSERT_EQ (0, MEM_OFFSET (dest));
22420 /* "S4". */
22421 ASSERT_EQ (4, MEM_SIZE (dest));
22422 /* "A32". */
22423 ASSERT_EQ (32, MEM_ALIGN (dest));
22426 rtx src = SET_SRC (pat);
22427 ASSERT_EQ (REG, GET_CODE (src));
22428 ASSERT_EQ (SImode, GET_MODE (src));
22429 ASSERT_EQ (5, REGNO (src));
22430 tree reg_expr = REG_EXPR (src);
22431 /* "i" here should point to the same var as for the MEM_EXPR. */
22432 ASSERT_EQ (reg_expr, mem_expr);
22437 /* Verify that the RTL loader copes with a call_insn dump.
22438 This test is target-specific since the dump contains a target-specific
22439 hard reg name. */
22441 static void
22442 ix86_test_loading_call_insn ()
22444 /* The test dump includes register "xmm0", which requires TARGET_SSE
22445 to exist. */
22446 if (!TARGET_SSE)
22447 return;
22449 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22451 rtx_insn *insn = get_insns ();
22452 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22454 /* "/j". */
22455 ASSERT_TRUE (RTX_FLAG (insn, jump));
22457 rtx pat = PATTERN (insn);
22458 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22460 /* Verify REG_NOTES. */
22462 /* "(expr_list:REG_CALL_DECL". */
22463 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22464 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22465 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22467 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
22468 rtx_expr_list *note1 = note0->next ();
22469 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22471 ASSERT_EQ (NULL, note1->next ());
22474 /* Verify CALL_INSN_FUNCTION_USAGE. */
22476 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
22477 rtx_expr_list *usage
22478 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22479 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22480 ASSERT_EQ (DFmode, GET_MODE (usage));
22481 ASSERT_EQ (USE, GET_CODE (usage->element ()));
22482 ASSERT_EQ (NULL, usage->next ());
22486 /* Verify that the RTL loader copes with a dump from print_rtx_function.
22487 This test is target-specific since the dump contains target-specific
22488 hard reg names. */
22490 static void
22491 ix86_test_loading_full_dump ()
22493 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22495 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22497 rtx_insn *insn_1 = get_insn_by_uid (1);
22498 ASSERT_EQ (NOTE, GET_CODE (insn_1));
22500 rtx_insn *insn_7 = get_insn_by_uid (7);
22501 ASSERT_EQ (INSN, GET_CODE (insn_7));
22502 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22504 rtx_insn *insn_15 = get_insn_by_uid (15);
22505 ASSERT_EQ (INSN, GET_CODE (insn_15));
22506 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22508 /* Verify crtl->return_rtx. */
22509 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22510 ASSERT_EQ (0, REGNO (crtl->return_rtx));
22511 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22514 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22515 In particular, verify that it correctly loads the 2nd operand.
22516 This test is target-specific since these are machine-specific
22517 operands (and enums). */
22519 static void
22520 ix86_test_loading_unspec ()
22522 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22524 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22526 ASSERT_TRUE (cfun);
22528 /* Test of an UNSPEC. */
22529 rtx_insn *insn = get_insns ();
22530 ASSERT_EQ (INSN, GET_CODE (insn));
22531 rtx set = single_set (insn);
22532 ASSERT_NE (NULL, set);
22533 rtx dst = SET_DEST (set);
22534 ASSERT_EQ (MEM, GET_CODE (dst));
22535 rtx src = SET_SRC (set);
22536 ASSERT_EQ (UNSPEC, GET_CODE (src));
22537 ASSERT_EQ (BLKmode, GET_MODE (src));
22538 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22540 rtx v0 = XVECEXP (src, 0, 0);
22542 /* Verify that the two uses of the first SCRATCH have pointer
22543 equality. */
22544 rtx scratch_a = XEXP (dst, 0);
22545 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22547 rtx scratch_b = XEXP (v0, 0);
22548 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22550 ASSERT_EQ (scratch_a, scratch_b);
22552 /* Verify that the two mems are thus treated as equal. */
22553 ASSERT_TRUE (rtx_equal_p (dst, v0));
22555 /* Verify that the insn is recognized. */
22556 ASSERT_NE (-1, recog_memoized (insn));
22558 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
22559 insn = NEXT_INSN (insn);
22560 ASSERT_EQ (INSN, GET_CODE (insn));
22562 set = single_set (insn);
22563 ASSERT_NE (NULL, set);
22565 src = SET_SRC (set);
22566 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22567 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22570 /* Run all target-specific selftests. */
22572 static void
22573 ix86_run_selftests (void)
22575 ix86_test_dumping_hard_regs ();
22576 ix86_test_dumping_memory_blockage ();
22578 /* Various tests of loading RTL dumps, here because they contain
22579 ix86-isms (e.g. names of hard regs). */
22580 ix86_test_loading_dump_fragment_1 ();
22581 ix86_test_loading_call_insn ();
22582 ix86_test_loading_full_dump ();
22583 ix86_test_loading_unspec ();
22586 } // namespace selftest
22588 #endif /* CHECKING_P */
22590 /* Initialize the GCC target structure. */
22591 #undef TARGET_RETURN_IN_MEMORY
22592 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22594 #undef TARGET_LEGITIMIZE_ADDRESS
22595 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22597 #undef TARGET_ATTRIBUTE_TABLE
22598 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22599 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22600 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22602 # undef TARGET_MERGE_DECL_ATTRIBUTES
22603 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22604 #endif
22606 #undef TARGET_COMP_TYPE_ATTRIBUTES
22607 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22609 #undef TARGET_INIT_BUILTINS
22610 #define TARGET_INIT_BUILTINS ix86_init_builtins
22611 #undef TARGET_BUILTIN_DECL
22612 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22613 #undef TARGET_EXPAND_BUILTIN
22614 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22616 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22617 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22618 ix86_builtin_vectorized_function
22620 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22621 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22623 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22624 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22626 #undef TARGET_BUILTIN_RECIPROCAL
22627 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22629 #undef TARGET_ASM_FUNCTION_EPILOGUE
22630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22632 #undef TARGET_ENCODE_SECTION_INFO
22633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22635 #else
22636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22637 #endif
22639 #undef TARGET_ASM_OPEN_PAREN
22640 #define TARGET_ASM_OPEN_PAREN ""
22641 #undef TARGET_ASM_CLOSE_PAREN
22642 #define TARGET_ASM_CLOSE_PAREN ""
22644 #undef TARGET_ASM_BYTE_OP
22645 #define TARGET_ASM_BYTE_OP ASM_BYTE
22647 #undef TARGET_ASM_ALIGNED_HI_OP
22648 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22649 #undef TARGET_ASM_ALIGNED_SI_OP
22650 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22651 #ifdef ASM_QUAD
22652 #undef TARGET_ASM_ALIGNED_DI_OP
22653 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22654 #endif
22656 #undef TARGET_PROFILE_BEFORE_PROLOGUE
22657 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
22659 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
22660 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
22662 #undef TARGET_ASM_UNALIGNED_HI_OP
22663 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22664 #undef TARGET_ASM_UNALIGNED_SI_OP
22665 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22666 #undef TARGET_ASM_UNALIGNED_DI_OP
22667 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22669 #undef TARGET_PRINT_OPERAND
22670 #define TARGET_PRINT_OPERAND ix86_print_operand
22671 #undef TARGET_PRINT_OPERAND_ADDRESS
22672 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
22673 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
22674 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
22675 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
22676 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
22678 #undef TARGET_SCHED_INIT_GLOBAL
22679 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
22680 #undef TARGET_SCHED_ADJUST_COST
22681 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22682 #undef TARGET_SCHED_ISSUE_RATE
22683 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22684 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22685 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22686 ia32_multipass_dfa_lookahead
22687 #undef TARGET_SCHED_MACRO_FUSION_P
22688 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
22689 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22690 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
22692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22695 #undef TARGET_MEMMODEL_CHECK
22696 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
22698 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22699 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
22701 #ifdef HAVE_AS_TLS
22702 #undef TARGET_HAVE_TLS
22703 #define TARGET_HAVE_TLS true
22704 #endif
22705 #undef TARGET_CANNOT_FORCE_CONST_MEM
22706 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22707 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22708 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
22710 #undef TARGET_DELEGITIMIZE_ADDRESS
22711 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22713 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
22714 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
22716 #undef TARGET_MS_BITFIELD_LAYOUT_P
22717 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22719 #if TARGET_MACHO
22720 #undef TARGET_BINDS_LOCAL_P
22721 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22722 #else
22723 #undef TARGET_BINDS_LOCAL_P
22724 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
22725 #endif
22726 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22727 #undef TARGET_BINDS_LOCAL_P
22728 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22729 #endif
22731 #undef TARGET_ASM_OUTPUT_MI_THUNK
22732 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22733 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22734 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22736 #undef TARGET_ASM_FILE_START
22737 #define TARGET_ASM_FILE_START x86_file_start
22739 #undef TARGET_OPTION_OVERRIDE
22740 #define TARGET_OPTION_OVERRIDE ix86_option_override
22742 #undef TARGET_REGISTER_MOVE_COST
22743 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
22744 #undef TARGET_MEMORY_MOVE_COST
22745 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
22746 #undef TARGET_RTX_COSTS
22747 #define TARGET_RTX_COSTS ix86_rtx_costs
22748 #undef TARGET_ADDRESS_COST
22749 #define TARGET_ADDRESS_COST ix86_address_cost
22751 #undef TARGET_FLAGS_REGNUM
22752 #define TARGET_FLAGS_REGNUM FLAGS_REG
22753 #undef TARGET_FIXED_CONDITION_CODE_REGS
22754 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22755 #undef TARGET_CC_MODES_COMPATIBLE
22756 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22758 #undef TARGET_MACHINE_DEPENDENT_REORG
22759 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22761 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
22762 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
22764 #undef TARGET_BUILD_BUILTIN_VA_LIST
22765 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22767 #undef TARGET_FOLD_BUILTIN
22768 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
22770 #undef TARGET_GIMPLE_FOLD_BUILTIN
22771 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
22773 #undef TARGET_COMPARE_VERSION_PRIORITY
22774 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
22776 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
22777 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
22778 ix86_generate_version_dispatcher_body
22780 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
22781 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
22782 ix86_get_function_versions_dispatcher
22784 #undef TARGET_ENUM_VA_LIST_P
22785 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
22787 #undef TARGET_FN_ABI_VA_LIST
22788 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
22790 #undef TARGET_CANONICAL_VA_LIST_TYPE
22791 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
22793 #undef TARGET_EXPAND_BUILTIN_VA_START
22794 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
22796 #undef TARGET_MD_ASM_ADJUST
22797 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
22799 #undef TARGET_C_EXCESS_PRECISION
22800 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
22801 #undef TARGET_PROMOTE_PROTOTYPES
22802 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
22803 #undef TARGET_SETUP_INCOMING_VARARGS
22804 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22805 #undef TARGET_MUST_PASS_IN_STACK
22806 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22807 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
22808 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
22809 #undef TARGET_FUNCTION_ARG_ADVANCE
22810 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
22811 #undef TARGET_FUNCTION_ARG
22812 #define TARGET_FUNCTION_ARG ix86_function_arg
22813 #undef TARGET_INIT_PIC_REG
22814 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
22815 #undef TARGET_USE_PSEUDO_PIC_REG
22816 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
22817 #undef TARGET_FUNCTION_ARG_BOUNDARY
22818 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
22819 #undef TARGET_PASS_BY_REFERENCE
22820 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22821 #undef TARGET_INTERNAL_ARG_POINTER
22822 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22823 #undef TARGET_UPDATE_STACK_BOUNDARY
22824 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
22825 #undef TARGET_GET_DRAP_RTX
22826 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
22827 #undef TARGET_STRICT_ARGUMENT_NAMING
22828 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22829 #undef TARGET_STATIC_CHAIN
22830 #define TARGET_STATIC_CHAIN ix86_static_chain
22831 #undef TARGET_TRAMPOLINE_INIT
22832 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
22833 #undef TARGET_RETURN_POPS_ARGS
22834 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
22836 #undef TARGET_WARN_FUNC_RETURN
22837 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
22839 #undef TARGET_LEGITIMATE_COMBINED_INSN
22840 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
22842 #undef TARGET_ASAN_SHADOW_OFFSET
22843 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
22845 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22846 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22848 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22849 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22851 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22852 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22854 #undef TARGET_C_MODE_FOR_SUFFIX
22855 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
22857 #ifdef HAVE_AS_TLS
22858 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22859 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22860 #endif
22862 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22863 #undef TARGET_INSERT_ATTRIBUTES
22864 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22865 #endif
22867 #undef TARGET_MANGLE_TYPE
22868 #define TARGET_MANGLE_TYPE ix86_mangle_type
22870 #undef TARGET_STACK_PROTECT_GUARD
22871 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
22873 #if !TARGET_MACHO
22874 #undef TARGET_STACK_PROTECT_FAIL
22875 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22876 #endif
22878 #undef TARGET_FUNCTION_VALUE
22879 #define TARGET_FUNCTION_VALUE ix86_function_value
22881 #undef TARGET_FUNCTION_VALUE_REGNO_P
22882 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
22884 #undef TARGET_PROMOTE_FUNCTION_MODE
22885 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
22887 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
22888 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
22890 #undef TARGET_MEMBER_TYPE_FORCES_BLK
22891 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
22893 #undef TARGET_INSTANTIATE_DECLS
22894 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
22896 #undef TARGET_SECONDARY_RELOAD
22897 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
22898 #undef TARGET_SECONDARY_MEMORY_NEEDED
22899 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
22900 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
22901 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
22903 #undef TARGET_CLASS_MAX_NREGS
22904 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
22906 #undef TARGET_PREFERRED_RELOAD_CLASS
22907 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
22908 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
22909 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
22910 #undef TARGET_CLASS_LIKELY_SPILLED_P
22911 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
22913 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
22914 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
22915 ix86_builtin_vectorization_cost
22916 #undef TARGET_VECTORIZE_VEC_PERM_CONST
22917 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
22918 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
22919 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
22920 ix86_preferred_simd_mode
22921 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
22922 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
22923 ix86_split_reduction
22924 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
22925 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
22926 ix86_autovectorize_vector_sizes
22927 #undef TARGET_VECTORIZE_GET_MASK_MODE
22928 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
22929 #undef TARGET_VECTORIZE_INIT_COST
22930 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
22931 #undef TARGET_VECTORIZE_ADD_STMT_COST
22932 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
22933 #undef TARGET_VECTORIZE_FINISH_COST
22934 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
22935 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
22936 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
22938 #undef TARGET_SET_CURRENT_FUNCTION
22939 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
22941 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
22942 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
22944 #undef TARGET_OPTION_SAVE
22945 #define TARGET_OPTION_SAVE ix86_function_specific_save
22947 #undef TARGET_OPTION_RESTORE
22948 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
22950 #undef TARGET_OPTION_POST_STREAM_IN
22951 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
22953 #undef TARGET_OPTION_PRINT
22954 #define TARGET_OPTION_PRINT ix86_function_specific_print
22956 #undef TARGET_OPTION_FUNCTION_VERSIONS
22957 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
22959 #undef TARGET_CAN_INLINE_P
22960 #define TARGET_CAN_INLINE_P ix86_can_inline_p
22962 #undef TARGET_LEGITIMATE_ADDRESS_P
22963 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
22965 #undef TARGET_REGISTER_PRIORITY
22966 #define TARGET_REGISTER_PRIORITY ix86_register_priority
22968 #undef TARGET_REGISTER_USAGE_LEVELING_P
22969 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
22971 #undef TARGET_LEGITIMATE_CONSTANT_P
22972 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
22974 #undef TARGET_COMPUTE_FRAME_LAYOUT
22975 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
22977 #undef TARGET_FRAME_POINTER_REQUIRED
22978 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
22980 #undef TARGET_CAN_ELIMINATE
22981 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
22983 #undef TARGET_EXTRA_LIVE_ON_ENTRY
22984 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
22986 #undef TARGET_ASM_CODE_END
22987 #define TARGET_ASM_CODE_END ix86_code_end
22989 #undef TARGET_CONDITIONAL_REGISTER_USAGE
22990 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
22992 #undef TARGET_CANONICALIZE_COMPARISON
22993 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
22995 #undef TARGET_LOOP_UNROLL_ADJUST
22996 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
22998 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22999 #undef TARGET_SPILL_CLASS
23000 #define TARGET_SPILL_CLASS ix86_spill_class
23002 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23003 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23004 ix86_simd_clone_compute_vecsize_and_simdlen
23006 #undef TARGET_SIMD_CLONE_ADJUST
23007 #define TARGET_SIMD_CLONE_ADJUST \
23008 ix86_simd_clone_adjust
23010 #undef TARGET_SIMD_CLONE_USABLE
23011 #define TARGET_SIMD_CLONE_USABLE \
23012 ix86_simd_clone_usable
23014 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23015 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23016 ix86_float_exceptions_rounding_supported_p
23018 #undef TARGET_MODE_EMIT
23019 #define TARGET_MODE_EMIT ix86_emit_mode_set
23021 #undef TARGET_MODE_NEEDED
23022 #define TARGET_MODE_NEEDED ix86_mode_needed
23024 #undef TARGET_MODE_AFTER
23025 #define TARGET_MODE_AFTER ix86_mode_after
23027 #undef TARGET_MODE_ENTRY
23028 #define TARGET_MODE_ENTRY ix86_mode_entry
23030 #undef TARGET_MODE_EXIT
23031 #define TARGET_MODE_EXIT ix86_mode_exit
23033 #undef TARGET_MODE_PRIORITY
23034 #define TARGET_MODE_PRIORITY ix86_mode_priority
23036 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23037 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23039 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
23040 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
23042 #undef TARGET_OFFLOAD_OPTIONS
23043 #define TARGET_OFFLOAD_OPTIONS \
23044 ix86_offload_options
23046 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23047 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23049 #undef TARGET_OPTAB_SUPPORTED_P
23050 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23052 #undef TARGET_HARD_REGNO_SCRATCH_OK
23053 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23055 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23056 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23058 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23059 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23061 #undef TARGET_INIT_LIBFUNCS
23062 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23064 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23065 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23067 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23068 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23070 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23071 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23073 #undef TARGET_HARD_REGNO_NREGS
23074 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23075 #undef TARGET_HARD_REGNO_MODE_OK
23076 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23078 #undef TARGET_MODES_TIEABLE_P
23079 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23081 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23082 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23083 ix86_hard_regno_call_part_clobbered
23085 #undef TARGET_CAN_CHANGE_MODE_CLASS
23086 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23088 #undef TARGET_STATIC_RTX_ALIGNMENT
23089 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23090 #undef TARGET_CONSTANT_ALIGNMENT
23091 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23093 #undef TARGET_EMPTY_RECORD_P
23094 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23096 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23097 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23099 #undef TARGET_GET_MULTILIB_ABI_NAME
23100 #define TARGET_GET_MULTILIB_ABI_NAME \
23101 ix86_get_multilib_abi_name
23103 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23105 #ifdef OPTION_GLIBC
23106 if (OPTION_GLIBC)
23107 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23108 else
23109 return false;
23110 #else
23111 return false;
23112 #endif
23115 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23116 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23118 #if CHECKING_P
23119 #undef TARGET_RUN_TARGET_SELFTESTS
23120 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23121 #endif /* #if CHECKING_P */
23123 struct gcc_target targetm = TARGET_INITIALIZER;
23125 #include "gt-i386.h"