i386: Fix up x86_function_profiler -masm=intel support [PR114094]
[official-gcc.git] / gcc / config / i386 / i386.cc
blob 86381b0509484e5f9c99ece2e84f3f7e9f518ecb
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define INCLUDE_STRING
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic.h"
43 #include "cfgbuild.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "attribs.h"
47 #include "calls.h"
48 #include "stor-layout.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "except.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "common/common-target.h"
58 #include "langhooks.h"
59 #include "reload.h"
60 #include "gimplify.h"
61 #include "dwarf2.h"
62 #include "tm-constrs.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "gimple-fold.h"
72 #include "tree-vectorizer.h"
73 #include "shrink-wrap.h"
74 #include "builtins.h"
75 #include "rtl-iter.h"
76 #include "tree-iterator.h"
77 #include "dbgcnt.h"
78 #include "case-cfn-macros.h"
79 #include "dojump.h"
80 #include "fold-const-call.h"
81 #include "tree-vrp.h"
82 #include "tree-ssanames.h"
83 #include "selftest.h"
84 #include "selftest-rtl.h"
85 #include "print-rtl.h"
86 #include "intl.h"
87 #include "ifcvt.h"
88 #include "symbol-summary.h"
89 #include "sreal.h"
90 #include "ipa-cp.h"
91 #include "ipa-prop.h"
92 #include "ipa-fnsummary.h"
93 #include "wide-int-bitmask.h"
94 #include "tree-vector-builder.h"
95 #include "debug.h"
96 #include "dwarf2out.h"
97 #include "i386-options.h"
98 #include "i386-builtins.h"
99 #include "i386-expand.h"
100 #include "i386-features.h"
101 #include "function-abi.h"
102 #include "rtl-error.h"
104 /* This file should be included last. */
105 #include "target-def.h"
107 static rtx legitimize_dllimport_symbol (rtx, bool);
108 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
109 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
110 static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
113 #ifndef CHECK_STACK_LIMIT
114 #define CHECK_STACK_LIMIT (-1)
115 #endif
117 /* Return index of given mode in mult and division cost tables. */
118 #define MODE_INDEX(mode) \
119 ((mode) == QImode ? 0 \
120 : (mode) == HImode ? 1 \
121 : (mode) == SImode ? 2 \
122 : (mode) == DImode ? 3 \
123 : 4)
126 /* Set by -mtune. */
127 const struct processor_costs *ix86_tune_cost = NULL;
129 /* Set by -mtune or -Os. */
130 const struct processor_costs *ix86_cost = NULL;
132 /* In case the average insn count for single function invocation is
133 lower than this constant, emit fast (but longer) prologue and
134 epilogue code. */
135 #define FAST_PROLOGUE_INSN_COUNT 20
137 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
138 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
139 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
140 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
142 /* Array of the smallest class containing reg number REGNO, indexed by
143 REGNO. Used by REGNO_REG_CLASS in i386.h. */
145 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
147 /* ax, dx, cx, bx */
148 AREG, DREG, CREG, BREG,
149 /* si, di, bp, sp */
150 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
151 /* FP registers */
152 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
153 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
154 /* arg pointer, flags, fpsr, frame */
155 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
156 /* SSE registers */
157 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
158 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
159 /* MMX registers */
160 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
161 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
162 /* REX registers */
163 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
164 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
165 /* SSE REX registers */
166 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
167 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
168 /* AVX-512 SSE registers */
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
171 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
172 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
173 /* Mask registers. */
174 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
175 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
176 /* REX2 registers */
177 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
178 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
179 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
180 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
183 /* The "default" register map used in 32bit mode. */
185 int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
187 /* general regs */
188 0, 2, 1, 3, 6, 7, 4, 5,
189 /* fp regs */
190 12, 13, 14, 15, 16, 17, 18, 19,
191 /* arg, flags, fpsr, frame */
192 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
193 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
194 /* SSE */
195 21, 22, 23, 24, 25, 26, 27, 28,
196 /* MMX */
197 29, 30, 31, 32, 33, 34, 35, 36,
198 /* extended integer registers */
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 /* extended sse registers */
202 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
203 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
204 /* AVX-512 registers 16-23 */
205 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
206 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
207 /* AVX-512 registers 24-31 */
208 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
209 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
210 /* Mask registers */
211 93, 94, 95, 96, 97, 98, 99, 100
214 /* The "default" register map used in 64bit mode. */
216 int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
218 /* general regs */
219 0, 1, 2, 3, 4, 5, 6, 7,
220 /* fp regs */
221 33, 34, 35, 36, 37, 38, 39, 40,
222 /* arg, flags, fpsr, frame */
223 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
224 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
225 /* SSE */
226 17, 18, 19, 20, 21, 22, 23, 24,
227 /* MMX */
228 41, 42, 43, 44, 45, 46, 47, 48,
229 /* extended integer registers */
230 8, 9, 10, 11, 12, 13, 14, 15,
231 /* extended SSE registers */
232 25, 26, 27, 28, 29, 30, 31, 32,
233 /* AVX-512 registers 16-23 */
234 67, 68, 69, 70, 71, 72, 73, 74,
235 /* AVX-512 registers 24-31 */
236 75, 76, 77, 78, 79, 80, 81, 82,
237 /* Mask registers */
238 118, 119, 120, 121, 122, 123, 124, 125,
 239 /* REX2 extended integer registers */
240 130, 131, 132, 133, 134, 135, 136, 137,
241 138, 139, 140, 141, 142, 143, 144, 145
244 /* Define the register numbers to be used in Dwarf debugging information.
245 The SVR4 reference port C compiler uses the following register numbers
246 in its Dwarf output code:
247 0 for %eax (gcc regno = 0)
248 1 for %ecx (gcc regno = 2)
249 2 for %edx (gcc regno = 1)
250 3 for %ebx (gcc regno = 3)
251 4 for %esp (gcc regno = 7)
252 5 for %ebp (gcc regno = 6)
253 6 for %esi (gcc regno = 4)
254 7 for %edi (gcc regno = 5)
255 The following three DWARF register numbers are never generated by
256 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
 257 believed these numbers to have these meanings.
258 8 for %eip (no gcc equivalent)
259 9 for %eflags (gcc regno = 17)
260 10 for %trapno (no gcc equivalent)
261 It is not at all clear how we should number the FP stack registers
262 for the x86 architecture. If the version of SDB on x86/svr4 were
263 a bit less brain dead with respect to floating-point then we would
264 have a precedent to follow with respect to DWARF register numbers
265 for x86 FP registers, but the SDB on x86/svr4 was so completely
266 broken with respect to FP registers that it is hardly worth thinking
267 of it as something to strive for compatibility with.
268 The version of x86/svr4 SDB I had does (partially)
269 seem to believe that DWARF register number 11 is associated with
270 the x86 register %st(0), but that's about all. Higher DWARF
271 register numbers don't seem to be associated with anything in
272 particular, and even for DWARF regno 11, SDB only seemed to under-
273 stand that it should say that a variable lives in %st(0) (when
274 asked via an `=' command) if we said it was in DWARF regno 11,
275 but SDB still printed garbage when asked for the value of the
276 variable in question (via a `/' command).
277 (Also note that the labels SDB printed for various FP stack regs
278 when doing an `x' command were all wrong.)
279 Note that these problems generally don't affect the native SVR4
280 C compiler because it doesn't allow the use of -O with -g and
281 because when it is *not* optimizing, it allocates a memory
282 location for each floating-point variable, and the memory
283 location is what gets described in the DWARF AT_location
284 attribute for the variable in question.
285 Regardless of the severe mental illness of the x86/svr4 SDB, we
286 do something sensible here and we use the following DWARF
287 register numbers. Note that these are all stack-top-relative
288 numbers.
289 11 for %st(0) (gcc regno = 8)
290 12 for %st(1) (gcc regno = 9)
291 13 for %st(2) (gcc regno = 10)
292 14 for %st(3) (gcc regno = 11)
293 15 for %st(4) (gcc regno = 12)
294 16 for %st(5) (gcc regno = 13)
295 17 for %st(6) (gcc regno = 14)
296 18 for %st(7) (gcc regno = 15)
298 int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
300 /* general regs */
301 0, 2, 1, 3, 6, 7, 5, 4,
302 /* fp regs */
303 11, 12, 13, 14, 15, 16, 17, 18,
304 /* arg, flags, fpsr, frame */
305 IGNORED_DWARF_REGNUM, 9,
306 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
307 /* SSE registers */
308 21, 22, 23, 24, 25, 26, 27, 28,
309 /* MMX registers */
310 29, 30, 31, 32, 33, 34, 35, 36,
311 /* extended integer registers */
312 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
313 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
314 /* extended sse registers */
315 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
316 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
317 /* AVX-512 registers 16-23 */
318 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
319 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
320 /* AVX-512 registers 24-31 */
321 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
322 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
323 /* Mask registers */
324 93, 94, 95, 96, 97, 98, 99, 100
327 /* Define parameter passing and return registers. */
329 static int const x86_64_int_parameter_registers[6] =
331 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
334 static int const x86_64_ms_abi_int_parameter_registers[4] =
336 CX_REG, DX_REG, R8_REG, R9_REG
339 static int const x86_64_int_return_registers[4] =
341 AX_REG, DX_REG, DI_REG, SI_REG
344 /* Define the structure for the machine field in struct function. */
346 struct GTY(()) stack_local_entry {
347 unsigned short mode;
348 unsigned short n;
349 rtx rtl;
350 struct stack_local_entry *next;
353 /* Which cpu are we scheduling for. */
354 enum attr_cpu ix86_schedule;
356 /* Which cpu are we optimizing for. */
357 enum processor_type ix86_tune;
359 /* Which instruction set architecture to use. */
360 enum processor_type ix86_arch;
362 /* True if processor has SSE prefetch instruction. */
363 unsigned char ix86_prefetch_sse;
365 /* Preferred alignment for stack boundary in bits. */
366 unsigned int ix86_preferred_stack_boundary;
368 /* Alignment for incoming stack boundary in bits specified at
369 command line. */
370 unsigned int ix86_user_incoming_stack_boundary;
372 /* Default alignment for incoming stack boundary in bits. */
373 unsigned int ix86_default_incoming_stack_boundary;
375 /* Alignment for incoming stack boundary in bits. */
376 unsigned int ix86_incoming_stack_boundary;
378 /* True if there is no direct access to extern symbols. */
379 bool ix86_has_no_direct_extern_access;
381 /* Calling abi specific va_list type nodes. */
382 tree sysv_va_list_type_node;
383 tree ms_va_list_type_node;
385 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
386 char internal_label_prefix[16];
387 int internal_label_prefix_len;
389 /* Fence to use after loop using movnt. */
390 tree x86_mfence;
 392 /* Register class used for passing a given 64-bit part of the argument.
 393 These represent classes as documented by the psABI, with the exception
 394 of the SSESF and SSEDF classes, which are basically the SSE class, but gcc
 395 will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
 397 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 398 whenever possible (the upper half does contain padding). */
399 enum x86_64_reg_class
401 X86_64_NO_CLASS,
402 X86_64_INTEGER_CLASS,
403 X86_64_INTEGERSI_CLASS,
404 X86_64_SSE_CLASS,
405 X86_64_SSEHF_CLASS,
406 X86_64_SSESF_CLASS,
407 X86_64_SSEDF_CLASS,
408 X86_64_SSEUP_CLASS,
409 X86_64_X87_CLASS,
410 X86_64_X87UP_CLASS,
411 X86_64_COMPLEX_X87_CLASS,
412 X86_64_MEMORY_CLASS
415 #define MAX_CLASSES 8
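/* Illustrative example (not from this file, and hypothetical names): under
   the x86-64 psABI classification sketched above, a small aggregate such as

     struct s { double d; int i; };

   is split into two eightbytes: the first (the double) would get
   X86_64_SSEDF_CLASS and travel in an SSE register, while the second
   (the int plus padding) would get X86_64_INTEGERSI_CLASS and travel in a
   general-purpose register, so passing it to a function would be expected
   to use %xmm0 and %edi under the SysV ABI.  */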
417 /* Table of constants used by fldpi, fldln2, etc.... */
418 static REAL_VALUE_TYPE ext_80387_constants_table [5];
419 static bool ext_80387_constants_init;
422 static rtx ix86_function_value (const_tree, const_tree, bool);
423 static bool ix86_function_value_regno_p (const unsigned int);
424 static unsigned int ix86_function_arg_boundary (machine_mode,
425 const_tree);
426 static rtx ix86_static_chain (const_tree, bool);
427 static int ix86_function_regparm (const_tree, const_tree);
428 static void ix86_compute_frame_layout (void);
429 static tree ix86_canonical_va_list_type (tree);
430 static unsigned int split_stack_prologue_scratch_regno (void);
431 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
433 static bool ix86_can_inline_p (tree, tree);
434 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
437 /* Whether -mtune= or -march= were specified */
438 int ix86_tune_defaulted;
439 int ix86_arch_specified;
441 /* Return true if a red-zone is in use. We can't use red-zone when
442 there are local indirect jumps, like "indirect_jump" or "tablejump",
443 which jumps to another place in the function, since "call" in the
444 indirect thunk pushes the return address onto stack, destroying
445 red-zone.
 447 TODO: If we can reserve the first 2 WORDs of the red-zone, one for PUSH
 448 and another for CALL, we can allow local indirect jumps with an
 449 indirect thunk. */
451 bool
452 ix86_using_red_zone (void)
454 return (TARGET_RED_ZONE
455 && !TARGET_64BIT_MS_ABI
456 && (!cfun->machine->has_local_indirect_jump
457 || cfun->machine->indirect_branch_type == indirect_branch_keep));
 460 /* Return true if profiling code should be emitted before the
 461 prologue, false otherwise.
 462 Note: for x86 with "hotfix" this is unfortunate. */
463 static bool
464 ix86_profile_before_prologue (void)
466 return flag_fentry != 0;
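/* Illustrative sketch (not part of this file): the hook above returns true
   only when -mfentry is in effect.  Assuming a trivial function

     void
     f (void)
     {
     }

   compiled with -pg -mfentry, the profiling call is emitted before any
   frame setup, roughly

     f:
       call __fentry__
       ...

   whereas plain -pg places the mcount call after the prologue has
   established the frame.  */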
469 /* Update register usage after having seen the compiler flags. */
471 static void
472 ix86_conditional_register_usage (void)
474 int i, c_mask;
476 /* If there are no caller-saved registers, preserve all registers.
477 except fixed_regs and registers used for function return value
478 since aggregate_value_p checks call_used_regs[regno] on return
479 value. */
480 if (cfun
481 && (cfun->machine->call_saved_registers
482 == TYPE_NO_CALLER_SAVED_REGISTERS))
483 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
484 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
485 call_used_regs[i] = 0;
487 /* For 32-bit targets, disable the REX registers. */
488 if (! TARGET_64BIT)
490 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
491 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
492 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
493 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
494 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
495 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
498 /* See the definition of CALL_USED_REGISTERS in i386.h. */
499 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
501 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
503 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
505 /* Set/reset conditionally defined registers from
506 CALL_USED_REGISTERS initializer. */
507 if (call_used_regs[i] > 1)
508 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
510 /* Calculate registers of CLOBBERED_REGS register set
511 as call used registers from GENERAL_REGS register set. */
512 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
513 && call_used_regs[i])
514 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
517 /* If MMX is disabled, disable the registers. */
518 if (! TARGET_MMX)
519 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
521 /* If SSE is disabled, disable the registers. */
522 if (! TARGET_SSE)
523 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
525 /* If the FPU is disabled, disable the registers. */
526 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
527 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
529 /* If AVX512F is disabled, disable the registers. */
530 if (! TARGET_AVX512F)
532 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
533 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
535 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
538 /* If APX is disabled, disable the registers. */
539 if (! (TARGET_APX_EGPR && TARGET_64BIT))
541 for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
542 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
546 /* Canonicalize a comparison from one we don't have to one we do have. */
548 static void
549 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
550 bool op0_preserve_value)
552 /* The order of operands in x87 ficom compare is forced by combine in
553 simplify_comparison () function. Float operator is treated as RTX_OBJ
554 with a precedence over other operators and is always put in the first
555 place. Swap condition and operands to match ficom instruction. */
556 if (!op0_preserve_value
557 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
559 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
561 /* We are called only for compares that are split to SAHF instruction.
562 Ensure that we have setcc/jcc insn for the swapped condition. */
563 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
565 std::swap (*op0, *op1);
566 *code = (int) scode;
572 /* Hook to determine if one function can safely inline another. */
574 static bool
575 ix86_can_inline_p (tree caller, tree callee)
577 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
578 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 580 /* Changes to those flags can be tolerated for always_inline functions.
 581 Let's hope the user knows what they are doing. */
582 unsigned HOST_WIDE_INT always_inline_safe_mask
583 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
584 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
585 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
586 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
587 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
588 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
589 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
592 if (!callee_tree)
593 callee_tree = target_option_default_node;
594 if (!caller_tree)
595 caller_tree = target_option_default_node;
596 if (callee_tree == caller_tree)
597 return true;
599 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
600 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
601 bool ret = false;
602 bool always_inline
603 = (DECL_DISREGARD_INLINE_LIMITS (callee)
604 && lookup_attribute ("always_inline",
605 DECL_ATTRIBUTES (callee)));
607 /* If callee only uses GPRs, ignore MASK_80387. */
608 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
609 always_inline_safe_mask |= MASK_80387;
611 cgraph_node *callee_node = cgraph_node::get (callee);
 612 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
 613 function can inline an SSE2 function but an SSE2 function can't inline
 614 an SSE4 function. */
615 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
616 != callee_opts->x_ix86_isa_flags)
617 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
618 != callee_opts->x_ix86_isa_flags2))
619 ret = false;
621 /* See if we have the same non-isa options. */
622 else if ((!always_inline
623 && caller_opts->x_target_flags != callee_opts->x_target_flags)
624 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
625 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
626 ret = false;
628 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
 629 /* If the callee doesn't use FP expressions, differences in
 630 ix86_fpmath can be ignored. We are called from FEs
 631 for multi-versioning call optimization, so beware of
 632 ipa_fn_summaries not being available. */
633 && (! ipa_fn_summaries
634 || ipa_fn_summaries->get (callee_node) == NULL
635 || ipa_fn_summaries->get (callee_node)->fp_expressions))
636 ret = false;
638 /* At this point we cannot identify whether arch or tune setting
639 comes from target attribute or not. So the most conservative way
640 is to allow the callee that uses default arch and tune string to
641 be inlined. */
642 else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
643 && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
644 ret = true;
 646 /* See if arch, tune, etc. are the same. As the previous ISA flags check
 647 already ensures the callee's ISA is a subset of the caller's, do not block
 648 the always_inline attribute for the callee even if it has a different arch. */
649 else if (!always_inline && caller_opts->arch != callee_opts->arch)
650 ret = false;
652 else if (!always_inline && caller_opts->tune != callee_opts->tune)
653 ret = false;
655 else if (!always_inline
656 && caller_opts->branch_cost != callee_opts->branch_cost)
657 ret = false;
659 else
660 ret = true;
662 return ret;
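/* Illustrative example (not from this file, hypothetical functions): the
   subset rule above allows inlining "downwards" in ISA terms but not
   "upwards".  Assuming

     __attribute__((target ("sse2"), always_inline)) inline int
     callee (int x) { return x + 1; }

     __attribute__((target ("sse4.2"))) int
     caller (int x) { return callee (x); }

   the SSE4.2 caller may inline the SSE2 callee because the callee's ISA
   flags are a subset of the caller's; swapping the two target strings
   would be expected to make ix86_can_inline_p reject the inline.  */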
665 /* Return true if this goes in large data/bss. */
667 static bool
668 ix86_in_large_data_p (tree exp)
670 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
671 && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
672 return false;
674 if (exp == NULL_TREE)
675 return false;
677 /* Functions are never large data. */
678 if (TREE_CODE (exp) == FUNCTION_DECL)
679 return false;
681 /* Automatic variables are never large data. */
682 if (VAR_P (exp) && !is_global_var (exp))
683 return false;
685 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
687 const char *section = DECL_SECTION_NAME (exp);
688 if (strcmp (section, ".ldata") == 0
689 || strcmp (section, ".lbss") == 0)
690 return true;
691 return false;
693 else
695 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
697 /* If this is an incomplete type with size 0, then we can't put it
698 in data because it might be too big when completed. Also,
699 int_size_in_bytes returns -1 if size can vary or is larger than
700 an integer in which case also it is safer to assume that it goes in
701 large data. */
702 if (size <= 0 || size > ix86_section_threshold)
703 return true;
706 return false;
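/* Illustrative example (not from this file, hypothetical variable): the
   threshold test above is what -mlarge-data-threshold controls (default
   65536).  Assuming

     static char big_buf[1 << 20];

   compiled with -mcmodel=medium, the 1 MiB buffer exceeds
   ix86_section_threshold, so it is treated as large data and placed in
   .lbss rather than .bss; smaller objects stay in the normal sections.  */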
709 /* i386-specific section flag to mark large sections. */
710 #define SECTION_LARGE SECTION_MACH_DEP
712 /* Switch to the appropriate section for output of DECL.
713 DECL is either a `VAR_DECL' node or a constant of some sort.
714 RELOC indicates whether forming the initial value of DECL requires
715 link-time relocations. */
717 ATTRIBUTE_UNUSED static section *
718 x86_64_elf_select_section (tree decl, int reloc,
719 unsigned HOST_WIDE_INT align)
721 if (ix86_in_large_data_p (decl))
723 const char *sname = NULL;
724 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
725 switch (categorize_decl_for_section (decl, reloc))
727 case SECCAT_DATA:
728 sname = ".ldata";
729 break;
730 case SECCAT_DATA_REL:
731 sname = ".ldata.rel";
732 break;
733 case SECCAT_DATA_REL_LOCAL:
734 sname = ".ldata.rel.local";
735 break;
736 case SECCAT_DATA_REL_RO:
737 sname = ".ldata.rel.ro";
738 break;
739 case SECCAT_DATA_REL_RO_LOCAL:
740 sname = ".ldata.rel.ro.local";
741 break;
742 case SECCAT_BSS:
743 sname = ".lbss";
744 flags |= SECTION_BSS;
745 break;
746 case SECCAT_RODATA:
747 case SECCAT_RODATA_MERGE_STR:
748 case SECCAT_RODATA_MERGE_STR_INIT:
749 case SECCAT_RODATA_MERGE_CONST:
750 sname = ".lrodata";
751 flags &= ~SECTION_WRITE;
752 break;
753 case SECCAT_SRODATA:
754 case SECCAT_SDATA:
755 case SECCAT_SBSS:
756 gcc_unreachable ();
757 case SECCAT_TEXT:
758 case SECCAT_TDATA:
759 case SECCAT_TBSS:
 760 /* We don't split these for the medium model. Place them into
 761 default sections and hope for the best. */
762 break;
764 if (sname)
766 /* We might get called with string constants, but get_named_section
767 doesn't like them as they are not DECLs. Also, we need to set
768 flags in that case. */
769 if (!DECL_P (decl))
770 return get_section (sname, flags, NULL);
771 return get_named_section (decl, sname, reloc);
774 return default_elf_select_section (decl, reloc, align);
777 /* Select a set of attributes for section NAME based on the properties
778 of DECL and whether or not RELOC indicates that DECL's initializer
779 might contain runtime relocations. */
781 static unsigned int ATTRIBUTE_UNUSED
782 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
784 unsigned int flags = default_section_type_flags (decl, name, reloc);
786 if (ix86_in_large_data_p (decl))
787 flags |= SECTION_LARGE;
789 if (decl == NULL_TREE
790 && (strcmp (name, ".ldata.rel.ro") == 0
791 || strcmp (name, ".ldata.rel.ro.local") == 0))
792 flags |= SECTION_RELRO;
794 if (strcmp (name, ".lbss") == 0
795 || startswith (name, ".lbss.")
796 || startswith (name, ".gnu.linkonce.lb."))
797 flags |= SECTION_BSS;
799 return flags;
802 /* Build up a unique section name, expressed as a
803 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
804 RELOC indicates whether the initial value of EXP requires
805 link-time relocations. */
807 static void ATTRIBUTE_UNUSED
808 x86_64_elf_unique_section (tree decl, int reloc)
810 if (ix86_in_large_data_p (decl))
812 const char *prefix = NULL;
813 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
814 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
816 switch (categorize_decl_for_section (decl, reloc))
818 case SECCAT_DATA:
819 case SECCAT_DATA_REL:
820 case SECCAT_DATA_REL_LOCAL:
821 case SECCAT_DATA_REL_RO:
822 case SECCAT_DATA_REL_RO_LOCAL:
823 prefix = one_only ? ".ld" : ".ldata";
824 break;
825 case SECCAT_BSS:
826 prefix = one_only ? ".lb" : ".lbss";
827 break;
828 case SECCAT_RODATA:
829 case SECCAT_RODATA_MERGE_STR:
830 case SECCAT_RODATA_MERGE_STR_INIT:
831 case SECCAT_RODATA_MERGE_CONST:
832 prefix = one_only ? ".lr" : ".lrodata";
833 break;
834 case SECCAT_SRODATA:
835 case SECCAT_SDATA:
836 case SECCAT_SBSS:
837 gcc_unreachable ();
838 case SECCAT_TEXT:
839 case SECCAT_TDATA:
840 case SECCAT_TBSS:
 841 /* We don't split these for the medium model. Place them into
 842 default sections and hope for the best. */
843 break;
845 if (prefix)
847 const char *name, *linkonce;
848 char *string;
850 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
851 name = targetm.strip_name_encoding (name);
853 /* If we're using one_only, then there needs to be a .gnu.linkonce
854 prefix to the section name. */
855 linkonce = one_only ? ".gnu.linkonce" : "";
857 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
859 set_decl_section_name (decl, string);
860 return;
863 default_unique_section (decl, reloc);
866 #ifdef COMMON_ASM_OP
868 #ifndef LARGECOMM_SECTION_ASM_OP
869 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
870 #endif
 872 /* This says how to output assembler code to declare an
 873 uninitialized external linkage data object.
 875 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
 876 directive for large objects. */
877 void
878 x86_elf_aligned_decl_common (FILE *file, tree decl,
879 const char *name, unsigned HOST_WIDE_INT size,
880 unsigned align)
882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
883 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
884 && size > (unsigned int)ix86_section_threshold)
886 switch_to_section (get_named_section (decl, ".lbss", 0));
887 fputs (LARGECOMM_SECTION_ASM_OP, file);
889 else
890 fputs (COMMON_ASM_OP, file);
891 assemble_name (file, name);
892 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
893 size, align / BITS_PER_UNIT);
895 #endif
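/* Illustrative example (not from this file, hypothetical object): for a
   common (uninitialized, non-static) object that crosses the section
   threshold, e.g.

     char big_common[1 << 20];

   compiled with -fcommon -mcmodel=medium, the routine above switches to
   .lbss and emits a .largecomm directive instead of the usual .comm,
   roughly

     .largecomm big_common,1048576,32

   (the exact alignment depends on target tuning), so the linker can place
   it outside the small data area.  */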
897 /* Utility function for targets to use in implementing
898 ASM_OUTPUT_ALIGNED_BSS. */
900 void
901 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
902 unsigned HOST_WIDE_INT size, unsigned align)
904 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
905 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
906 && size > (unsigned int)ix86_section_threshold)
907 switch_to_section (get_named_section (decl, ".lbss", 0));
908 else
909 switch_to_section (bss_section);
910 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
911 #ifdef ASM_DECLARE_OBJECT_NAME
912 last_assemble_variable_decl = decl;
913 ASM_DECLARE_OBJECT_NAME (file, name, decl);
914 #else
915 /* Standard thing is just output label for the object. */
916 ASM_OUTPUT_LABEL (file, name);
917 #endif /* ASM_DECLARE_OBJECT_NAME */
918 ASM_OUTPUT_SKIP (file, size ? size : 1);
921 /* Decide whether we must probe the stack before any space allocation
922 on this target. It's essentially TARGET_STACK_PROBE except when
923 -fstack-check causes the stack to be already probed differently. */
925 bool
926 ix86_target_stack_probe (void)
928 /* Do not probe the stack twice if static stack checking is enabled. */
929 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
930 return false;
932 return TARGET_STACK_PROBE;
935 /* Decide whether we can make a sibling call to a function. DECL is the
936 declaration of the function being targeted by the call and EXP is the
937 CALL_EXPR representing the call. */
939 static bool
940 ix86_function_ok_for_sibcall (tree decl, tree exp)
942 tree type, decl_or_type;
943 rtx a, b;
944 bool bind_global = decl && !targetm.binds_local_p (decl);
946 if (ix86_function_naked (current_function_decl))
947 return false;
949 /* Sibling call isn't OK if there are no caller-saved registers
950 since all registers must be preserved before return. */
951 if (cfun->machine->call_saved_registers
952 == TYPE_NO_CALLER_SAVED_REGISTERS)
953 return false;
955 /* If we are generating position-independent code, we cannot sibcall
956 optimize direct calls to global functions, as the PLT requires
957 %ebx be live. (Darwin does not have a PLT.) */
958 if (!TARGET_MACHO
959 && !TARGET_64BIT
960 && flag_pic
961 && flag_plt
962 && bind_global)
963 return false;
965 /* If we need to align the outgoing stack, then sibcalling would
966 unalign the stack, which may break the called function. */
967 if (ix86_minimum_incoming_stack_boundary (true)
968 < PREFERRED_STACK_BOUNDARY)
969 return false;
971 if (decl)
973 decl_or_type = decl;
974 type = TREE_TYPE (decl);
976 else
978 /* We're looking at the CALL_EXPR, we need the type of the function. */
979 type = CALL_EXPR_FN (exp); /* pointer expression */
980 type = TREE_TYPE (type); /* pointer type */
981 type = TREE_TYPE (type); /* function type */
982 decl_or_type = type;
985 /* Sibling call isn't OK if callee has no callee-saved registers
986 and the calling function has callee-saved registers. */
987 if ((cfun->machine->call_saved_registers
988 != TYPE_NO_CALLEE_SAVED_REGISTERS)
989 && lookup_attribute ("no_callee_saved_registers",
990 TYPE_ATTRIBUTES (type)))
991 return false;
993 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
994 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
995 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
996 || (REG_PARM_STACK_SPACE (decl_or_type)
997 != REG_PARM_STACK_SPACE (current_function_decl)))
999 maybe_complain_about_tail_call (exp,
1000 "inconsistent size of stack space"
1001 " allocated for arguments which are"
1002 " passed in registers");
1003 return false;
1006 /* Check that the return value locations are the same. Like
1007 if we are returning floats on the 80387 register stack, we cannot
1008 make a sibcall from a function that doesn't return a float to a
1009 function that does or, conversely, from a function that does return
1010 a float to a function that doesn't; the necessary stack adjustment
1011 would not be executed. This is also the place we notice
1012 differences in the return value ABI. Note that it is ok for one
1013 of the functions to have void return type as long as the return
1014 value of the other is passed in a register. */
1015 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
1016 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1017 cfun->decl, false);
1018 if (STACK_REG_P (a) || STACK_REG_P (b))
1020 if (!rtx_equal_p (a, b))
1021 return false;
1023 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1025 else if (!rtx_equal_p (a, b))
1026 return false;
1028 if (TARGET_64BIT)
1030 /* The SYSV ABI has more call-clobbered registers;
1031 disallow sibcalls from MS to SYSV. */
1032 if (cfun->machine->call_abi == MS_ABI
1033 && ix86_function_type_abi (type) == SYSV_ABI)
1034 return false;
1036 else
1038 /* If this call is indirect, we'll need to be able to use a
1039 call-clobbered register for the address of the target function.
1040 Make sure that all such registers are not used for passing
1041 parameters. Note that DLLIMPORT functions and call to global
1042 function via GOT slot are indirect. */
1043 if (!decl
1044 || (bind_global && flag_pic && !flag_plt)
1045 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1046 || flag_force_indirect_call)
1048 /* Check if regparm >= 3 since arg_reg_available is set to
1049 false if regparm == 0. If regparm is 1 or 2, there is
1050 always a call-clobbered register available.
1052 ??? The symbol indirect call doesn't need a call-clobbered
1053 register. But we don't know if this is a symbol indirect
1054 call or not here. */
1055 if (ix86_function_regparm (type, decl) >= 3
1056 && !cfun->machine->arg_reg_available)
1057 return false;
1061 if (decl && ix86_use_pseudo_pic_reg ())
1063 /* When PIC register is used, it must be restored after ifunc
1064 function returns. */
1065 cgraph_node *node = cgraph_node::get (decl);
1066 if (node && node->ifunc_resolver)
1067 return false;
1070 /* Disable sibcall if callee has indirect_return attribute and
1071 caller doesn't since callee will return to the caller's caller
1072 via an indirect jump. */
1073 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1074 == (CF_RETURN | CF_BRANCH))
1075 && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1076 && !lookup_attribute ("indirect_return",
1077 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1078 return false;
1080 /* Otherwise okay. That also includes certain types of indirect calls. */
1081 return true;
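/* Illustrative example (not from this file, hypothetical functions): one of
   the restrictions above in action.  Assuming

     extern int callee (int);

     int
     caller (int x)
     {
       return callee (x);
     }

   compiled with -m32 -O2 -fpic (with PLT calls enabled), the tail call
   cannot become a sibcall when callee binds globally, because the PLT
   entry requires %ebx to hold the GOT pointer; the same code built
   without -fpic, or for x86-64, is free to use a jmp to callee.  */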
1084 /* This function determines from TYPE the calling-convention. */
1086 unsigned int
1087 ix86_get_callcvt (const_tree type)
1089 unsigned int ret = 0;
1090 bool is_stdarg;
1091 tree attrs;
1093 if (TARGET_64BIT)
1094 return IX86_CALLCVT_CDECL;
1096 attrs = TYPE_ATTRIBUTES (type);
1097 if (attrs != NULL_TREE)
1099 if (lookup_attribute ("cdecl", attrs))
1100 ret |= IX86_CALLCVT_CDECL;
1101 else if (lookup_attribute ("stdcall", attrs))
1102 ret |= IX86_CALLCVT_STDCALL;
1103 else if (lookup_attribute ("fastcall", attrs))
1104 ret |= IX86_CALLCVT_FASTCALL;
1105 else if (lookup_attribute ("thiscall", attrs))
1106 ret |= IX86_CALLCVT_THISCALL;
 1108 /* Regparm isn't allowed for thiscall and fastcall. */
1109 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1111 if (lookup_attribute ("regparm", attrs))
1112 ret |= IX86_CALLCVT_REGPARM;
1113 if (lookup_attribute ("sseregparm", attrs))
1114 ret |= IX86_CALLCVT_SSEREGPARM;
1117 if (IX86_BASE_CALLCVT(ret) != 0)
1118 return ret;
1121 is_stdarg = stdarg_p (type);
1122 if (TARGET_RTD && !is_stdarg)
1123 return IX86_CALLCVT_STDCALL | ret;
1125 if (ret != 0
1126 || is_stdarg
1127 || TREE_CODE (type) != METHOD_TYPE
1128 || ix86_function_type_abi (type) != MS_ABI)
1129 return IX86_CALLCVT_CDECL | ret;
1131 return IX86_CALLCVT_THISCALL;
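/* Illustrative example (not from this file): the attributes inspected above
   are the usual 32-bit calling-convention markers, e.g.

     int __attribute__((stdcall))  f1 (int a, int b);
     int __attribute__((fastcall)) f2 (int a, int b);
     int __attribute__((regparm (3))) f3 (int a, int b, int c);

   f1 gets IX86_CALLCVT_STDCALL (the callee pops its arguments), f2 gets
   IX86_CALLCVT_FASTCALL (first two integer args in %ecx/%edx), and f3
   gets IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM.  On 64-bit targets the
   function simply returns IX86_CALLCVT_CDECL.  */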
1134 /* Return 0 if the attributes for two types are incompatible, 1 if they
1135 are compatible, and 2 if they are nearly compatible (which causes a
1136 warning to be generated). */
1138 static int
1139 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1141 unsigned int ccvt1, ccvt2;
1143 if (TREE_CODE (type1) != FUNCTION_TYPE
1144 && TREE_CODE (type1) != METHOD_TYPE)
1145 return 1;
1147 ccvt1 = ix86_get_callcvt (type1);
1148 ccvt2 = ix86_get_callcvt (type2);
1149 if (ccvt1 != ccvt2)
1150 return 0;
1151 if (ix86_function_regparm (type1, NULL)
1152 != ix86_function_regparm (type2, NULL))
1153 return 0;
1155 if (lookup_attribute ("no_callee_saved_registers",
1156 TYPE_ATTRIBUTES (type1))
1157 != lookup_attribute ("no_callee_saved_registers",
1158 TYPE_ATTRIBUTES (type2)))
1159 return 0;
1161 return 1;
1164 /* Return the regparm value for a function with the indicated TYPE and DECL.
1165 DECL may be NULL when calling function indirectly
1166 or considering a libcall. */
1168 static int
1169 ix86_function_regparm (const_tree type, const_tree decl)
1171 tree attr;
1172 int regparm;
1173 unsigned int ccvt;
1175 if (TARGET_64BIT)
1176 return (ix86_function_type_abi (type) == SYSV_ABI
1177 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1178 ccvt = ix86_get_callcvt (type);
1179 regparm = ix86_regparm;
1181 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1183 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1184 if (attr)
1186 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1187 return regparm;
1190 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1191 return 2;
1192 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1193 return 1;
1195 /* Use register calling convention for local functions when possible. */
1196 if (decl
1197 && TREE_CODE (decl) == FUNCTION_DECL)
1199 cgraph_node *target = cgraph_node::get (decl);
1200 if (target)
1201 target = target->function_symbol ();
 1203 /* Caller and callee must agree on the calling convention, so
 1204 checking just the optimize flag here would mean that with
 1205 __attribute__((optimize (...))) the caller could use the regparm convention
 1206 and the callee not, or vice versa. Instead look at whether the callee
 1207 is optimized or not. */
1208 if (target && opt_for_fn (target->decl, optimize)
1209 && !(profile_flag && !flag_fentry))
1211 if (target->local && target->can_change_signature)
1213 int local_regparm, globals = 0, regno;
1215 /* Make sure no regparm register is taken by a
1216 fixed register variable. */
1217 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1218 local_regparm++)
1219 if (fixed_regs[local_regparm])
1220 break;
1222 /* We don't want to use regparm(3) for nested functions as
1223 these use a static chain pointer in the third argument. */
1224 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1225 local_regparm = 2;
1227 /* Save a register for the split stack. */
1228 if (flag_split_stack)
1230 if (local_regparm == 3)
1231 local_regparm = 2;
1232 else if (local_regparm == 2
1233 && DECL_STATIC_CHAIN (target->decl))
1234 local_regparm = 1;
 1237 /* Each fixed register usage increases register pressure,
 1238 so fewer registers should be used for argument passing.
 1239 This functionality can be overridden by an explicit
 1240 regparm value. */
1241 for (regno = AX_REG; regno <= DI_REG; regno++)
1242 if (fixed_regs[regno])
1243 globals++;
1245 local_regparm
1246 = globals < local_regparm ? local_regparm - globals : 0;
1248 if (local_regparm > regparm)
1249 regparm = local_regparm;
1254 return regparm;
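/* Illustrative example (not from this file, hypothetical function): the
   local-function path above is what lets 32-bit code such as

     static int
     add3 (int a, int b, int c)
     {
       return a + b + c;
     }

   end up receiving its arguments in %eax, %edx and %ecx at -O2 -m32 even
   without an explicit regparm attribute, provided the function is local,
   its signature can be changed, and no regparm register is fixed; split
   stack or a static chain can lower the count, as the code above shows.  */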
 1257 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
 1258 DFmode (2) arguments in SSE registers for a function with the
 1259 indicated TYPE and DECL. DECL may be NULL when calling a function
 1260 indirectly or considering a libcall. Return -1 if any FP parameter
 1261 should be rejected by error. This is used in situations where we imply
 1262 the SSE calling convention but the function is called from another
 1263 function with SSE disabled. Otherwise return 0. */
1265 static int
1266 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1268 gcc_assert (!TARGET_64BIT);
1270 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1271 by the sseregparm attribute. */
1272 if (TARGET_SSEREGPARM
1273 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1275 if (!TARGET_SSE)
1277 if (warn)
1279 if (decl)
1280 error ("calling %qD with attribute sseregparm without "
1281 "SSE/SSE2 enabled", decl);
1282 else
1283 error ("calling %qT with attribute sseregparm without "
1284 "SSE/SSE2 enabled", type);
1286 return 0;
1289 return 2;
1292 if (!decl)
1293 return 0;
1295 cgraph_node *target = cgraph_node::get (decl);
1296 if (target)
1297 target = target->function_symbol ();
1299 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1300 (and DFmode for SSE2) arguments in SSE registers. */
1301 if (target
1302 /* TARGET_SSE_MATH */
1303 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1304 && opt_for_fn (target->decl, optimize)
1305 && !(profile_flag && !flag_fentry))
1307 if (target->local && target->can_change_signature)
1309 /* Refuse to produce wrong code when local function with SSE enabled
1310 is called from SSE disabled function.
1311 FIXME: We need a way to detect these cases cross-ltrans partition
1312 and avoid using SSE calling conventions on local functions called
1313 from function with SSE disabled. For now at least delay the
1314 warning until we know we are going to produce wrong code.
1315 See PR66047 */
1316 if (!TARGET_SSE && warn)
1317 return -1;
1318 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1319 ->x_ix86_isa_flags) ? 2 : 1;
1323 return 0;
1326 /* Return true if EAX is live at the start of the function. Used by
1327 ix86_expand_prologue to determine if we need special help before
1328 calling allocate_stack_worker. */
1330 static bool
1331 ix86_eax_live_at_start_p (void)
1333 /* Cheat. Don't bother working forward from ix86_function_regparm
1334 to the function type to whether an actual argument is located in
1335 eax. Instead just look at cfg info, which is still close enough
1336 to correct at this point. This gives false positives for broken
1337 functions that might use uninitialized data that happens to be
1338 allocated in eax, but who cares? */
1339 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1342 static bool
1343 ix86_keep_aggregate_return_pointer (tree fntype)
1345 tree attr;
1347 if (!TARGET_64BIT)
1349 attr = lookup_attribute ("callee_pop_aggregate_return",
1350 TYPE_ATTRIBUTES (fntype));
1351 if (attr)
1352 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1354 /* For 32-bit MS-ABI the default is to keep aggregate
1355 return pointer. */
1356 if (ix86_function_type_abi (fntype) == MS_ABI)
1357 return true;
1359 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1362 /* Value is the number of bytes of arguments automatically
1363 popped when returning from a subroutine call.
1364 FUNDECL is the declaration node of the function (as a tree),
1365 FUNTYPE is the data type of the function (as a tree),
1366 or for a library call it is an identifier node for the subroutine name.
1367 SIZE is the number of bytes of arguments passed on the stack.
1369 On the 80386, the RTD insn may be used to pop them if the number
1370 of args is fixed, but if the number is variable then the caller
1371 must pop them all. RTD can't be used for library calls now
1372 because the library is compiled with the Unix compiler.
1373 Use of RTD is a selectable option, since it is incompatible with
1374 standard Unix calling sequences. If the option is not selected,
1375 the caller must always pop the args.
1377 The attribute stdcall is equivalent to RTD on a per module basis. */
1379 static poly_int64
1380 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1382 unsigned int ccvt;
1384 /* None of the 64-bit ABIs pop arguments. */
1385 if (TARGET_64BIT)
1386 return 0;
1388 ccvt = ix86_get_callcvt (funtype);
1390 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1391 | IX86_CALLCVT_THISCALL)) != 0
1392 && ! stdarg_p (funtype))
1393 return size;
1395 /* Lose any fake structure return argument if it is passed on the stack. */
1396 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1397 && !ix86_keep_aggregate_return_pointer (funtype))
1399 int nregs = ix86_function_regparm (funtype, fundecl);
1400 if (nregs == 0)
1401 return GET_MODE_SIZE (Pmode);
1404 return 0;
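/* Illustrative example (not from this file, hypothetical function): the
   stdcall/fastcall case above is what makes the callee pop its own stack
   arguments.  Assuming

     int __attribute__((stdcall)) f (int a, int b);

   in 32-bit code, f returns with ret 8 (two 4-byte arguments), so the
   caller must not adjust %esp after the call; a vararg or plain cdecl
   function instead returns with a bare ret and leaves the cleanup to the
   caller.  */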
1407 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1409 static bool
1410 ix86_legitimate_combined_insn (rtx_insn *insn)
1412 int i;
1414 /* Check operand constraints in case hard registers were propagated
1415 into insn pattern. This check prevents combine pass from
1416 generating insn patterns with invalid hard register operands.
1417 These invalid insns can eventually confuse reload to error out
1418 with a spill failure. See also PRs 46829 and 46843. */
1420 gcc_assert (INSN_CODE (insn) >= 0);
1422 extract_insn (insn);
1423 preprocess_constraints (insn);
1425 int n_operands = recog_data.n_operands;
1426 int n_alternatives = recog_data.n_alternatives;
1427 for (i = 0; i < n_operands; i++)
1429 rtx op = recog_data.operand[i];
1430 machine_mode mode = GET_MODE (op);
1431 const operand_alternative *op_alt;
1432 int offset = 0;
1433 bool win;
1434 int j;
1436 /* A unary operator may be accepted by the predicate, but it
1437 is irrelevant for matching constraints. */
1438 if (UNARY_P (op))
1439 op = XEXP (op, 0);
1441 if (SUBREG_P (op))
1443 if (REG_P (SUBREG_REG (op))
1444 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1445 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1446 GET_MODE (SUBREG_REG (op)),
1447 SUBREG_BYTE (op),
1448 GET_MODE (op));
1449 op = SUBREG_REG (op);
1452 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1453 continue;
1455 op_alt = recog_op_alt;
1457 /* Operand has no constraints, anything is OK. */
1458 win = !n_alternatives;
1460 alternative_mask preferred = get_preferred_alternatives (insn);
1461 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1463 if (!TEST_BIT (preferred, j))
1464 continue;
1465 if (op_alt[i].anything_ok
1466 || (op_alt[i].matches != -1
1467 && operands_match_p
1468 (recog_data.operand[i],
1469 recog_data.operand[op_alt[i].matches]))
1470 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1472 win = true;
1473 break;
1477 if (!win)
1478 return false;
1481 return true;
1484 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1486 static unsigned HOST_WIDE_INT
1487 ix86_asan_shadow_offset (void)
1489 return SUBTARGET_SHADOW_OFFSET;
1492 /* Argument support functions. */
1494 /* Return true when register may be used to pass function parameters. */
1495 bool
1496 ix86_function_arg_regno_p (int regno)
1498 int i;
1499 enum calling_abi call_abi;
1500 const int *parm_regs;
1502 if (TARGET_SSE && SSE_REGNO_P (regno)
1503 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1504 return true;
1506 if (!TARGET_64BIT)
1507 return (regno < REGPARM_MAX
1508 || (TARGET_MMX && MMX_REGNO_P (regno)
1509 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1511 /* TODO: The function should depend on current function ABI but
1512 builtins.cc would need updating then. Therefore we use the
1513 default ABI. */
1514 call_abi = ix86_cfun_abi ();
1516 /* RAX is used as hidden argument to va_arg functions. */
1517 if (call_abi == SYSV_ABI && regno == AX_REG)
1518 return true;
1520 if (call_abi == MS_ABI)
1521 parm_regs = x86_64_ms_abi_int_parameter_registers;
1522 else
1523 parm_regs = x86_64_int_parameter_registers;
1525 for (i = 0; i < (call_abi == MS_ABI
1526 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1527 if (regno == parm_regs[i])
1528 return true;
1529 return false;
1532 /* Return if we do not know how to pass ARG solely in registers. */
1534 static bool
1535 ix86_must_pass_in_stack (const function_arg_info &arg)
1537 if (must_pass_in_stack_var_size_or_pad (arg))
1538 return true;
1540 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1541 The layout_type routine is crafty and tries to trick us into passing
1542 currently unsupported vector types on the stack by using TImode. */
1543 return (!TARGET_64BIT && arg.mode == TImode
1544 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
 1547 /* It returns the size, in bytes, of the area reserved for arguments passed
 1548 in registers for the function represented by fndecl, depending on the
 1549 ABI format used. */
 1550 int
 1551 ix86_reg_parm_stack_space (const_tree fndecl)
1553 enum calling_abi call_abi = SYSV_ABI;
1554 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1555 call_abi = ix86_function_abi (fndecl);
1556 else
1557 call_abi = ix86_function_type_abi (fndecl);
1558 if (TARGET_64BIT && call_abi == MS_ABI)
1559 return 32;
1560 return 0;
1563 /* We add this as a workaround in order to use libc_has_function
1564 hook in i386.md. */
1565 bool
1566 ix86_libc_has_function (enum function_class fn_class)
1568 return targetm.libc_has_function (fn_class, NULL_TREE);
1571 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1572 specifying the call abi used. */
1573 enum calling_abi
1574 ix86_function_type_abi (const_tree fntype)
1576 enum calling_abi abi = ix86_abi;
1578 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1579 return abi;
1581 if (abi == SYSV_ABI
1582 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1584 static int warned;
1585 if (TARGET_X32 && !warned)
1587 error ("X32 does not support %<ms_abi%> attribute");
1588 warned = 1;
1591 abi = MS_ABI;
1593 else if (abi == MS_ABI
1594 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1595 abi = SYSV_ABI;
1597 return abi;
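/* Illustrative example (not from this file, hypothetical declaration): on a
   SysV x86-64 target the attribute lookup above lets a single declaration
   opt into the other ABI, e.g.

     void __attribute__((ms_abi)) wincall (int a, int b, int c, int d);

   is called with its arguments in %rcx, %rdx, %r8 and %r9 and with the
   32-byte shadow space the MS ABI expects, while an unattributed
   declaration keeps the default SysV register sequence
   (%rdi, %rsi, %rdx, %rcx, %r8, %r9).  */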
1600 enum calling_abi
1601 ix86_function_abi (const_tree fndecl)
1603 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1606 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1607 specifying the call abi used. */
1608 enum calling_abi
1609 ix86_cfun_abi (void)
1611 return cfun ? cfun->machine->call_abi : ix86_abi;
1614 bool
1615 ix86_function_ms_hook_prologue (const_tree fn)
1617 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1619 if (decl_function_context (fn) != NULL_TREE)
1620 error_at (DECL_SOURCE_LOCATION (fn),
1621 "%<ms_hook_prologue%> attribute is not compatible "
1622 "with nested function");
1623 else
1624 return true;
1626 return false;
1629 bool
1630 ix86_function_naked (const_tree fn)
1632 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1633 return true;
1635 return false;
1638 /* Write the extra assembler code needed to declare a function properly. */
1640 void
1641 ix86_asm_output_function_label (FILE *out_file, const char *fname,
1642 tree decl)
1644 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1646 if (cfun)
1647 cfun->machine->function_label_emitted = true;
1649 if (is_ms_hook)
1651 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1652 unsigned int filler_cc = 0xcccccccc;
1654 for (i = 0; i < filler_count; i += 4)
1655 fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1658 #ifdef SUBTARGET_ASM_UNWIND_INIT
1659 SUBTARGET_ASM_UNWIND_INIT (out_file);
1660 #endif
1662 assemble_function_label_raw (out_file, fname);
1664 /* Output magic byte marker, if hot-patch attribute is set. */
1665 if (is_ms_hook)
1667 if (TARGET_64BIT)
1669 /* leaq [%rsp + 0], %rsp */
1670 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1671 out_file);
1673 else
1675 /* movl.s %edi, %edi
1676 push %ebp
1677 movl.s %esp, %ebp */
1678 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
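/* Illustrative example (not from this file, hypothetical declaration): the
   bytes emitted above implement the Microsoft hot-patch convention.
   Assuming

     void __attribute__((ms_hook_prologue)) hookable (void);

   a 32-bit build starts the function with the two-byte no-op
   movl.s %edi, %edi followed by push %ebp and movl.s %esp, %ebp, and the
   label is preceded by a filler area of 0xcc bytes, so a runtime patcher
   can overwrite the first instructions with a short jump into the filler
   and from there jump anywhere else.  */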
1683 /* Implementation of call abi switching target hook. Specific to FNDECL
1684 the specific call register sets are set. See also
1685 ix86_conditional_register_usage for more details. */
1686 void
1687 ix86_call_abi_override (const_tree fndecl)
1689 cfun->machine->call_abi = ix86_function_abi (fndecl);
1692 /* Return 1 if pseudo register should be created and used to hold
1693 GOT address for PIC code. */
1694 bool
1695 ix86_use_pseudo_pic_reg (void)
1697 if ((TARGET_64BIT
1698 && (ix86_cmodel == CM_SMALL_PIC
1699 || TARGET_PECOFF))
1700 || !flag_pic)
1701 return false;
1702 return true;
1705 /* Initialize large model PIC register. */
1707 static void
1708 ix86_init_large_pic_reg (unsigned int tmp_regno)
1710 rtx_code_label *label;
1711 rtx tmp_reg;
1713 gcc_assert (Pmode == DImode);
1714 label = gen_label_rtx ();
1715 emit_label (label);
1716 LABEL_PRESERVE_P (label) = 1;
1717 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1718 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1719 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1720 label));
1721 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1722 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1723 const char *name = LABEL_NAME (label);
1724 PUT_CODE (label, NOTE);
1725 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1726 NOTE_DELETED_LABEL_NAME (label) = name;
1729 /* Create and initialize PIC register if required. */
1730 static void
1731 ix86_init_pic_reg (void)
1733 edge entry_edge;
1734 rtx_insn *seq;
1736 if (!ix86_use_pseudo_pic_reg ())
1737 return;
1739 start_sequence ();
1741 if (TARGET_64BIT)
1743 if (ix86_cmodel == CM_LARGE_PIC)
1744 ix86_init_large_pic_reg (R11_REG);
1745 else
1746 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1748 else
 1750 /* If there is a future mcount call in the function, it is more profitable
 1751 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1752 rtx reg = crtl->profile
1753 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1754 : pic_offset_table_rtx;
1755 rtx_insn *insn = emit_insn (gen_set_got (reg));
1756 RTX_FRAME_RELATED_P (insn) = 1;
1757 if (crtl->profile)
1758 emit_move_insn (pic_offset_table_rtx, reg);
1759 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1762 seq = get_insns ();
1763 end_sequence ();
1765 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1766 insert_insn_on_edge (seq, entry_edge);
1767 commit_one_edge_insertion (entry_edge);
1770 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1771 for a call to a function whose data type is FNTYPE.
1772 For a library call, FNTYPE is 0. */
1774 void
1775 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1776 tree fntype, /* tree ptr for function decl */
1777 rtx libname, /* SYMBOL_REF of library name or 0 */
1778 tree fndecl,
1779 int caller)
1781 struct cgraph_node *local_info_node = NULL;
1782 struct cgraph_node *target = NULL;
1784 /* Set silent_p to false to raise an error for invalid calls when
1785 expanding function body. */
1786 cfun->machine->silent_p = false;
1788 memset (cum, 0, sizeof (*cum));
1790 if (fndecl)
1792 target = cgraph_node::get (fndecl);
1793 if (target)
1795 target = target->function_symbol ();
1796 local_info_node = cgraph_node::local_info_node (target->decl);
1797 cum->call_abi = ix86_function_abi (target->decl);
1799 else
1800 cum->call_abi = ix86_function_abi (fndecl);
1802 else
1803 cum->call_abi = ix86_function_type_abi (fntype);
1805 cum->caller = caller;
1807 /* Set up the number of registers to use for passing arguments. */
1808 cum->nregs = ix86_regparm;
1809 if (TARGET_64BIT)
1811 cum->nregs = (cum->call_abi == SYSV_ABI
1812 ? X86_64_REGPARM_MAX
1813 : X86_64_MS_REGPARM_MAX);
1815 if (TARGET_SSE)
1817 cum->sse_nregs = SSE_REGPARM_MAX;
1818 if (TARGET_64BIT)
1820 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1821 ? X86_64_SSE_REGPARM_MAX
1822 : X86_64_MS_SSE_REGPARM_MAX);
1825 if (TARGET_MMX)
1826 cum->mmx_nregs = MMX_REGPARM_MAX;
1827 cum->warn_avx512f = true;
1828 cum->warn_avx = true;
1829 cum->warn_sse = true;
1830 cum->warn_mmx = true;
1832 /* Because the type might mismatch between caller and callee, we need to
1833 use the actual type of the function for local calls.
1834 FIXME: cgraph_analyze can be told to actually record if a function uses
1835 va_start, so for local functions maybe_vaarg can be made more aggressive,
1836 helping K&R code.
1837 FIXME: once the typesystem is fixed, we won't need this code anymore. */
1838 if (local_info_node && local_info_node->local
1839 && local_info_node->can_change_signature)
1840 fntype = TREE_TYPE (target->decl);
1841 cum->stdarg = stdarg_p (fntype);
1842 cum->maybe_vaarg = (fntype
1843 ? (!prototype_p (fntype) || stdarg_p (fntype))
1844 : !libname);
1846 cum->decl = fndecl;
1848 cum->warn_empty = !warn_abi || cum->stdarg;
1849 if (!cum->warn_empty && fntype)
1851 function_args_iterator iter;
1852 tree argtype;
1853 bool seen_empty_type = false;
1854 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1856 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1857 break;
1858 if (TYPE_EMPTY_P (argtype))
1859 seen_empty_type = true;
1860 else if (seen_empty_type)
1862 cum->warn_empty = true;
1863 break;
1868 if (!TARGET_64BIT)
1870 /* If there are variable arguments, then we won't pass anything
1871 in registers in 32-bit mode. */
1872 if (stdarg_p (fntype))
1874 cum->nregs = 0;
1875 /* Since in 32-bit mode variable arguments are always passed on the
1876 stack, there is a scratch register available for an indirect
1877 sibcall. */
1878 cfun->machine->arg_reg_available = true;
1879 cum->sse_nregs = 0;
1880 cum->mmx_nregs = 0;
1881 cum->warn_avx512f = false;
1882 cum->warn_avx = false;
1883 cum->warn_sse = false;
1884 cum->warn_mmx = false;
1885 return;
1888 /* Use ecx and edx registers if function has fastcall attribute,
1889 else look for regparm information. */
1890 if (fntype)
1892 unsigned int ccvt = ix86_get_callcvt (fntype);
1893 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1895 cum->nregs = 1;
1896 cum->fastcall = 1; /* Same first register as in fastcall. */
1898 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1900 cum->nregs = 2;
1901 cum->fastcall = 1;
1903 else
1904 cum->nregs = ix86_function_regparm (fntype, fndecl);
1907 /* Set up the number of SSE registers used for passing SFmode
1908 and DFmode arguments. Warn for mismatching ABI. */
1909 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1912 cfun->machine->arg_reg_available = (cum->nregs > 0);
1915 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1916 But in the case of vector types, it is some vector mode.
1918 When we have only some of our vector isa extensions enabled, then there
1919 are some modes for which vector_mode_supported_p is false. For these
1920 modes, the generic vector support in gcc will choose some non-vector mode
1921 in order to implement the type. By computing the natural mode, we'll
1922 select the proper ABI location for the operand and not depend on whatever
1923 the middle-end decides to do with these vector types.
1925 The middle-end can't deal with vector types > 16 bytes. In this
1926 case, we return the original mode and warn about the ABI change if CUM
1927 isn't NULL.
1929 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1930 available for the function return value. */
1932 static machine_mode
1933 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1934 bool in_return)
1936 machine_mode mode = TYPE_MODE (type);
1938 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1940 HOST_WIDE_INT size = int_size_in_bytes (type);
1941 if ((size == 8 || size == 16 || size == 32 || size == 64)
1942 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1943 && TYPE_VECTOR_SUBPARTS (type) > 1)
1945 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1947 /* There are no XFmode vector modes ... */
1948 if (innermode == XFmode)
1949 return mode;
1951 /* ... and no decimal float vector modes. */
1952 if (DECIMAL_FLOAT_MODE_P (innermode))
1953 return mode;
1955 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1956 mode = MIN_MODE_VECTOR_FLOAT;
1957 else
1958 mode = MIN_MODE_VECTOR_INT;
1960 /* Get the mode which has this inner mode and number of units. */
1961 FOR_EACH_MODE_FROM (mode, mode)
1962 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1963 && GET_MODE_INNER (mode) == innermode)
1965 if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
1966 && !TARGET_IAMCU)
1968 static bool warnedavx512f;
1969 static bool warnedavx512f_ret;
1971 if (cum && cum->warn_avx512f && !warnedavx512f)
1973 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1974 "without AVX512F enabled changes the ABI"))
1975 warnedavx512f = true;
1977 else if (in_return && !warnedavx512f_ret)
1979 if (warning (OPT_Wpsabi, "AVX512F vector return "
1980 "without AVX512F enabled changes the ABI"))
1981 warnedavx512f_ret = true;
1984 return TYPE_MODE (type);
1986 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1988 static bool warnedavx;
1989 static bool warnedavx_ret;
1991 if (cum && cum->warn_avx && !warnedavx)
1993 if (warning (OPT_Wpsabi, "AVX vector argument "
1994 "without AVX enabled changes the ABI"))
1995 warnedavx = true;
1997 else if (in_return && !warnedavx_ret)
1999 if (warning (OPT_Wpsabi, "AVX vector return "
2000 "without AVX enabled changes the ABI"))
2001 warnedavx_ret = true;
2004 return TYPE_MODE (type);
2006 else if (((size == 8 && TARGET_64BIT) || size == 16)
2007 && !TARGET_SSE
2008 && !TARGET_IAMCU)
2010 static bool warnedsse;
2011 static bool warnedsse_ret;
2013 if (cum && cum->warn_sse && !warnedsse)
2015 if (warning (OPT_Wpsabi, "SSE vector argument "
2016 "without SSE enabled changes the ABI"))
2017 warnedsse = true;
2019 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
2021 if (warning (OPT_Wpsabi, "SSE vector return "
2022 "without SSE enabled changes the ABI"))
2023 warnedsse_ret = true;
2026 else if ((size == 8 && !TARGET_64BIT)
2027 && (!cfun
2028 || cfun->machine->func_type == TYPE_NORMAL)
2029 && !TARGET_MMX
2030 && !TARGET_IAMCU)
2032 static bool warnedmmx;
2033 static bool warnedmmx_ret;
2035 if (cum && cum->warn_mmx && !warnedmmx)
2037 if (warning (OPT_Wpsabi, "MMX vector argument "
2038 "without MMX enabled changes the ABI"))
2039 warnedmmx = true;
2041 else if (in_return && !warnedmmx_ret)
2043 if (warning (OPT_Wpsabi, "MMX vector return "
2044 "without MMX enabled changes the ABI"))
2045 warnedmmx_ret = true;
2048 return mode;
2051 gcc_unreachable ();
2055 return mode;
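/* Illustrative example (an editorial sketch, not part of the original
   source): given

	typedef int v4si __attribute__ ((vector_size (16)));

   compiled without SSE enabled, the generic vector code may lay the type
   out in a non-vector mode, but type_natural_mode above still finds and
   returns V4SImode, so the ABI location is computed as if the vector ISA
   were available, and a -Wpsabi note about the ABI change is emitted.  */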
2058 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2059 this may not agree with the mode that the type system has chosen for the
2060 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2061 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2063 static rtx
2064 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2065 unsigned int regno)
2067 rtx tmp;
2069 if (orig_mode != BLKmode)
2070 tmp = gen_rtx_REG (orig_mode, regno);
2071 else
2073 tmp = gen_rtx_REG (mode, regno);
2074 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2075 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2078 return tmp;
2081 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2082 goal of this code is to classify each eightbyte of an incoming argument by
2083 register class and assign registers accordingly. */
2085 /* Return the union class of CLASS1 and CLASS2.
2086 See the x86-64 PS ABI for details. */
2088 static enum x86_64_reg_class
2089 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2091 /* Rule #1: If both classes are equal, this is the resulting class. */
2092 if (class1 == class2)
2093 return class1;
2095 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2096 the other class. */
2097 if (class1 == X86_64_NO_CLASS)
2098 return class2;
2099 if (class2 == X86_64_NO_CLASS)
2100 return class1;
2102 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2103 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2104 return X86_64_MEMORY_CLASS;
2106 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2107 if ((class1 == X86_64_INTEGERSI_CLASS
2108 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2109 || (class2 == X86_64_INTEGERSI_CLASS
2110 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2111 return X86_64_INTEGERSI_CLASS;
2112 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2113 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2114 return X86_64_INTEGER_CLASS;
2116 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2117 MEMORY is used. */
2118 if (class1 == X86_64_X87_CLASS
2119 || class1 == X86_64_X87UP_CLASS
2120 || class1 == X86_64_COMPLEX_X87_CLASS
2121 || class2 == X86_64_X87_CLASS
2122 || class2 == X86_64_X87UP_CLASS
2123 || class2 == X86_64_COMPLEX_X87_CLASS)
2124 return X86_64_MEMORY_CLASS;
2126 /* Rule #6: Otherwise class SSE is used. */
2127 return X86_64_SSE_CLASS;
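/* Worked example (an illustrative addition, not from the original source):
   merging the two views of the single eightbyte of

	union u { long l; double d; };

   classifies it once as X86_64_INTEGER_CLASS and once as
   X86_64_SSEDF_CLASS; rule #4 above wins, so the union is passed in a
   general purpose register.  */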
2130 /* Classify the argument of type TYPE and mode MODE.
2131 CLASSES will be filled by the register class used to pass each word
2132 of the operand. The number of words is returned. In case the parameter
2133 should be passed in memory, 0 is returned. As a special case for zero
2134 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2136 BIT_OFFSET is used internally for handling records and specifies the
2137 offset in bits modulo 512 to avoid overflow cases.
2139 See the x86-64 PS ABI for details.
2142 static int
2143 classify_argument (machine_mode mode, const_tree type,
2144 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2145 int &zero_width_bitfields)
2147 HOST_WIDE_INT bytes
2148 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2149 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2151 /* Variable sized entities are always passed/returned in memory. */
2152 if (bytes < 0)
2153 return 0;
2155 if (mode != VOIDmode)
2157 /* The value of "named" doesn't matter. */
2158 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2159 if (targetm.calls.must_pass_in_stack (arg))
2160 return 0;
2163 if (type && (AGGREGATE_TYPE_P (type)
2164 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2166 int i;
2167 tree field;
2168 enum x86_64_reg_class subclasses[MAX_CLASSES];
2170 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2171 if (bytes > 64)
2172 return 0;
2174 for (i = 0; i < words; i++)
2175 classes[i] = X86_64_NO_CLASS;
2177 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2178 signal the memory class, so handle it as a special case. */
2179 if (!words)
2181 classes[0] = X86_64_NO_CLASS;
2182 return 1;
2185 /* Classify each field of record and merge classes. */
2186 switch (TREE_CODE (type))
2188 case RECORD_TYPE:
2189 /* And now merge the fields of structure. */
2190 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2192 if (TREE_CODE (field) == FIELD_DECL)
2194 int num;
2196 if (TREE_TYPE (field) == error_mark_node)
2197 continue;
2199 /* Bitfields are always classified as integer. Handle them
2200 early, since later code would consider them to be
2201 misaligned integers. */
2202 if (DECL_BIT_FIELD (field))
2204 if (integer_zerop (DECL_SIZE (field)))
2206 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2207 continue;
2208 if (zero_width_bitfields != 2)
2210 zero_width_bitfields = 1;
2211 continue;
2214 for (i = (int_bit_position (field)
2215 + (bit_offset % 64)) / 8 / 8;
2216 i < ((int_bit_position (field) + (bit_offset % 64))
2217 + tree_to_shwi (DECL_SIZE (field))
2218 + 63) / 8 / 8; i++)
2219 classes[i]
2220 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2222 else
2224 int pos;
2226 type = TREE_TYPE (field);
2228 /* Flexible array member is ignored. */
2229 if (TYPE_MODE (type) == BLKmode
2230 && TREE_CODE (type) == ARRAY_TYPE
2231 && TYPE_SIZE (type) == NULL_TREE
2232 && TYPE_DOMAIN (type) != NULL_TREE
2233 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2234 == NULL_TREE))
2236 static bool warned;
2238 if (!warned && warn_psabi)
2240 warned = true;
2241 inform (input_location,
2242 "the ABI of passing struct with"
2243 " a flexible array member has"
2244 " changed in GCC 4.4");
2246 continue;
2248 num = classify_argument (TYPE_MODE (type), type,
2249 subclasses,
2250 (int_bit_position (field)
2251 + bit_offset) % 512,
2252 zero_width_bitfields);
2253 if (!num)
2254 return 0;
2255 pos = (int_bit_position (field)
2256 + (bit_offset % 64)) / 8 / 8;
2257 for (i = 0; i < num && (i + pos) < words; i++)
2258 classes[i + pos]
2259 = merge_classes (subclasses[i], classes[i + pos]);
2263 break;
2265 case ARRAY_TYPE:
2266 /* Arrays are handled as small records. */
2268 int num;
2269 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2270 TREE_TYPE (type), subclasses, bit_offset,
2271 zero_width_bitfields);
2272 if (!num)
2273 return 0;
2275 /* The partial classes are now full classes. */
2276 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2277 subclasses[0] = X86_64_SSE_CLASS;
2278 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2279 subclasses[0] = X86_64_SSE_CLASS;
2280 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2281 && !((bit_offset % 64) == 0 && bytes == 4))
2282 subclasses[0] = X86_64_INTEGER_CLASS;
2284 for (i = 0; i < words; i++)
2285 classes[i] = subclasses[i % num];
2287 break;
2289 case UNION_TYPE:
2290 case QUAL_UNION_TYPE:
2291 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2293 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2295 if (TREE_CODE (field) == FIELD_DECL)
2297 int num;
2299 if (TREE_TYPE (field) == error_mark_node)
2300 continue;
2302 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2303 TREE_TYPE (field), subclasses,
2304 bit_offset, zero_width_bitfields);
2305 if (!num)
2306 return 0;
2307 for (i = 0; i < num && i < words; i++)
2308 classes[i] = merge_classes (subclasses[i], classes[i]);
2311 break;
2313 case BITINT_TYPE:
2314 /* _BitInt(N) for N > 64 is passed as structure containing
2315 (N + 63) / 64 64-bit elements. */
2316 if (words > 2)
2317 return 0;
2318 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2319 return 2;
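/* For instance (illustrative): _BitInt(128) occupies two words and is
   classified as two X86_64_INTEGER_CLASS eightbytes, i.e. passed in two
   general purpose registers, while _BitInt(192) needs three words and
   therefore falls back to memory via the "words > 2" check above.  */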
2321 default:
2322 gcc_unreachable ();
2325 if (words > 2)
2327 /* When the size is > 16 bytes, if the first class isn't
2328 X86_64_SSE_CLASS or any of the other classes aren't
2329 X86_64_SSEUP_CLASS, everything should be passed in
2330 memory. */
2331 if (classes[0] != X86_64_SSE_CLASS)
2332 return 0;
2334 for (i = 1; i < words; i++)
2335 if (classes[i] != X86_64_SSEUP_CLASS)
2336 return 0;
2339 /* Final merger cleanup. */
2340 for (i = 0; i < words; i++)
2342 /* If one class is MEMORY, everything should be passed in
2343 memory. */
2344 if (classes[i] == X86_64_MEMORY_CLASS)
2345 return 0;
2347 /* X86_64_SSEUP_CLASS should always be preceded by
2348 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2349 if (classes[i] == X86_64_SSEUP_CLASS
2350 && classes[i - 1] != X86_64_SSE_CLASS
2351 && classes[i - 1] != X86_64_SSEUP_CLASS)
2353 /* The first one should never be X86_64_SSEUP_CLASS. */
2354 gcc_assert (i != 0);
2355 classes[i] = X86_64_SSE_CLASS;
2358 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2359 everything should be passed in memory. */
2360 if (classes[i] == X86_64_X87UP_CLASS
2361 && (classes[i - 1] != X86_64_X87_CLASS))
2363 static bool warned;
2365 /* The first one should never be X86_64_X87UP_CLASS. */
2366 gcc_assert (i != 0);
2367 if (!warned && warn_psabi)
2369 warned = true;
2370 inform (input_location,
2371 "the ABI of passing union with %<long double%>"
2372 " has changed in GCC 4.4");
2374 return 0;
2377 return words;
2380 /* Compute the alignment needed. We align all types to natural boundaries,
2381 with the exception of XFmode, which is aligned to 64 bits. */
2382 if (mode != VOIDmode && mode != BLKmode)
2384 int mode_alignment = GET_MODE_BITSIZE (mode);
2386 if (mode == XFmode)
2387 mode_alignment = 128;
2388 else if (mode == XCmode)
2389 mode_alignment = 256;
2390 if (COMPLEX_MODE_P (mode))
2391 mode_alignment /= 2;
2392 /* Misaligned fields are always returned in memory. */
2393 if (bit_offset % mode_alignment)
2394 return 0;
2397 /* For V1xx modes, just use the base mode. */
2398 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2399 && GET_MODE_UNIT_SIZE (mode) == bytes)
2400 mode = GET_MODE_INNER (mode);
2402 /* Classification of atomic types. */
2403 switch (mode)
2405 case E_SDmode:
2406 case E_DDmode:
2407 classes[0] = X86_64_SSE_CLASS;
2408 return 1;
2409 case E_TDmode:
2410 classes[0] = X86_64_SSE_CLASS;
2411 classes[1] = X86_64_SSEUP_CLASS;
2412 return 2;
2413 case E_DImode:
2414 case E_SImode:
2415 case E_HImode:
2416 case E_QImode:
2417 case E_CSImode:
2418 case E_CHImode:
2419 case E_CQImode:
2421 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2423 /* Analyze last 128 bits only. */
2424 size = (size - 1) & 0x7f;
2426 if (size < 32)
2428 classes[0] = X86_64_INTEGERSI_CLASS;
2429 return 1;
2431 else if (size < 64)
2433 classes[0] = X86_64_INTEGER_CLASS;
2434 return 1;
2436 else if (size < 64+32)
2438 classes[0] = X86_64_INTEGER_CLASS;
2439 classes[1] = X86_64_INTEGERSI_CLASS;
2440 return 2;
2442 else if (size < 64+64)
2444 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2445 return 2;
2447 else
2448 gcc_unreachable ();
2450 case E_CDImode:
2451 case E_TImode:
2452 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2453 return 2;
2454 case E_COImode:
2455 case E_OImode:
2456 /* OImode shouldn't be used directly. */
2457 gcc_unreachable ();
2458 case E_CTImode:
2459 return 0;
2460 case E_HFmode:
2461 case E_BFmode:
2462 if (!(bit_offset % 64))
2463 classes[0] = X86_64_SSEHF_CLASS;
2464 else
2465 classes[0] = X86_64_SSE_CLASS;
2466 return 1;
2467 case E_SFmode:
2468 if (!(bit_offset % 64))
2469 classes[0] = X86_64_SSESF_CLASS;
2470 else
2471 classes[0] = X86_64_SSE_CLASS;
2472 return 1;
2473 case E_DFmode:
2474 classes[0] = X86_64_SSEDF_CLASS;
2475 return 1;
2476 case E_XFmode:
2477 classes[0] = X86_64_X87_CLASS;
2478 classes[1] = X86_64_X87UP_CLASS;
2479 return 2;
2480 case E_TFmode:
2481 classes[0] = X86_64_SSE_CLASS;
2482 classes[1] = X86_64_SSEUP_CLASS;
2483 return 2;
2484 case E_HCmode:
2485 case E_BCmode:
2486 classes[0] = X86_64_SSE_CLASS;
2487 if (!(bit_offset % 64))
2488 return 1;
2489 else
2491 classes[1] = X86_64_SSEHF_CLASS;
2492 return 2;
2494 case E_SCmode:
2495 classes[0] = X86_64_SSE_CLASS;
2496 if (!(bit_offset % 64))
2497 return 1;
2498 else
2500 static bool warned;
2502 if (!warned && warn_psabi)
2504 warned = true;
2505 inform (input_location,
2506 "the ABI of passing structure with %<complex float%>"
2507 " member has changed in GCC 4.4");
2509 classes[1] = X86_64_SSESF_CLASS;
2510 return 2;
2512 case E_DCmode:
2513 classes[0] = X86_64_SSEDF_CLASS;
2514 classes[1] = X86_64_SSEDF_CLASS;
2515 return 2;
2516 case E_XCmode:
2517 classes[0] = X86_64_COMPLEX_X87_CLASS;
2518 return 1;
2519 case E_TCmode:
2520 /* This mode is larger than 16 bytes. */
2521 return 0;
2522 case E_V8SFmode:
2523 case E_V8SImode:
2524 case E_V32QImode:
2525 case E_V16HFmode:
2526 case E_V16BFmode:
2527 case E_V16HImode:
2528 case E_V4DFmode:
2529 case E_V4DImode:
2530 classes[0] = X86_64_SSE_CLASS;
2531 classes[1] = X86_64_SSEUP_CLASS;
2532 classes[2] = X86_64_SSEUP_CLASS;
2533 classes[3] = X86_64_SSEUP_CLASS;
2534 return 4;
2535 case E_V8DFmode:
2536 case E_V16SFmode:
2537 case E_V32HFmode:
2538 case E_V32BFmode:
2539 case E_V8DImode:
2540 case E_V16SImode:
2541 case E_V32HImode:
2542 case E_V64QImode:
2543 classes[0] = X86_64_SSE_CLASS;
2544 classes[1] = X86_64_SSEUP_CLASS;
2545 classes[2] = X86_64_SSEUP_CLASS;
2546 classes[3] = X86_64_SSEUP_CLASS;
2547 classes[4] = X86_64_SSEUP_CLASS;
2548 classes[5] = X86_64_SSEUP_CLASS;
2549 classes[6] = X86_64_SSEUP_CLASS;
2550 classes[7] = X86_64_SSEUP_CLASS;
2551 return 8;
2552 case E_V4SFmode:
2553 case E_V4SImode:
2554 case E_V16QImode:
2555 case E_V8HImode:
2556 case E_V8HFmode:
2557 case E_V8BFmode:
2558 case E_V2DFmode:
2559 case E_V2DImode:
2560 classes[0] = X86_64_SSE_CLASS;
2561 classes[1] = X86_64_SSEUP_CLASS;
2562 return 2;
2563 case E_V1TImode:
2564 case E_V1DImode:
2565 case E_V2SFmode:
2566 case E_V2SImode:
2567 case E_V4HImode:
2568 case E_V4HFmode:
2569 case E_V4BFmode:
2570 case E_V2HFmode:
2571 case E_V2BFmode:
2572 case E_V8QImode:
2573 classes[0] = X86_64_SSE_CLASS;
2574 return 1;
2575 case E_BLKmode:
2576 case E_VOIDmode:
2577 return 0;
2578 default:
2579 gcc_assert (VECTOR_MODE_P (mode));
2581 if (bytes > 16)
2582 return 0;
2584 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2586 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2587 classes[0] = X86_64_INTEGERSI_CLASS;
2588 else
2589 classes[0] = X86_64_INTEGER_CLASS;
2590 classes[1] = X86_64_INTEGER_CLASS;
2591 return 1 + (bytes > 8);
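/* Worked example (an editorial sketch, not part of the original source):
   for

	struct s { double d; int i; };

   classify_argument sees two eightbytes: the first one, holding D, gets
   an SSE class and the second one, holding I, gets an integer class, so
   under the SysV ABI the struct travels in one SSE register and one
   general purpose register.  */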
2595 /* Wrapper around classify_argument with the extra zero_width_bitfields
2596 argument, to diagnose GCC 12.1 ABI differences for C. */
2598 static int
2599 classify_argument (machine_mode mode, const_tree type,
2600 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2602 int zero_width_bitfields = 0;
2603 static bool warned = false;
2604 int n = classify_argument (mode, type, classes, bit_offset,
2605 zero_width_bitfields);
2606 if (!zero_width_bitfields || warned || !warn_psabi)
2607 return n;
2608 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2609 zero_width_bitfields = 2;
2610 if (classify_argument (mode, type, alt_classes, bit_offset,
2611 zero_width_bitfields) != n)
2612 zero_width_bitfields = 3;
2613 else
2614 for (int i = 0; i < n; i++)
2615 if (classes[i] != alt_classes[i])
2617 zero_width_bitfields = 3;
2618 break;
2620 if (zero_width_bitfields == 3)
2622 warned = true;
2623 const char *url
2624 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2626 inform (input_location,
2627 "the ABI of passing C structures with zero-width bit-fields"
2628 " has changed in GCC %{12.1%}", url);
2630 return n;
2633 /* Examine the argument and compute the number of registers required in each
2634 class. Return true iff the parameter should be passed in memory. */
2636 static bool
2637 examine_argument (machine_mode mode, const_tree type, int in_return,
2638 int *int_nregs, int *sse_nregs)
2640 enum x86_64_reg_class regclass[MAX_CLASSES];
2641 int n = classify_argument (mode, type, regclass, 0);
2643 *int_nregs = 0;
2644 *sse_nregs = 0;
2646 if (!n)
2647 return true;
2648 for (n--; n >= 0; n--)
2649 switch (regclass[n])
2651 case X86_64_INTEGER_CLASS:
2652 case X86_64_INTEGERSI_CLASS:
2653 (*int_nregs)++;
2654 break;
2655 case X86_64_SSE_CLASS:
2656 case X86_64_SSEHF_CLASS:
2657 case X86_64_SSESF_CLASS:
2658 case X86_64_SSEDF_CLASS:
2659 (*sse_nregs)++;
2660 break;
2661 case X86_64_NO_CLASS:
2662 case X86_64_SSEUP_CLASS:
2663 break;
2664 case X86_64_X87_CLASS:
2665 case X86_64_X87UP_CLASS:
2666 case X86_64_COMPLEX_X87_CLASS:
2667 if (!in_return)
2668 return true;
2669 break;
2670 case X86_64_MEMORY_CLASS:
2671 gcc_unreachable ();
2674 return false;
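/* For example (illustrative): a 24-byte struct of three longs is
   classified into more than two eightbytes with no SSE class, so
   classify_argument returns 0 and examine_argument reports that the
   parameter must be passed in memory.  */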
2677 /* Construct container for the argument used by GCC interface. See
2678 FUNCTION_ARG for the detailed description. */
2680 static rtx
2681 construct_container (machine_mode mode, machine_mode orig_mode,
2682 const_tree type, int in_return, int nintregs, int nsseregs,
2683 const int *intreg, int sse_regno)
2685 /* The following variables hold the static issued_error state. */
2686 static bool issued_sse_arg_error;
2687 static bool issued_sse_ret_error;
2688 static bool issued_x87_ret_error;
2690 machine_mode tmpmode;
2691 int bytes
2692 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2693 enum x86_64_reg_class regclass[MAX_CLASSES];
2694 int n;
2695 int i;
2696 int nexps = 0;
2697 int needed_sseregs, needed_intregs;
2698 rtx exp[MAX_CLASSES];
2699 rtx ret;
2701 n = classify_argument (mode, type, regclass, 0);
2702 if (!n)
2703 return NULL;
2704 if (examine_argument (mode, type, in_return, &needed_intregs,
2705 &needed_sseregs))
2706 return NULL;
2707 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2708 return NULL;
2710 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2711 some less clueful developer tries to use floating-point anyway. */
2712 if (needed_sseregs
2713 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2715 /* Return early if we shouldn't raise an error for invalid
2716 calls. */
2717 if (cfun != NULL && cfun->machine->silent_p)
2718 return NULL;
2719 if (in_return)
2721 if (!issued_sse_ret_error)
2723 if (VALID_SSE2_TYPE_MODE (mode))
2724 error ("SSE register return with SSE2 disabled");
2725 else
2726 error ("SSE register return with SSE disabled");
2727 issued_sse_ret_error = true;
2730 else if (!issued_sse_arg_error)
2732 if (VALID_SSE2_TYPE_MODE (mode))
2733 error ("SSE register argument with SSE2 disabled");
2734 else
2735 error ("SSE register argument with SSE disabled");
2736 issued_sse_arg_error = true;
2738 return NULL;
2741 /* Likewise, error if the ABI requires us to return values in the
2742 x87 registers and the user specified -mno-80387. */
2743 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2744 for (i = 0; i < n; i++)
2745 if (regclass[i] == X86_64_X87_CLASS
2746 || regclass[i] == X86_64_X87UP_CLASS
2747 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2749 /* Return early if we shouldn't raise an error for invalid
2750 calls. */
2751 if (cfun != NULL && cfun->machine->silent_p)
2752 return NULL;
2753 if (!issued_x87_ret_error)
2755 error ("x87 register return with x87 disabled");
2756 issued_x87_ret_error = true;
2758 return NULL;
2761 /* First construct simple cases. Avoid SCmode, since we want to use
2762 a single register to pass this type. */
2763 if (n == 1 && mode != SCmode && mode != HCmode)
2764 switch (regclass[0])
2766 case X86_64_INTEGER_CLASS:
2767 case X86_64_INTEGERSI_CLASS:
2768 return gen_rtx_REG (mode, intreg[0]);
2769 case X86_64_SSE_CLASS:
2770 case X86_64_SSEHF_CLASS:
2771 case X86_64_SSESF_CLASS:
2772 case X86_64_SSEDF_CLASS:
2773 if (mode != BLKmode)
2774 return gen_reg_or_parallel (mode, orig_mode,
2775 GET_SSE_REGNO (sse_regno));
2776 break;
2777 case X86_64_X87_CLASS:
2778 case X86_64_COMPLEX_X87_CLASS:
2779 return gen_rtx_REG (mode, FIRST_STACK_REG);
2780 case X86_64_NO_CLASS:
2781 /* Zero sized array, struct or class. */
2782 return NULL;
2783 default:
2784 gcc_unreachable ();
2786 if (n == 2
2787 && regclass[0] == X86_64_SSE_CLASS
2788 && regclass[1] == X86_64_SSEUP_CLASS
2789 && mode != BLKmode)
2790 return gen_reg_or_parallel (mode, orig_mode,
2791 GET_SSE_REGNO (sse_regno));
2792 if (n == 4
2793 && regclass[0] == X86_64_SSE_CLASS
2794 && regclass[1] == X86_64_SSEUP_CLASS
2795 && regclass[2] == X86_64_SSEUP_CLASS
2796 && regclass[3] == X86_64_SSEUP_CLASS
2797 && mode != BLKmode)
2798 return gen_reg_or_parallel (mode, orig_mode,
2799 GET_SSE_REGNO (sse_regno));
2800 if (n == 8
2801 && regclass[0] == X86_64_SSE_CLASS
2802 && regclass[1] == X86_64_SSEUP_CLASS
2803 && regclass[2] == X86_64_SSEUP_CLASS
2804 && regclass[3] == X86_64_SSEUP_CLASS
2805 && regclass[4] == X86_64_SSEUP_CLASS
2806 && regclass[5] == X86_64_SSEUP_CLASS
2807 && regclass[6] == X86_64_SSEUP_CLASS
2808 && regclass[7] == X86_64_SSEUP_CLASS
2809 && mode != BLKmode)
2810 return gen_reg_or_parallel (mode, orig_mode,
2811 GET_SSE_REGNO (sse_regno));
2812 if (n == 2
2813 && regclass[0] == X86_64_X87_CLASS
2814 && regclass[1] == X86_64_X87UP_CLASS)
2815 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2817 if (n == 2
2818 && regclass[0] == X86_64_INTEGER_CLASS
2819 && regclass[1] == X86_64_INTEGER_CLASS
2820 && (mode == CDImode || mode == TImode || mode == BLKmode)
2821 && intreg[0] + 1 == intreg[1])
2823 if (mode == BLKmode)
2825 /* Use TImode for BLKmode values in 2 integer registers. */
2826 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2827 gen_rtx_REG (TImode, intreg[0]),
2828 GEN_INT (0));
2829 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2830 XVECEXP (ret, 0, 0) = exp[0];
2831 return ret;
2833 else
2834 return gen_rtx_REG (mode, intreg[0]);
2837 /* Otherwise figure out the entries of the PARALLEL. */
2838 for (i = 0; i < n; i++)
2840 int pos;
2842 switch (regclass[i])
2844 case X86_64_NO_CLASS:
2845 break;
2846 case X86_64_INTEGER_CLASS:
2847 case X86_64_INTEGERSI_CLASS:
2848 /* Merge TImodes on aligned occasions here too. */
2849 if (i * 8 + 8 > bytes)
2851 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2852 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2853 /* We've requested 24 bytes for which we
2854 don't have a mode. Use DImode. */
2855 tmpmode = DImode;
2857 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2858 tmpmode = SImode;
2859 else
2860 tmpmode = DImode;
2861 exp [nexps++]
2862 = gen_rtx_EXPR_LIST (VOIDmode,
2863 gen_rtx_REG (tmpmode, *intreg),
2864 GEN_INT (i*8));
2865 intreg++;
2866 break;
2867 case X86_64_SSEHF_CLASS:
2868 tmpmode = (mode == BFmode ? BFmode : HFmode);
2869 exp [nexps++]
2870 = gen_rtx_EXPR_LIST (VOIDmode,
2871 gen_rtx_REG (tmpmode,
2872 GET_SSE_REGNO (sse_regno)),
2873 GEN_INT (i*8));
2874 sse_regno++;
2875 break;
2876 case X86_64_SSESF_CLASS:
2877 exp [nexps++]
2878 = gen_rtx_EXPR_LIST (VOIDmode,
2879 gen_rtx_REG (SFmode,
2880 GET_SSE_REGNO (sse_regno)),
2881 GEN_INT (i*8));
2882 sse_regno++;
2883 break;
2884 case X86_64_SSEDF_CLASS:
2885 exp [nexps++]
2886 = gen_rtx_EXPR_LIST (VOIDmode,
2887 gen_rtx_REG (DFmode,
2888 GET_SSE_REGNO (sse_regno)),
2889 GEN_INT (i*8));
2890 sse_regno++;
2891 break;
2892 case X86_64_SSE_CLASS:
2893 pos = i;
2894 switch (n)
2896 case 1:
2897 tmpmode = DImode;
2898 break;
2899 case 2:
2900 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2902 tmpmode = TImode;
2903 i++;
2905 else
2906 tmpmode = DImode;
2907 break;
2908 case 4:
2909 gcc_assert (i == 0
2910 && regclass[1] == X86_64_SSEUP_CLASS
2911 && regclass[2] == X86_64_SSEUP_CLASS
2912 && regclass[3] == X86_64_SSEUP_CLASS);
2913 tmpmode = OImode;
2914 i += 3;
2915 break;
2916 case 8:
2917 gcc_assert (i == 0
2918 && regclass[1] == X86_64_SSEUP_CLASS
2919 && regclass[2] == X86_64_SSEUP_CLASS
2920 && regclass[3] == X86_64_SSEUP_CLASS
2921 && regclass[4] == X86_64_SSEUP_CLASS
2922 && regclass[5] == X86_64_SSEUP_CLASS
2923 && regclass[6] == X86_64_SSEUP_CLASS
2924 && regclass[7] == X86_64_SSEUP_CLASS);
2925 tmpmode = XImode;
2926 i += 7;
2927 break;
2928 default:
2929 gcc_unreachable ();
2931 exp [nexps++]
2932 = gen_rtx_EXPR_LIST (VOIDmode,
2933 gen_rtx_REG (tmpmode,
2934 GET_SSE_REGNO (sse_regno)),
2935 GEN_INT (pos*8));
2936 sse_regno++;
2937 break;
2938 default:
2939 gcc_unreachable ();
2943 /* Empty aligned struct, union or class. */
2944 if (nexps == 0)
2945 return NULL;
2947 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2948 for (i = 0; i < nexps; i++)
2949 XVECEXP (ret, 0, i) = exp [i];
2950 return ret;
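/* Illustrative example (not from the original source): for the first
   SysV argument of type

	struct s { long l; double d; };

   the PARALLEL built above has two entries, roughly (reg:DI di) at byte
   offset 0 and (reg:DF xmm0) at byte offset 8, so each eightbyte lands
   in the register class chosen by classify_argument.  */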
2953 /* Update the data in CUM to advance over an argument of mode MODE
2954 and data type TYPE. (TYPE is null for libcalls where that information
2955 may not be available.)
2957 Return the number of integer registers advanced over. */
2959 static int
2960 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2961 const_tree type, HOST_WIDE_INT bytes,
2962 HOST_WIDE_INT words)
2964 int res = 0;
2965 bool error_p = false;
2967 if (TARGET_IAMCU)
2969 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2970 bytes in registers. */
2971 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2972 goto pass_in_reg;
2973 return res;
2976 switch (mode)
2978 default:
2979 break;
2981 case E_BLKmode:
2982 if (bytes < 0)
2983 break;
2984 /* FALLTHRU */
2986 case E_DImode:
2987 case E_SImode:
2988 case E_HImode:
2989 case E_QImode:
2990 pass_in_reg:
2991 cum->words += words;
2992 cum->nregs -= words;
2993 cum->regno += words;
2994 if (cum->nregs >= 0)
2995 res = words;
2996 if (cum->nregs <= 0)
2998 cum->nregs = 0;
2999 cfun->machine->arg_reg_available = false;
3000 cum->regno = 0;
3002 break;
3004 case E_OImode:
3005 /* OImode shouldn't be used directly. */
3006 gcc_unreachable ();
3008 case E_DFmode:
3009 if (cum->float_in_sse == -1)
3010 error_p = true;
3011 if (cum->float_in_sse < 2)
3012 break;
3013 /* FALLTHRU */
3014 case E_SFmode:
3015 if (cum->float_in_sse == -1)
3016 error_p = true;
3017 if (cum->float_in_sse < 1)
3018 break;
3019 /* FALLTHRU */
3021 case E_V16HFmode:
3022 case E_V16BFmode:
3023 case E_V8SFmode:
3024 case E_V8SImode:
3025 case E_V64QImode:
3026 case E_V32HImode:
3027 case E_V16SImode:
3028 case E_V8DImode:
3029 case E_V32HFmode:
3030 case E_V32BFmode:
3031 case E_V16SFmode:
3032 case E_V8DFmode:
3033 case E_V32QImode:
3034 case E_V16HImode:
3035 case E_V4DFmode:
3036 case E_V4DImode:
3037 case E_TImode:
3038 case E_V16QImode:
3039 case E_V8HImode:
3040 case E_V4SImode:
3041 case E_V2DImode:
3042 case E_V8HFmode:
3043 case E_V8BFmode:
3044 case E_V4SFmode:
3045 case E_V2DFmode:
3046 if (!type || !AGGREGATE_TYPE_P (type))
3048 cum->sse_words += words;
3049 cum->sse_nregs -= 1;
3050 cum->sse_regno += 1;
3051 if (cum->sse_nregs <= 0)
3053 cum->sse_nregs = 0;
3054 cum->sse_regno = 0;
3057 break;
3059 case E_V8QImode:
3060 case E_V4HImode:
3061 case E_V4HFmode:
3062 case E_V4BFmode:
3063 case E_V2SImode:
3064 case E_V2SFmode:
3065 case E_V1TImode:
3066 case E_V1DImode:
3067 if (!type || !AGGREGATE_TYPE_P (type))
3069 cum->mmx_words += words;
3070 cum->mmx_nregs -= 1;
3071 cum->mmx_regno += 1;
3072 if (cum->mmx_nregs <= 0)
3074 cum->mmx_nregs = 0;
3075 cum->mmx_regno = 0;
3078 break;
3080 if (error_p)
3082 cum->float_in_sse = 0;
3083 error ("calling %qD with SSE calling convention without "
3084 "SSE/SSE2 enabled", cum->decl);
3085 sorry ("this is a GCC bug that can be worked around by adding "
3086 "attribute used to function called");
3089 return res;
3092 static int
3093 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3094 const_tree type, HOST_WIDE_INT words, bool named)
3096 int int_nregs, sse_nregs;
3098 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3099 if (!named && (VALID_AVX512F_REG_MODE (mode)
3100 || VALID_AVX256_REG_MODE (mode)))
3101 return 0;
3103 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3104 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3106 cum->nregs -= int_nregs;
3107 cum->sse_nregs -= sse_nregs;
3108 cum->regno += int_nregs;
3109 cum->sse_regno += sse_nregs;
3110 return int_nregs;
3112 else
3114 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3115 cum->words = ROUND_UP (cum->words, align);
3116 cum->words += words;
3117 return 0;
3121 static int
3122 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3123 HOST_WIDE_INT words)
3125 /* Otherwise, this should be passed indirect. */
3126 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3128 cum->words += words;
3129 if (cum->nregs > 0)
3131 cum->nregs -= 1;
3132 cum->regno += 1;
3133 return 1;
3135 return 0;
3138 /* Update the data in CUM to advance over argument ARG. */
3140 static void
3141 ix86_function_arg_advance (cumulative_args_t cum_v,
3142 const function_arg_info &arg)
3144 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3145 machine_mode mode = arg.mode;
3146 HOST_WIDE_INT bytes, words;
3147 int nregs;
3149 /* The argument of interrupt handler is a special case and is
3150 handled in ix86_function_arg. */
3151 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3152 return;
3154 bytes = arg.promoted_size_in_bytes ();
3155 words = CEIL (bytes, UNITS_PER_WORD);
3157 if (arg.type)
3158 mode = type_natural_mode (arg.type, NULL, false);
3160 if (TARGET_64BIT)
3162 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3164 if (call_abi == MS_ABI)
3165 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3166 else
3167 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3168 arg.named);
3170 else
3171 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3173 if (!nregs)
3175 /* Track if there are outgoing arguments on stack. */
3176 if (cum->caller)
3177 cfun->machine->outgoing_args_on_stack = true;
3181 /* Define where to put the arguments to a function.
3182 Value is zero to push the argument on the stack,
3183 or a hard register in which to store the argument.
3185 MODE is the argument's machine mode.
3186 TYPE is the data type of the argument (as a tree).
3187 This is null for libcalls where that information may
3188 not be available.
3189 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3190 the preceding args and about the function being called.
3191 NAMED is nonzero if this argument is a named parameter
3192 (otherwise it is an extra parameter matching an ellipsis). */
3194 static rtx
3195 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3196 machine_mode orig_mode, const_tree type,
3197 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3199 bool error_p = false;
3201 /* Avoid the AL settings for the Unix64 ABI. */
3202 if (mode == VOIDmode)
3203 return constm1_rtx;
3205 if (TARGET_IAMCU)
3207 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3208 bytes in registers. */
3209 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3210 goto pass_in_reg;
3211 return NULL_RTX;
3214 switch (mode)
3216 default:
3217 break;
3219 case E_BLKmode:
3220 if (bytes < 0)
3221 break;
3222 /* FALLTHRU */
3223 case E_DImode:
3224 case E_SImode:
3225 case E_HImode:
3226 case E_QImode:
3227 pass_in_reg:
3228 if (words <= cum->nregs)
3230 int regno = cum->regno;
3232 /* Fastcall allocates the first two DWORD (SImode) or
3233 smaller arguments to ECX and EDX if it isn't an
3234 aggregate type. */
3235 if (cum->fastcall)
3237 if (mode == BLKmode
3238 || mode == DImode
3239 || (type && AGGREGATE_TYPE_P (type)))
3240 break;
3242 /* ECX, not EAX, is the first allocated register. */
3243 if (regno == AX_REG)
3244 regno = CX_REG;
3246 return gen_rtx_REG (mode, regno);
3248 break;
3250 case E_DFmode:
3251 if (cum->float_in_sse == -1)
3252 error_p = true;
3253 if (cum->float_in_sse < 2)
3254 break;
3255 /* FALLTHRU */
3256 case E_SFmode:
3257 if (cum->float_in_sse == -1)
3258 error_p = true;
3259 if (cum->float_in_sse < 1)
3260 break;
3261 /* FALLTHRU */
3262 case E_TImode:
3263 /* In 32bit, we pass TImode in xmm registers. */
3264 case E_V16QImode:
3265 case E_V8HImode:
3266 case E_V4SImode:
3267 case E_V2DImode:
3268 case E_V8HFmode:
3269 case E_V8BFmode:
3270 case E_V4SFmode:
3271 case E_V2DFmode:
3272 if (!type || !AGGREGATE_TYPE_P (type))
3274 if (cum->sse_nregs)
3275 return gen_reg_or_parallel (mode, orig_mode,
3276 cum->sse_regno + FIRST_SSE_REG);
3278 break;
3280 case E_OImode:
3281 case E_XImode:
3282 /* OImode and XImode shouldn't be used directly. */
3283 gcc_unreachable ();
3285 case E_V64QImode:
3286 case E_V32HImode:
3287 case E_V16SImode:
3288 case E_V8DImode:
3289 case E_V32HFmode:
3290 case E_V32BFmode:
3291 case E_V16SFmode:
3292 case E_V8DFmode:
3293 case E_V16HFmode:
3294 case E_V16BFmode:
3295 case E_V8SFmode:
3296 case E_V8SImode:
3297 case E_V32QImode:
3298 case E_V16HImode:
3299 case E_V4DFmode:
3300 case E_V4DImode:
3301 if (!type || !AGGREGATE_TYPE_P (type))
3303 if (cum->sse_nregs)
3304 return gen_reg_or_parallel (mode, orig_mode,
3305 cum->sse_regno + FIRST_SSE_REG);
3307 break;
3309 case E_V8QImode:
3310 case E_V4HImode:
3311 case E_V4HFmode:
3312 case E_V4BFmode:
3313 case E_V2SImode:
3314 case E_V2SFmode:
3315 case E_V1TImode:
3316 case E_V1DImode:
3317 if (!type || !AGGREGATE_TYPE_P (type))
3319 if (cum->mmx_nregs)
3320 return gen_reg_or_parallel (mode, orig_mode,
3321 cum->mmx_regno + FIRST_MMX_REG);
3323 break;
3325 if (error_p)
3327 cum->float_in_sse = 0;
3328 error ("calling %qD with SSE calling convention without "
3329 "SSE/SSE2 enabled", cum->decl);
3330 sorry ("this is a GCC bug that can be worked around by adding "
3331 "attribute used to function called");
3334 return NULL_RTX;
3337 static rtx
3338 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3339 machine_mode orig_mode, const_tree type, bool named)
3341 /* Handle a hidden AL argument containing number of registers
3342 for varargs x86-64 functions. */
3343 if (mode == VOIDmode)
3344 return GEN_INT (cum->maybe_vaarg
3345 ? (cum->sse_nregs < 0
3346 ? X86_64_SSE_REGPARM_MAX
3347 : cum->sse_regno)
3348 : -1);
3350 switch (mode)
3352 default:
3353 break;
3355 case E_V16HFmode:
3356 case E_V16BFmode:
3357 case E_V8SFmode:
3358 case E_V8SImode:
3359 case E_V32QImode:
3360 case E_V16HImode:
3361 case E_V4DFmode:
3362 case E_V4DImode:
3363 case E_V32HFmode:
3364 case E_V32BFmode:
3365 case E_V16SFmode:
3366 case E_V16SImode:
3367 case E_V64QImode:
3368 case E_V32HImode:
3369 case E_V8DFmode:
3370 case E_V8DImode:
3371 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3372 if (!named)
3373 return NULL;
3374 break;
3377 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3378 cum->sse_nregs,
3379 &x86_64_int_parameter_registers [cum->regno],
3380 cum->sse_regno);
3383 static rtx
3384 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3385 machine_mode orig_mode, bool named, const_tree type,
3386 HOST_WIDE_INT bytes)
3388 unsigned int regno;
3390 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
3391 We use the value -2 to specify that the current function call is MS ABI. */
3392 if (mode == VOIDmode)
3393 return GEN_INT (-2);
3395 /* If we've run out of registers, it goes on the stack. */
3396 if (cum->nregs == 0)
3397 return NULL_RTX;
3399 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3401 /* Only floating point modes are passed in anything but integer regs. */
3402 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3404 if (named)
3406 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3407 regno = cum->regno + FIRST_SSE_REG;
3409 else
3411 rtx t1, t2;
3413 /* Unnamed floating parameters are passed in both the
3414 SSE and integer registers. */
3415 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3416 t2 = gen_rtx_REG (mode, regno);
3417 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3418 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3419 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3422 /* Handle aggregate types passed in registers. */
3423 if (orig_mode == BLKmode)
3425 if (bytes > 0 && bytes <= 8)
3426 mode = (bytes > 4 ? DImode : SImode);
3427 if (mode == BLKmode)
3428 mode = DImode;
3431 return gen_reg_or_parallel (mode, orig_mode, regno);
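/* Illustrative note (an editorial addition): for an MS-ABI varargs call
   such as printf ("%f", 3.14), the unnamed double in the second argument
   slot is described by the PARALLEL above, i.e. it is passed in both
   %xmm1 and %rdx so the callee can pick it up either way.  */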
3434 /* Return where to put the arguments to a function.
3435 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3437 ARG describes the argument while CUM gives information about the
3438 preceding args and about the function being called. */
3440 static rtx
3441 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3443 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3444 machine_mode mode = arg.mode;
3445 HOST_WIDE_INT bytes, words;
3446 rtx reg;
3448 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3450 gcc_assert (arg.type != NULL_TREE);
3451 if (POINTER_TYPE_P (arg.type))
3453 /* This is the pointer argument. */
3454 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3455 /* It is at -WORD(AP) in the current frame in interrupt and
3456 exception handlers. */
3457 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3459 else
3461 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3462 && TREE_CODE (arg.type) == INTEGER_TYPE
3463 && TYPE_MODE (arg.type) == word_mode);
3464 /* The error code is the word-mode integer argument at
3465 -2 * WORD(AP) in the current frame of the exception
3466 handler. */
3467 reg = gen_rtx_MEM (word_mode,
3468 plus_constant (Pmode,
3469 arg_pointer_rtx,
3470 -2 * UNITS_PER_WORD));
3472 return reg;
3475 bytes = arg.promoted_size_in_bytes ();
3476 words = CEIL (bytes, UNITS_PER_WORD);
3478 /* To simplify the code below, represent vector types with a vector mode
3479 even if MMX/SSE are not active. */
3480 if (arg.type && VECTOR_TYPE_P (arg.type))
3481 mode = type_natural_mode (arg.type, cum, false);
3483 if (TARGET_64BIT)
3485 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3487 if (call_abi == MS_ABI)
3488 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3489 arg.type, bytes);
3490 else
3491 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3493 else
3494 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3496 /* Track if there are outgoing arguments on stack. */
3497 if (reg == NULL_RTX && cum->caller)
3498 cfun->machine->outgoing_args_on_stack = true;
3500 return reg;
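/* Illustrative example (not part of the original source): for an x86
   interrupt handler declared as

	void __attribute__ ((interrupt))
	handler (struct interrupt_frame *frame, uword_t error_code);

   the code above locates FRAME at -WORD(AP) and ERROR_CODE at
   -2*WORD(AP) in the handler's frame instead of using the normal
   argument-passing registers.  */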
3503 /* A C expression that indicates when an argument must be passed by
3504 reference. If nonzero for an argument, a copy of that argument is
3505 made in memory and a pointer to the argument is passed instead of
3506 the argument itself. The pointer is passed in whatever way is
3507 appropriate for passing a pointer to that type. */
3509 static bool
3510 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3512 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3514 if (TARGET_64BIT)
3516 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3518 /* See Windows x64 Software Convention. */
3519 if (call_abi == MS_ABI)
3521 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3523 if (tree type = arg.type)
3525 /* Arrays are passed by reference. */
3526 if (TREE_CODE (type) == ARRAY_TYPE)
3527 return true;
3529 if (RECORD_OR_UNION_TYPE_P (type))
3531 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3532 are passed by reference. */
3533 msize = int_size_in_bytes (type);
3537 /* __m128 is passed by reference. */
3538 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3540 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3541 return true;
3544 return false;
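/* Worked example (illustrative): under the Windows x64 convention checked
   above, struct { int a, b; } (8 bytes) is passed by value in a register,
   while struct { char c[3]; } (3 bytes) and __m128 (16 bytes) are passed
   by reference because their sizes are not 1, 2, 4 or 8.  */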
3547 /* Return true when TYPE should be 128bit aligned for 32bit argument
3548 passing ABI. XXX: This function is obsolete and is only used for
3549 checking psABI compatibility with previous versions of GCC. */
3551 static bool
3552 ix86_compat_aligned_value_p (const_tree type)
3554 machine_mode mode = TYPE_MODE (type);
3555 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3556 || mode == TDmode
3557 || mode == TFmode
3558 || mode == TCmode)
3559 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3560 return true;
3561 if (TYPE_ALIGN (type) < 128)
3562 return false;
3564 if (AGGREGATE_TYPE_P (type))
3566 /* Walk the aggregates recursively. */
3567 switch (TREE_CODE (type))
3569 case RECORD_TYPE:
3570 case UNION_TYPE:
3571 case QUAL_UNION_TYPE:
3573 tree field;
3575 /* Walk all the structure fields. */
3576 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3578 if (TREE_CODE (field) == FIELD_DECL
3579 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3580 return true;
3582 break;
3585 case ARRAY_TYPE:
3586 /* Just for use if some languages pass arrays by value. */
3587 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3588 return true;
3589 break;
3591 default:
3592 gcc_unreachable ();
3595 return false;
3598 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3599 XXX: This function is obsolete and is only used for checking psABI
3600 compatibility with previous versions of GCC. */
3602 static unsigned int
3603 ix86_compat_function_arg_boundary (machine_mode mode,
3604 const_tree type, unsigned int align)
3606 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3607 natural boundaries. */
3608 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3610 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3611 make an exception for SSE modes since these require 128bit
3612 alignment.
3614 The handling here differs from field_alignment. ICC aligns MMX
3615 arguments to 4 byte boundaries, while structure fields are aligned
3616 to 8 byte boundaries. */
3617 if (!type)
3619 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3620 align = PARM_BOUNDARY;
3622 else
3624 if (!ix86_compat_aligned_value_p (type))
3625 align = PARM_BOUNDARY;
3628 if (align > BIGGEST_ALIGNMENT)
3629 align = BIGGEST_ALIGNMENT;
3630 return align;
3633 /* Return true when TYPE should be 128bit aligned for 32bit argument
3634 passing ABI. */
3636 static bool
3637 ix86_contains_aligned_value_p (const_tree type)
3639 machine_mode mode = TYPE_MODE (type);
3641 if (mode == XFmode || mode == XCmode)
3642 return false;
3644 if (TYPE_ALIGN (type) < 128)
3645 return false;
3647 if (AGGREGATE_TYPE_P (type))
3649 /* Walk the aggregates recursively. */
3650 switch (TREE_CODE (type))
3652 case RECORD_TYPE:
3653 case UNION_TYPE:
3654 case QUAL_UNION_TYPE:
3656 tree field;
3658 /* Walk all the structure fields. */
3659 for (field = TYPE_FIELDS (type);
3660 field;
3661 field = DECL_CHAIN (field))
3663 if (TREE_CODE (field) == FIELD_DECL
3664 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3665 return true;
3667 break;
3670 case ARRAY_TYPE:
3671 /* Just for use if some languages pass arrays by value. */
3672 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3673 return true;
3674 break;
3676 default:
3677 gcc_unreachable ();
3680 else
3681 return TYPE_ALIGN (type) >= 128;
3683 return false;
3686 /* Gives the alignment boundary, in bits, of an argument with the
3687 specified mode and type. */
3689 static unsigned int
3690 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3692 unsigned int align;
3693 if (type)
3695 /* Since the main variant type is used for the call, convert TYPE
3696 to its main variant type. */
3697 type = TYPE_MAIN_VARIANT (type);
3698 align = TYPE_ALIGN (type);
3699 if (TYPE_EMPTY_P (type))
3700 return PARM_BOUNDARY;
3702 else
3703 align = GET_MODE_ALIGNMENT (mode);
3704 if (align < PARM_BOUNDARY)
3705 align = PARM_BOUNDARY;
3706 else
3708 static bool warned;
3709 unsigned int saved_align = align;
3711 if (!TARGET_64BIT)
3713 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3714 if (!type)
3716 if (mode == XFmode || mode == XCmode)
3717 align = PARM_BOUNDARY;
3719 else if (!ix86_contains_aligned_value_p (type))
3720 align = PARM_BOUNDARY;
3722 if (align < 128)
3723 align = PARM_BOUNDARY;
3726 if (warn_psabi
3727 && !warned
3728 && align != ix86_compat_function_arg_boundary (mode, type,
3729 saved_align))
3731 warned = true;
3732 inform (input_location,
3733 "the ABI for passing parameters with %d-byte"
3734 " alignment has changed in GCC 4.6",
3735 align / BITS_PER_UNIT);
3739 return align;
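/* For instance (illustrative): on ia32 a plain double argument ends up
   with the default PARM_BOUNDARY alignment of 4 bytes, while an __m128
   argument is aligned to 16 bytes; the -Wpsabi note above flags the cases
   where this boundary differs from what GCC 4.5 and earlier computed.  */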
3742 /* Return true if N is a possible register number of function value. */
3744 static bool
3745 ix86_function_value_regno_p (const unsigned int regno)
3747 switch (regno)
3749 case AX_REG:
3750 return true;
3751 case DX_REG:
3752 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3753 case DI_REG:
3754 case SI_REG:
3755 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3757 /* Complex values are returned in %st(0)/%st(1) pair. */
3758 case ST0_REG:
3759 case ST1_REG:
3760 /* TODO: The function should depend on current function ABI but
3761 builtins.cc would need updating then. Therefore we use the
3762 default ABI. */
3763 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3764 return false;
3765 return TARGET_FLOAT_RETURNS_IN_80387;
3767 /* Complex values are returned in %xmm0/%xmm1 pair. */
3768 case XMM0_REG:
3769 case XMM1_REG:
3770 return TARGET_SSE;
3772 case MM0_REG:
3773 if (TARGET_MACHO || TARGET_64BIT)
3774 return false;
3775 return TARGET_MMX;
3778 return false;
3781 /* Check whether the register REGNO should be zeroed on X86.
3782 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3783 together, so there is no need to zero them again.
3784 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3786 static bool
3787 zero_call_used_regno_p (const unsigned int regno,
3788 bool all_sse_zeroed,
3789 bool need_zero_mmx)
3791 return GENERAL_REGNO_P (regno)
3792 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3793 || MASK_REGNO_P (regno)
3794 || (need_zero_mmx && MMX_REGNO_P (regno));
3797 /* Return the machine_mode that is used to zero register REGNO. */
3799 static machine_mode
3800 zero_call_used_regno_mode (const unsigned int regno)
3802 /* NB: We only need to zero the lower 32 bits for integer registers
3803 and the lower 128 bits for vector registers, since destinations are
3804 zero-extended to the full register width. */
3805 if (GENERAL_REGNO_P (regno))
3806 return SImode;
3807 else if (SSE_REGNO_P (regno))
3808 return V4SFmode;
3809 else if (MASK_REGNO_P (regno))
3810 return HImode;
3811 else if (MMX_REGNO_P (regno))
3812 return V2SImode;
3813 else
3814 gcc_unreachable ();
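/* Illustrative note (editorial): because writes to 32-bit general
   registers implicitly zero-extend to 64 bits on x86-64, zeroing a GPR in
   SImode, e.g.

	xorl	%eax, %eax

   is enough to clear the whole 64-bit register.  */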
3817 /* Generate a rtx to zero all vector registers together if possible,
3818 otherwise, return NULL. */
3820 static rtx
3821 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3823 if (!TARGET_AVX)
3824 return NULL;
3826 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3827 if ((LEGACY_SSE_REGNO_P (regno)
3828 || (TARGET_64BIT
3829 && (REX_SSE_REGNO_P (regno)
3830 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3831 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3832 return NULL;
3834 return gen_avx_vzeroall ();
3837 /* Generate insns to zero all st registers together.
3838 Return the number of st registers zeroed (0 when none are zeroed).
3839 Assume the number of st registers that are zeroed is num_of_st,
3840 we will emit the following sequence to zero them together:
3841 fldz; \
3842 fldz; \
3844 fldz; \
3845 fstp %%st(0); \
3846 fstp %%st(0); \
3848 fstp %%st(0);
3849 i.e., num_of_st fldz instructions followed by num_of_st fstp instructions
3850 to clear the stack and mark the stack slots empty.
3852 How to compute the num_of_st:
3853 There is no direct mapping from stack registers to hard register
3854 numbers. If one stack register needs to be cleared, we don't know
3855 where in the stack the value remains. So, if any stack register
3856 needs to be cleared, the whole stack should be cleared. However,
3857 x87 stack registers that hold the return value should be excluded.
3858 x87 returns the value in the top register (the top two for complex
3859 values), so num_of_st should be 7 or 6 when x87 returns, otherwise 8.
3860 Return the value of num_of_st. */
3863 static int
3864 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3867 /* If the FPU is disabled, no need to zero all st registers. */
3868 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3869 return 0;
3871 unsigned int num_of_st = 0;
3872 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3873 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3874 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3876 num_of_st++;
3877 break;
3880 if (num_of_st == 0)
3881 return 0;
3883 bool return_with_x87 = false;
3884 return_with_x87 = (crtl->return_rtx
3885 && (STACK_REG_P (crtl->return_rtx)));
3887 bool complex_return = false;
3888 complex_return = (crtl->return_rtx
3889 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3891 if (return_with_x87)
3892 if (complex_return)
3893 num_of_st = 6;
3894 else
3895 num_of_st = 7;
3896 else
3897 num_of_st = 8;
3899 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3900 for (unsigned int i = 0; i < num_of_st; i++)
3901 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3903 for (unsigned int i = 0; i < num_of_st; i++)
3905 rtx insn;
3906 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3907 add_reg_note (insn, REG_DEAD, st_reg);
3909 return num_of_st;
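/* As a concrete sketch (illustrative, not from the original source): for
   a function that does not return in an x87 register, num_of_st is 8 and
   the sequence emitted above amounts to

	fldz	(8 times)
	fstp	%st(0)	(8 times)

   which pushes eight zeros and then pops them again, leaving every x87
   stack slot cleared and marked empty.  */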
3913 /* When the routine exits in MMX mode, if any ST register needs
3914 to be zeroed, we should clear all MMX registers except the
3915 RET_MMX_REGNO that holds the return value. */
3916 static bool
3917 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3918 unsigned int ret_mmx_regno)
3920 bool need_zero_all_mm = false;
3921 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3922 if (STACK_REGNO_P (regno)
3923 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3925 need_zero_all_mm = true;
3926 break;
3929 if (!need_zero_all_mm)
3930 return false;
3932 machine_mode mode = V2SImode;
3933 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3934 if (regno != ret_mmx_regno)
3936 rtx reg = gen_rtx_REG (mode, regno);
3937 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3939 return true;
3942 /* TARGET_ZERO_CALL_USED_REGS. */
3943 /* Generate a sequence of instructions that zero registers specified by
3944 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3945 zeroed. */
3946 static HARD_REG_SET
3947 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3949 HARD_REG_SET zeroed_hardregs;
3950 bool all_sse_zeroed = false;
3951 int all_st_zeroed_num = 0;
3952 bool all_mm_zeroed = false;
3954 CLEAR_HARD_REG_SET (zeroed_hardregs);
3956 /* first, let's see whether we can zero all vector registers together. */
3957 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3958 if (zero_all_vec_insn)
3960 emit_insn (zero_all_vec_insn);
3961 all_sse_zeroed = true;
3964 /* MM and ST registers share the same register set, so we should follow
3965 these rules to clear them:
3966 MMX exit mode x87 exit mode
3967 -------------|----------------------|---------------
3968 uses x87 reg | clear all MMX | clear all x87
3969 uses MMX reg | clear individual MMX | clear all x87
3970 x87 + MMX | clear all MMX | clear all x87
3972 first, we should decide which mode (MMX mode or x87 mode) the function
3973 exits with. */
3975 bool exit_with_mmx_mode = (crtl->return_rtx
3976 && (MMX_REG_P (crtl->return_rtx)));
3978 if (!exit_with_mmx_mode)
3979 /* x87 exit mode, we should zero all st registers together. */
3981 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3983 if (all_st_zeroed_num > 0)
3984 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
3985 /* x87 stack registers that hold the return value should be excluded.
3986 x87 returns in the top (two for complex values) register. */
3987 if (all_st_zeroed_num == 8
3988 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
3989 || (all_st_zeroed_num == 6
3990 && (regno == (REGNO (crtl->return_rtx) + 1)))))
3991 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3993 else
3994 /* MMX exit mode, check whether we can zero all mm registers. */
3996 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3997 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3998 exit_mmx_regno);
3999 if (all_mm_zeroed)
4000 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
4001 if (regno != exit_mmx_regno)
4002 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4005 /* Now, generate instructions to zero all the other registers. */
4007 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4009 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
4010 continue;
4011 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
4012 exit_with_mmx_mode && !all_mm_zeroed))
4013 continue;
4015 SET_HARD_REG_BIT (zeroed_hardregs, regno);
4017 machine_mode mode = zero_call_used_regno_mode (regno);
4019 rtx reg = gen_rtx_REG (mode, regno);
4020 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
4022 switch (mode)
4024 case E_SImode:
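/* Zeroing a GPR with xor clobbers the flags, so unless mov $0 is
   preferred (TARGET_USE_MOV0 and not optimizing for size) attach a
   FLAGS_REG clobber to the set. */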
4025 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
4027 rtx clob = gen_rtx_CLOBBER (VOIDmode,
4028 gen_rtx_REG (CCmode,
4029 FLAGS_REG));
4030 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
4031 tmp,
4032 clob));
4034 /* FALLTHRU. */
4036 case E_V4SFmode:
4037 case E_HImode:
4038 case E_V2SImode:
4039 emit_insn (tmp);
4040 break;
4042 default:
4043 gcc_unreachable ();
4046 return zeroed_hardregs;
4049 /* Define how to find the value returned by a function.
4050 VALTYPE is the data type of the value (as a tree).
4051 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4052 otherwise, FUNC is 0. */
4054 static rtx
4055 function_value_32 (machine_mode orig_mode, machine_mode mode,
4056 const_tree fntype, const_tree fn)
4058 unsigned int regno;
4060 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4061 we normally prevent this case when mmx is not available. However
4062 some ABIs may require the result to be returned like DImode. */
4063 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4064 regno = FIRST_MMX_REG;
4066 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4067 we prevent this case when sse is not available. However some ABIs
4068 may require the result to be returned like integer TImode. */
4069 else if (mode == TImode
4070 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4071 regno = FIRST_SSE_REG;
4073 /* 32-byte vector modes in %ymm0. */
4074 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4075 regno = FIRST_SSE_REG;
4077 /* 64-byte vector modes in %zmm0. */
4078 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4079 regno = FIRST_SSE_REG;
4081 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4082 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4083 regno = FIRST_FLOAT_REG;
4084 else
4085 /* Most things go in %eax. */
4086 regno = AX_REG;
4088 /* Return __bf16/_Float16/_Complex _Float16 in an SSE register. */
4089 if (mode == HFmode || mode == BFmode)
4091 if (!TARGET_SSE2)
4093 error ("SSE register return with SSE2 disabled");
4094 regno = AX_REG;
4096 else
4097 regno = FIRST_SSE_REG;
4100 if (mode == HCmode)
4102 if (!TARGET_SSE2)
4103 error ("SSE register return with SSE2 disabled");
4105 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4106 XVECEXP (ret, 0, 0)
4107 = gen_rtx_EXPR_LIST (VOIDmode,
4108 gen_rtx_REG (SImode,
4109 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4110 GEN_INT (0));
4111 return ret;
4114 /* Override FP return register with %xmm0 for local functions when
4115 SSE math is enabled or for functions with sseregparm attribute. */
4116 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4118 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4119 if (sse_level == -1)
4121 error ("calling %qD with SSE calling convention without "
4122 "SSE/SSE2 enabled", fn);
4123 sorry ("this is a GCC bug that can be worked around by adding "
4124 "attribute used to the called function");
4126 else if ((sse_level >= 1 && mode == SFmode)
4127 || (sse_level == 2 && mode == DFmode))
4128 regno = FIRST_SSE_REG;
4131 /* OImode shouldn't be used directly. */
4132 gcc_assert (mode != OImode);
4134 return gen_rtx_REG (orig_mode, regno);
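/* Likewise for the 64-bit SysV ABI.  VALTYPE may be NULL for libcalls,
   in which case the return register is picked from MODE alone;
   otherwise construct_container decides the register(s). */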
4137 static rtx
4138 function_value_64 (machine_mode orig_mode, machine_mode mode,
4139 const_tree valtype)
4141 rtx ret;
4143 /* Handle libcalls, which don't provide a type node. */
4144 if (valtype == NULL)
4146 unsigned int regno;
4148 switch (mode)
4150 case E_BFmode:
4151 case E_HFmode:
4152 case E_HCmode:
4153 case E_SFmode:
4154 case E_SCmode:
4155 case E_DFmode:
4156 case E_DCmode:
4157 case E_TFmode:
4158 case E_SDmode:
4159 case E_DDmode:
4160 case E_TDmode:
4161 regno = FIRST_SSE_REG;
4162 break;
4163 case E_XFmode:
4164 case E_XCmode:
4165 regno = FIRST_FLOAT_REG;
4166 break;
4167 case E_TCmode:
4168 return NULL;
4169 default:
4170 regno = AX_REG;
4173 return gen_rtx_REG (mode, regno);
4175 else if (POINTER_TYPE_P (valtype))
4177 /* Pointers are always returned in word_mode. */
4178 mode = word_mode;
4181 ret = construct_container (mode, orig_mode, valtype, 1,
4182 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4183 x86_64_int_return_registers, 0);
4185 /* For zero-sized structures, construct_container returns NULL, but we
4186 need to keep the rest of the compiler happy by returning a meaningful value. */
4187 if (!ret)
4188 ret = gen_rtx_REG (orig_mode, AX_REG);
4190 return ret;
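/* Likewise for the 32-bit MS ABI, which differs from function_value_32
   in that aggregate types of up to 8 bytes are not returned in %st(0). */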
4193 static rtx
4194 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4195 const_tree fntype, const_tree fn, const_tree valtype)
4197 unsigned int regno;
4199 /* Floating point return values in %st(0)
4200 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4201 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4202 && (GET_MODE_SIZE (mode) > 8
4203 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4205 regno = FIRST_FLOAT_REG;
4206 return gen_rtx_REG (orig_mode, regno);
4208 else
4209 return function_value_32 (orig_mode, mode, fntype, fn);
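/* Likewise for the 64-bit MS ABI: by default values come back in %rax,
   while SFmode/DFmode scalars and suitable 16-byte vector/integer values
   use %xmm0 when SSE is enabled. */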
4212 static rtx
4213 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4214 const_tree valtype)
4216 unsigned int regno = AX_REG;
4218 if (TARGET_SSE)
4220 switch (GET_MODE_SIZE (mode))
4222 case 16:
4223 if (valtype != NULL_TREE
4224 && !VECTOR_INTEGER_TYPE_P (valtype)
4226 && !INTEGRAL_TYPE_P (valtype)
4227 && !VECTOR_FLOAT_TYPE_P (valtype))
4228 break;
4229 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4230 && !COMPLEX_MODE_P (mode))
4231 regno = FIRST_SSE_REG;
4232 break;
4233 case 8:
4234 case 4:
4235 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4236 break;
4237 if (mode == SFmode || mode == DFmode)
4238 regno = FIRST_SSE_REG;
4239 break;
4240 default:
4241 break;
4244 return gen_rtx_REG (orig_mode, regno);
4247 static rtx
4248 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4249 machine_mode orig_mode, machine_mode mode)
4251 const_tree fn, fntype;
4253 fn = NULL_TREE;
4254 if (fntype_or_decl && DECL_P (fntype_or_decl))
4255 fn = fntype_or_decl;
4256 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4258 if (ix86_function_type_abi (fntype) == MS_ABI)
4260 if (TARGET_64BIT)
4261 return function_value_ms_64 (orig_mode, mode, valtype);
4262 else
4263 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4265 else if (TARGET_64BIT)
4266 return function_value_64 (orig_mode, mode, valtype);
4267 else
4268 return function_value_32 (orig_mode, mode, fntype, fn);
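/* Implement TARGET_FUNCTION_VALUE. */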
4271 static rtx
4272 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4274 machine_mode mode, orig_mode;
4276 orig_mode = TYPE_MODE (valtype);
4277 mode = type_natural_mode (valtype, NULL, true);
4278 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4281 /* Pointer function arguments and return values are promoted to
4282 word_mode for normal functions. */
4284 static machine_mode
4285 ix86_promote_function_mode (const_tree type, machine_mode mode,
4286 int *punsignedp, const_tree fntype,
4287 int for_return)
4289 if (cfun->machine->func_type == TYPE_NORMAL
4290 && type != NULL_TREE
4291 && POINTER_TYPE_P (type))
4293 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4294 return word_mode;
4296 return default_promote_function_mode (type, mode, punsignedp, fntype,
4297 for_return);
4300 /* Return true if a structure, union or array with MODE containing FIELD
4301 should be accessed using BLKmode. */
4303 static bool
4304 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4306 /* Union with XFmode must be in BLKmode. */
4307 return (mode == XFmode
4308 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4309 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
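/* Implement TARGET_LIBCALL_VALUE. */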
4312 static rtx
4313 ix86_libcall_value (machine_mode mode)
4315 return ix86_function_value_1 (NULL, NULL, mode, mode);
4318 /* Return true iff type is returned in memory. */
4320 static bool
4321 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4323 const machine_mode mode = type_natural_mode (type, NULL, true);
4324 HOST_WIDE_INT size;
4326 if (TARGET_64BIT)
4328 if (ix86_function_type_abi (fntype) == MS_ABI)
4330 size = int_size_in_bytes (type);
4332 /* __m128 is returned in xmm0. */
4333 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4334 || INTEGRAL_TYPE_P (type)
4335 || VECTOR_FLOAT_TYPE_P (type))
4336 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4337 && !COMPLEX_MODE_P (mode)
4338 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4339 return false;
4341 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4342 return size != 1 && size != 2 && size != 4 && size != 8;
4344 else
4346 int needed_intregs, needed_sseregs;
4348 return examine_argument (mode, type, 1,
4349 &needed_intregs, &needed_sseregs);
4352 else
4354 size = int_size_in_bytes (type);
4356 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4357 bytes in registers. */
4358 if (TARGET_IAMCU)
4359 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4361 if (mode == BLKmode)
4362 return true;
4364 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4365 return false;
4367 if (VECTOR_MODE_P (mode) || mode == TImode)
4369 /* User-created vectors small enough to fit in EAX. */
4370 if (size < 8)
4371 return false;
4373 /* Unless the ABI prescribes otherwise,
4374 MMX/3dNow values are returned in MM0 if available. */
4376 if (size == 8)
4377 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4379 /* SSE values are returned in XMM0 if available. */
4380 if (size == 16)
4381 return !TARGET_SSE;
4383 /* AVX values are returned in YMM0 if available. */
4384 if (size == 32)
4385 return !TARGET_AVX;
4387 /* AVX512F values are returned in ZMM0 if available. */
4388 if (size == 64)
4389 return !TARGET_AVX512F || !TARGET_EVEX512;
4392 if (mode == XFmode)
4393 return false;
4395 if (size > 12)
4396 return true;
4398 /* OImode shouldn't be used directly. */
4399 gcc_assert (mode != OImode);
4401 return false;
4405 /* Implement TARGET_PUSH_ARGUMENT. */
4407 static bool
4408 ix86_push_argument (unsigned int npush)
4410 /* If SSE2 is available, use vector move to put large argument onto
4411 stack. NB: In 32-bit mode, use 8-byte vector move. */
4412 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4413 && TARGET_PUSH_ARGS
4414 && !ACCUMULATE_OUTGOING_ARGS);
4418 /* Create the va_list data type. */
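/* For the 64-bit SysV psABI this builds the __va_list_tag record:
   gp_offset and fp_offset are byte offsets into reg_save_area for the
   next available GP and SSE argument register, overflow_arg_area points
   to the next stack argument, and va_list itself is a one-element array
   of this record. */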
4420 static tree
4421 ix86_build_builtin_va_list_64 (void)
4423 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4425 record = lang_hooks.types.make_type (RECORD_TYPE);
4426 type_decl = build_decl (BUILTINS_LOCATION,
4427 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4429 f_gpr = build_decl (BUILTINS_LOCATION,
4430 FIELD_DECL, get_identifier ("gp_offset"),
4431 unsigned_type_node);
4432 f_fpr = build_decl (BUILTINS_LOCATION,
4433 FIELD_DECL, get_identifier ("fp_offset"),
4434 unsigned_type_node);
4435 f_ovf = build_decl (BUILTINS_LOCATION,
4436 FIELD_DECL, get_identifier ("overflow_arg_area"),
4437 ptr_type_node);
4438 f_sav = build_decl (BUILTINS_LOCATION,
4439 FIELD_DECL, get_identifier ("reg_save_area"),
4440 ptr_type_node);
4442 va_list_gpr_counter_field = f_gpr;
4443 va_list_fpr_counter_field = f_fpr;
4445 DECL_FIELD_CONTEXT (f_gpr) = record;
4446 DECL_FIELD_CONTEXT (f_fpr) = record;
4447 DECL_FIELD_CONTEXT (f_ovf) = record;
4448 DECL_FIELD_CONTEXT (f_sav) = record;
4450 TYPE_STUB_DECL (record) = type_decl;
4451 TYPE_NAME (record) = type_decl;
4452 TYPE_FIELDS (record) = f_gpr;
4453 DECL_CHAIN (f_gpr) = f_fpr;
4454 DECL_CHAIN (f_fpr) = f_ovf;
4455 DECL_CHAIN (f_ovf) = f_sav;
4457 layout_type (record);
4459 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4460 NULL_TREE, TYPE_ATTRIBUTES (record));
4462 /* The correct type is an array type of one element. */
4463 return build_array_type (record, build_index_type (size_zero_node));
4466 /* Setup the builtin va_list data type and for 64-bit the additional
4467 calling convention specific va_list data types. */
4469 static tree
4470 ix86_build_builtin_va_list (void)
4472 if (TARGET_64BIT)
4474 /* Initialize ABI specific va_list builtin types.
4476 In lto1, we can encounter two va_list types:
4477 - one as a result of the type-merge across TUs, and
4478 - the one constructed here.
4479 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4480 a type identity check in canonical_va_list_type based on
4481 TYPE_MAIN_VARIANT (which we used to have) will not work.
4482 Instead, we tag each va_list_type_node with its unique attribute, and
4483 look for the attribute in the type identity check in
4484 canonical_va_list_type.
4486 Tagging sysv_va_list_type_node directly with the attribute is
4487 problematic since it's an array of one record, which will degrade into a
4488 pointer to record when used as parameter (see build_va_arg comments for
4489 an example), dropping the attribute in the process. So we tag the
4490 record instead. */
4492 /* For SYSV_ABI we use an array of one record. */
4493 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4495 /* For MS_ABI we use plain pointer to argument area. */
4496 tree char_ptr_type = build_pointer_type (char_type_node);
4497 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4498 TYPE_ATTRIBUTES (char_ptr_type));
4499 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4501 return ((ix86_abi == MS_ABI)
4502 ? ms_va_list_type_node
4503 : sysv_va_list_type_node);
4505 else
4507 /* For i386 we use plain pointer to argument area. */
4508 return build_pointer_type (char_type_node);
4512 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4514 static void
4515 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4517 rtx save_area, mem;
4518 alias_set_type set;
4519 int i, max;
4521 /* GPR size of varargs save area. */
4522 if (cfun->va_list_gpr_size)
4523 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4524 else
4525 ix86_varargs_gpr_size = 0;
4527 /* FPR size of varargs save area. We don't need it if we don't pass
4528 anything in SSE registers. */
4529 if (TARGET_SSE && cfun->va_list_fpr_size)
4530 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4531 else
4532 ix86_varargs_fpr_size = 0;
4534 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4535 return;
4537 save_area = frame_pointer_rtx;
4538 set = get_varargs_alias_set ();
4540 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4541 if (max > X86_64_REGPARM_MAX)
4542 max = X86_64_REGPARM_MAX;
4544 for (i = cum->regno; i < max; i++)
4546 mem = gen_rtx_MEM (word_mode,
4547 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4548 MEM_NOTRAP_P (mem) = 1;
4549 set_mem_alias_set (mem, set);
4550 emit_move_insn (mem,
4551 gen_rtx_REG (word_mode,
4552 x86_64_int_parameter_registers[i]));
4555 if (ix86_varargs_fpr_size)
4557 machine_mode smode;
4558 rtx_code_label *label;
4559 rtx test;
4561 /* Now emit code to save SSE registers. The AX parameter contains number
4562 of SSE parameter registers used to call this function, though all we
4563 actually check here is the zero/non-zero status. */
4565 label = gen_label_rtx ();
4566 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4567 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4568 label));
4570 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4571 we used movdqa (i.e. TImode) instead? Perhaps even better would
4572 be if we could determine the real mode of the data, via a hook
4573 into pass_stdarg. Ignore all that for now. */
4574 smode = V4SFmode;
4575 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4576 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4578 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4579 if (max > X86_64_SSE_REGPARM_MAX)
4580 max = X86_64_SSE_REGPARM_MAX;
4582 for (i = cum->sse_regno; i < max; ++i)
4584 mem = plus_constant (Pmode, save_area,
4585 i * 16 + ix86_varargs_gpr_size);
4586 mem = gen_rtx_MEM (smode, mem);
4587 MEM_NOTRAP_P (mem) = 1;
4588 set_mem_alias_set (mem, set);
4589 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4591 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4594 emit_label (label);
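/* Likewise for the MS 64-bit ABI: store the register parameters into
   their stack home slots (the caller-allocated parameter area) so that
   all variadic arguments can then be read off the stack. */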
4598 static void
4599 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4601 alias_set_type set = get_varargs_alias_set ();
4602 int i;
4604 /* Reset to zero, as there might be a sysv vaarg used
4605 before. */
4606 ix86_varargs_gpr_size = 0;
4607 ix86_varargs_fpr_size = 0;
4609 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4611 rtx reg, mem;
4613 mem = gen_rtx_MEM (Pmode,
4614 plus_constant (Pmode, virtual_incoming_args_rtx,
4615 i * UNITS_PER_WORD));
4616 MEM_NOTRAP_P (mem) = 1;
4617 set_mem_alias_set (mem, set);
4619 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4620 emit_move_insn (mem, reg);
4624 static void
4625 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4626 const function_arg_info &arg,
4627 int *, int no_rtl)
4629 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4630 CUMULATIVE_ARGS next_cum;
4631 tree fntype;
4633 /* This argument doesn't appear to be used anymore, which is good,
4634 because the old code here didn't suppress rtl generation. */
4635 gcc_assert (!no_rtl);
4637 if (!TARGET_64BIT)
4638 return;
4640 fntype = TREE_TYPE (current_function_decl);
4642 /* For varargs, we do not want to skip the dummy va_dcl argument.
4643 For stdargs, we do want to skip the last named argument. */
4644 next_cum = *cum;
4645 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4646 && stdarg_p (fntype))
4647 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4649 if (cum->call_abi == MS_ABI)
4650 setup_incoming_varargs_ms_64 (&next_cum);
4651 else
4652 setup_incoming_varargs_64 (&next_cum);
4655 /* Checks if TYPE is of kind va_list char *. */
4657 static bool
4658 is_va_list_char_pointer (tree type)
4660 tree canonic;
4662 /* For 32-bit it is always true. */
4663 if (!TARGET_64BIT)
4664 return true;
4665 canonic = ix86_canonical_va_list_type (type);
4666 return (canonic == ms_va_list_type_node
4667 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4670 /* Implement va_start. */
4672 static void
4673 ix86_va_start (tree valist, rtx nextarg)
4675 HOST_WIDE_INT words, n_gpr, n_fpr;
4676 tree f_gpr, f_fpr, f_ovf, f_sav;
4677 tree gpr, fpr, ovf, sav, t;
4678 tree type;
4679 rtx ovf_rtx;
4681 if (flag_split_stack
4682 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4684 unsigned int scratch_regno;
4686 /* When we are splitting the stack, we can't refer to the stack
4687 arguments using internal_arg_pointer, because they may be on
4688 the old stack. The split stack prologue will arrange to
4689 leave a pointer to the old stack arguments in a scratch
4690 register, which we here copy to a pseudo-register. The split
4691 stack prologue can't set the pseudo-register directly because
4692 it (the prologue) runs before any registers have been saved. */
4694 scratch_regno = split_stack_prologue_scratch_regno ();
4695 if (scratch_regno != INVALID_REGNUM)
4697 rtx reg;
4698 rtx_insn *seq;
4700 reg = gen_reg_rtx (Pmode);
4701 cfun->machine->split_stack_varargs_pointer = reg;
4703 start_sequence ();
4704 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4705 seq = get_insns ();
4706 end_sequence ();
4708 push_topmost_sequence ();
4709 emit_insn_after (seq, entry_of_function ());
4710 pop_topmost_sequence ();
4714 /* Only 64bit target needs something special. */
4715 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4717 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4718 std_expand_builtin_va_start (valist, nextarg);
4719 else
4721 rtx va_r, next;
4723 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4724 next = expand_binop (ptr_mode, add_optab,
4725 cfun->machine->split_stack_varargs_pointer,
4726 crtl->args.arg_offset_rtx,
4727 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4728 convert_move (va_r, next, 0);
4730 return;
4733 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4734 f_fpr = DECL_CHAIN (f_gpr);
4735 f_ovf = DECL_CHAIN (f_fpr);
4736 f_sav = DECL_CHAIN (f_ovf);
4738 valist = build_simple_mem_ref (valist);
4739 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4740 /* The following should be folded into the MEM_REF offset. */
4741 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4742 f_gpr, NULL_TREE);
4743 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4744 f_fpr, NULL_TREE);
4745 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4746 f_ovf, NULL_TREE);
4747 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4748 f_sav, NULL_TREE);
4750 /* Count number of gp and fp argument registers used. */
4751 words = crtl->args.info.words;
4752 n_gpr = crtl->args.info.regno;
4753 n_fpr = crtl->args.info.sse_regno;
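/* gp_offset and fp_offset are byte offsets into the register save area:
   the X86_64_REGPARM_MAX integer registers occupy the first 8-byte
   slots, followed by the SSE registers in 16-byte slots (see
   setup_incoming_varargs_64). */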
4755 if (cfun->va_list_gpr_size)
4757 type = TREE_TYPE (gpr);
4758 t = build2 (MODIFY_EXPR, type,
4759 gpr, build_int_cst (type, n_gpr * 8));
4760 TREE_SIDE_EFFECTS (t) = 1;
4761 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4764 if (TARGET_SSE && cfun->va_list_fpr_size)
4766 type = TREE_TYPE (fpr);
4767 t = build2 (MODIFY_EXPR, type, fpr,
4768 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4769 TREE_SIDE_EFFECTS (t) = 1;
4770 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4773 /* Find the overflow area. */
4774 type = TREE_TYPE (ovf);
4775 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4776 ovf_rtx = crtl->args.internal_arg_pointer;
4777 else
4778 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4779 t = make_tree (type, ovf_rtx);
4780 if (words != 0)
4781 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4783 t = build2 (MODIFY_EXPR, type, ovf, t);
4784 TREE_SIDE_EFFECTS (t) = 1;
4785 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4787 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4789 /* Find the register save area.
4790 The prologue of the function saves it right above the stack frame. */
4791 type = TREE_TYPE (sav);
4792 t = make_tree (type, frame_pointer_rtx);
4793 if (!ix86_varargs_gpr_size)
4794 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4796 t = build2 (MODIFY_EXPR, type, sav, t);
4797 TREE_SIDE_EFFECTS (t) = 1;
4798 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4802 /* Implement va_arg. */
4804 static tree
4805 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4806 gimple_seq *post_p)
4808 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4809 tree f_gpr, f_fpr, f_ovf, f_sav;
4810 tree gpr, fpr, ovf, sav, t;
4811 int size, rsize;
4812 tree lab_false, lab_over = NULL_TREE;
4813 tree addr, t2;
4814 rtx container;
4815 int indirect_p = 0;
4816 tree ptrtype;
4817 machine_mode nat_mode;
4818 unsigned int arg_boundary;
4819 unsigned int type_align;
4821 /* Only 64bit target needs something special. */
4822 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4823 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4825 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4826 f_fpr = DECL_CHAIN (f_gpr);
4827 f_ovf = DECL_CHAIN (f_fpr);
4828 f_sav = DECL_CHAIN (f_ovf);
4830 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4831 valist, f_gpr, NULL_TREE);
4833 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4834 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4835 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4837 indirect_p = pass_va_arg_by_reference (type);
4838 if (indirect_p)
4839 type = build_pointer_type (type);
4840 size = arg_int_size_in_bytes (type);
4841 rsize = CEIL (size, UNITS_PER_WORD);
4843 nat_mode = type_natural_mode (type, NULL, false);
4844 switch (nat_mode)
4846 case E_V16HFmode:
4847 case E_V16BFmode:
4848 case E_V8SFmode:
4849 case E_V8SImode:
4850 case E_V32QImode:
4851 case E_V16HImode:
4852 case E_V4DFmode:
4853 case E_V4DImode:
4854 case E_V32HFmode:
4855 case E_V32BFmode:
4856 case E_V16SFmode:
4857 case E_V16SImode:
4858 case E_V64QImode:
4859 case E_V32HImode:
4860 case E_V8DFmode:
4861 case E_V8DImode:
4862 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
4863 if (!TARGET_64BIT_MS_ABI)
4865 container = NULL;
4866 break;
4868 /* FALLTHRU */
4870 default:
4871 container = construct_container (nat_mode, TYPE_MODE (type),
4872 type, 0, X86_64_REGPARM_MAX,
4873 X86_64_SSE_REGPARM_MAX, intreg,
4875 break;
4878 /* Pull the value out of the saved registers. */
4880 addr = create_tmp_var (ptr_type_node, "addr");
4881 type_align = TYPE_ALIGN (type);
4883 if (container)
4885 int needed_intregs, needed_sseregs;
4886 bool need_temp;
4887 tree int_addr, sse_addr;
4889 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4890 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4892 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4894 need_temp = (!REG_P (container)
4895 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4896 || TYPE_ALIGN (type) > 128));
4898 /* If we are passing a structure, verify that it occupies a consecutive block
4899 in the register save area. If not, we need to do moves. */
4900 if (!need_temp && !REG_P (container))
4902 /* Verify that all registers are strictly consecutive */
4903 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4905 int i;
4907 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4909 rtx slot = XVECEXP (container, 0, i);
4910 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4911 || INTVAL (XEXP (slot, 1)) != i * 16)
4912 need_temp = true;
4915 else
4917 int i;
4919 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4921 rtx slot = XVECEXP (container, 0, i);
4922 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4923 || INTVAL (XEXP (slot, 1)) != i * 8)
4924 need_temp = true;
4928 if (!need_temp)
4930 int_addr = addr;
4931 sse_addr = addr;
4933 else
4935 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4936 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4939 /* First ensure that we fit completely in registers. */
4940 if (needed_intregs)
4942 t = build_int_cst (TREE_TYPE (gpr),
4943 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4944 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4945 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4946 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4947 gimplify_and_add (t, pre_p);
4949 if (needed_sseregs)
4951 t = build_int_cst (TREE_TYPE (fpr),
4952 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4953 + X86_64_REGPARM_MAX * 8);
4954 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4955 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4956 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4957 gimplify_and_add (t, pre_p);
4960 /* Compute index to start of area used for integer regs. */
4961 if (needed_intregs)
4963 /* int_addr = gpr + sav; */
4964 t = fold_build_pointer_plus (sav, gpr);
4965 gimplify_assign (int_addr, t, pre_p);
4967 if (needed_sseregs)
4969 /* sse_addr = fpr + sav; */
4970 t = fold_build_pointer_plus (sav, fpr);
4971 gimplify_assign (sse_addr, t, pre_p);
4973 if (need_temp)
4975 int i, prev_size = 0;
4976 tree temp = create_tmp_var (type, "va_arg_tmp");
4977 TREE_ADDRESSABLE (temp) = 1;
4979 /* addr = &temp; */
4980 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4981 gimplify_assign (addr, t, pre_p);
4983 for (i = 0; i < XVECLEN (container, 0); i++)
4985 rtx slot = XVECEXP (container, 0, i);
4986 rtx reg = XEXP (slot, 0);
4987 machine_mode mode = GET_MODE (reg);
4988 tree piece_type;
4989 tree addr_type;
4990 tree daddr_type;
4991 tree src_addr, src;
4992 int src_offset;
4993 tree dest_addr, dest;
4994 int cur_size = GET_MODE_SIZE (mode);
4996 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4997 prev_size = INTVAL (XEXP (slot, 1));
4998 if (prev_size + cur_size > size)
5000 cur_size = size - prev_size;
5001 unsigned int nbits = cur_size * BITS_PER_UNIT;
5002 if (!int_mode_for_size (nbits, 1).exists (&mode))
5003 mode = QImode;
5005 piece_type = lang_hooks.types.type_for_mode (mode, 1);
5006 if (mode == GET_MODE (reg))
5007 addr_type = build_pointer_type (piece_type);
5008 else
5009 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5010 true);
5011 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
5012 true);
5014 if (SSE_REGNO_P (REGNO (reg)))
5016 src_addr = sse_addr;
5017 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5019 else
5021 src_addr = int_addr;
5022 src_offset = REGNO (reg) * 8;
5024 src_addr = fold_convert (addr_type, src_addr);
5025 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
5027 dest_addr = fold_convert (daddr_type, addr);
5028 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
5029 if (cur_size == GET_MODE_SIZE (mode))
5031 src = build_va_arg_indirect_ref (src_addr);
5032 dest = build_va_arg_indirect_ref (dest_addr);
5034 gimplify_assign (dest, src, pre_p);
5036 else
5038 tree copy
5039 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5040 3, dest_addr, src_addr,
5041 size_int (cur_size));
5042 gimplify_and_add (copy, pre_p);
5044 prev_size += cur_size;
5048 if (needed_intregs)
5050 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5051 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5052 gimplify_assign (gpr, t, pre_p);
5053 /* The GPR save area guarantees only 8-byte alignment. */
5054 if (!need_temp)
5055 type_align = MIN (type_align, 64);
5058 if (needed_sseregs)
5060 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5061 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5062 gimplify_assign (unshare_expr (fpr), t, pre_p);
5065 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5067 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5070 /* ... otherwise out of the overflow area. */
5072 /* When we align a parameter on the stack for the caller, if its
5073 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
5074 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
5075 here with the caller.
5076 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5077 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5078 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5080 /* Care for on-stack alignment if needed. */
5081 if (arg_boundary <= 64 || size == 0)
5082 t = ovf;
5083 else
5085 HOST_WIDE_INT align = arg_boundary / 8;
5086 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5087 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5088 build_int_cst (TREE_TYPE (t), -align));
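/* I.e. t = (ovf + align - 1) & -align, the usual round-up of the
   overflow area pointer to the argument boundary. */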
5091 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5092 gimplify_assign (addr, t, pre_p);
5094 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5095 gimplify_assign (unshare_expr (ovf), t, pre_p);
5097 if (container)
5098 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5100 type = build_aligned_type (type, type_align);
5101 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5102 addr = fold_convert (ptrtype, addr);
5104 if (indirect_p)
5105 addr = build_va_arg_indirect_ref (addr);
5106 return build_va_arg_indirect_ref (addr);
5109 /* Return true if OPNUM's MEM should be matched
5110 in movabs* patterns. */
5112 bool
5113 ix86_check_movabs (rtx insn, int opnum)
5115 rtx set, mem;
5117 set = PATTERN (insn);
5118 if (GET_CODE (set) == PARALLEL)
5119 set = XVECEXP (set, 0, 0);
5120 gcc_assert (GET_CODE (set) == SET);
5121 mem = XEXP (set, opnum);
5122 while (SUBREG_P (mem))
5123 mem = SUBREG_REG (mem);
5124 gcc_assert (MEM_P (mem));
5125 return volatile_ok || !MEM_VOLATILE_P (mem);
5128 /* Return false if INSN contains a MEM with a non-default address space. */
5129 bool
5130 ix86_check_no_addr_space (rtx insn)
5132 subrtx_var_iterator::array_type array;
5133 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5135 rtx x = *iter;
5136 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5137 return false;
5139 return true;
5142 /* Initialize the table of extra 80387 mathematical constants. */
5144 static void
5145 init_ext_80387_constants (void)
5147 static const char * cst[5] =
5149 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5150 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5151 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5152 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5153 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5155 int i;
5157 for (i = 0; i < 5; i++)
5159 real_from_string (&ext_80387_constants_table[i], cst[i]);
5160 /* Ensure each constant is rounded to XFmode precision. */
5161 real_convert (&ext_80387_constants_table[i],
5162 XFmode, &ext_80387_constants_table[i]);
5165 ext_80387_constants_init = 1;
5168 /* Return non-zero if the constant is something that
5169 can be loaded with a special instruction. */
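/* The return value says which one: 1 for fldz, 2 for fld1, 3..7 for the
   ext_80387_constants_table entries (fldlg2, fldln2, fldl2e, fldl2t,
   fldpi), and 8/9 for -0.0 and -1.0, which are split into fldz;fchs and
   fld1;fchs sequences. */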
5171 int
5172 standard_80387_constant_p (rtx x)
5174 machine_mode mode = GET_MODE (x);
5176 const REAL_VALUE_TYPE *r;
5178 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5179 return -1;
5181 if (x == CONST0_RTX (mode))
5182 return 1;
5183 if (x == CONST1_RTX (mode))
5184 return 2;
5186 r = CONST_DOUBLE_REAL_VALUE (x);
5188 /* For XFmode constants, try to find a special 80387 instruction when
5189 optimizing for size or on those CPUs that benefit from them. */
5190 if (mode == XFmode
5191 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5192 && !flag_rounding_math)
5194 int i;
5196 if (! ext_80387_constants_init)
5197 init_ext_80387_constants ();
5199 for (i = 0; i < 5; i++)
5200 if (real_identical (r, &ext_80387_constants_table[i]))
5201 return i + 3;
5204 /* A load of the constant -0.0 or -1.0 will be split into an
5205 fldz;fchs or fld1;fchs sequence. */
5206 if (real_isnegzero (r))
5207 return 8;
5208 if (real_identical (r, &dconstm1))
5209 return 9;
5211 return 0;
5214 /* Return the opcode of the special instruction to be used to load
5215 the constant X. */
5217 const char *
5218 standard_80387_constant_opcode (rtx x)
5220 switch (standard_80387_constant_p (x))
5222 case 1:
5223 return "fldz";
5224 case 2:
5225 return "fld1";
5226 case 3:
5227 return "fldlg2";
5228 case 4:
5229 return "fldln2";
5230 case 5:
5231 return "fldl2e";
5232 case 6:
5233 return "fldl2t";
5234 case 7:
5235 return "fldpi";
5236 case 8:
5237 case 9:
5238 return "#";
5239 default:
5240 gcc_unreachable ();
5244 /* Return the CONST_DOUBLE representing the 80387 constant that is
5245 loaded by the specified special instruction. The argument IDX
5246 matches the return value from standard_80387_constant_p. */
5248 rtx
5249 standard_80387_constant_rtx (int idx)
5251 int i;
5253 if (! ext_80387_constants_init)
5254 init_ext_80387_constants ();
5256 switch (idx)
5258 case 3:
5259 case 4:
5260 case 5:
5261 case 6:
5262 case 7:
5263 i = idx - 3;
5264 break;
5266 default:
5267 gcc_unreachable ();
5270 return const_double_from_real_value (ext_80387_constants_table[i],
5271 XFmode);
5274 /* Return 1 if X is all bits 0, 2 if X is all bits 1, and 3 if X is
5275 an all-ones lower half or quarter zero-extended to the full width,
5276 in a supported SSE/AVX vector mode. */
5278 int
5279 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5281 machine_mode mode;
5283 if (!TARGET_SSE)
5284 return 0;
5286 mode = GET_MODE (x);
5288 if (x == const0_rtx || const0_operand (x, mode))
5289 return 1;
5291 if (x == constm1_rtx
5292 || vector_all_ones_operand (x, mode)
5293 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5294 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5295 && float_vector_all_ones_operand (x, mode)))
5297 /* VOIDmode integer constant, get mode from the predicate. */
5298 if (mode == VOIDmode)
5299 mode = pred_mode;
5301 switch (GET_MODE_SIZE (mode))
5303 case 64:
5304 if (TARGET_AVX512F && TARGET_EVEX512)
5305 return 2;
5306 break;
5307 case 32:
5308 if (TARGET_AVX2)
5309 return 2;
5310 break;
5311 case 16:
5312 if (TARGET_SSE2)
5313 return 2;
5314 break;
5315 case 0:
5316 /* VOIDmode */
5317 gcc_unreachable ();
5318 default:
5319 break;
5323 if (vector_all_ones_zero_extend_half_operand (x, mode)
5324 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5325 return 3;
5327 return 0;
5330 /* Return the opcode of the special instruction to be used to load
5331 the constant operands[1] into operands[0]. */
5333 const char *
5334 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5336 machine_mode mode;
5337 rtx x = operands[1];
5339 gcc_assert (TARGET_SSE);
5341 mode = GET_MODE (x);
5343 if (x == const0_rtx || const0_operand (x, mode))
5345 switch (get_attr_mode (insn))
5347 case MODE_TI:
5348 if (!EXT_REX_SSE_REG_P (operands[0]))
5349 return "%vpxor\t%0, %d0";
5350 /* FALLTHRU */
5351 case MODE_XI:
5352 case MODE_OI:
5353 if (EXT_REX_SSE_REG_P (operands[0]))
5355 if (TARGET_AVX512VL)
5356 return "vpxord\t%x0, %x0, %x0";
5357 else if (TARGET_EVEX512)
5358 return "vpxord\t%g0, %g0, %g0";
5359 else
5360 gcc_unreachable ();
5362 return "vpxor\t%x0, %x0, %x0";
5364 case MODE_V2DF:
5365 if (!EXT_REX_SSE_REG_P (operands[0]))
5366 return "%vxorpd\t%0, %d0";
5367 /* FALLTHRU */
5368 case MODE_V8DF:
5369 case MODE_V4DF:
5370 if (EXT_REX_SSE_REG_P (operands[0]))
5372 if (TARGET_AVX512DQ)
5374 if (TARGET_AVX512VL)
5375 return "vxorpd\t%x0, %x0, %x0";
5376 else if (TARGET_EVEX512)
5377 return "vxorpd\t%g0, %g0, %g0";
5378 else
5379 gcc_unreachable ();
5381 else
5383 if (TARGET_AVX512VL)
5384 return "vpxorq\t%x0, %x0, %x0";
5385 else if (TARGET_EVEX512)
5386 return "vpxorq\t%g0, %g0, %g0";
5387 else
5388 gcc_unreachable ();
5391 return "vxorpd\t%x0, %x0, %x0";
5393 case MODE_V4SF:
5394 if (!EXT_REX_SSE_REG_P (operands[0]))
5395 return "%vxorps\t%0, %d0";
5396 /* FALLTHRU */
5397 case MODE_V16SF:
5398 case MODE_V8SF:
5399 if (EXT_REX_SSE_REG_P (operands[0]))
5401 if (TARGET_AVX512DQ)
5403 if (TARGET_AVX512VL)
5404 return "vxorps\t%x0, %x0, %x0";
5405 else if (TARGET_EVEX512)
5406 return "vxorps\t%g0, %g0, %g0";
5407 else
5408 gcc_unreachable ();
5410 else
5412 if (TARGET_AVX512VL)
5413 return "vpxord\t%x0, %x0, %x0";
5414 else if (TARGET_EVEX512)
5415 return "vpxord\t%g0, %g0, %g0";
5416 else
5417 gcc_unreachable ();
5420 return "vxorps\t%x0, %x0, %x0";
5422 default:
5423 gcc_unreachable ();
5426 else if (x == constm1_rtx
5427 || vector_all_ones_operand (x, mode)
5428 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5429 && float_vector_all_ones_operand (x, mode)))
5431 enum attr_mode insn_mode = get_attr_mode (insn);
5433 switch (insn_mode)
5435 case MODE_XI:
5436 case MODE_V8DF:
5437 case MODE_V16SF:
5438 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5439 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5441 case MODE_OI:
5442 case MODE_V4DF:
5443 case MODE_V8SF:
5444 gcc_assert (TARGET_AVX2);
5445 /* FALLTHRU */
5446 case MODE_TI:
5447 case MODE_V2DF:
5448 case MODE_V4SF:
5449 gcc_assert (TARGET_SSE2);
5450 if (EXT_REX_SSE_REG_P (operands[0]))
5452 if (TARGET_AVX512VL)
5453 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5454 else if (TARGET_EVEX512)
5455 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5456 else
5457 gcc_unreachable ();
5459 return (TARGET_AVX
5460 ? "vpcmpeqd\t%0, %0, %0"
5461 : "pcmpeqd\t%0, %0");
5463 default:
5464 gcc_unreachable ();
5467 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5469 if (GET_MODE_SIZE (mode) == 64)
5471 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5472 return "vpcmpeqd\t%t0, %t0, %t0";
5474 else if (GET_MODE_SIZE (mode) == 32)
5476 gcc_assert (TARGET_AVX);
5477 return "vpcmpeqd\t%x0, %x0, %x0";
5479 gcc_unreachable ();
5481 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5483 gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
5484 return "vpcmpeqd\t%x0, %x0, %x0";
5487 gcc_unreachable ();
5490 /* Returns true if INSN can be transformed from a memory load
5491 to a supported FP constant load. */
5493 bool
5494 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5496 rtx src = find_constant_src (insn);
5498 gcc_assert (REG_P (dst));
5500 if (src == NULL
5501 || (SSE_REGNO_P (REGNO (dst))
5502 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5503 || (!TARGET_AVX512VL
5504 && EXT_REX_SSE_REGNO_P (REGNO (dst))
5505 && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
5506 || (STACK_REGNO_P (REGNO (dst))
5507 && standard_80387_constant_p (src) < 1))
5508 return false;
5510 return true;
5513 /* Predicate for pre-reload splitters with associated instructions,
5514 which can match any time before the split1 pass (usually combine),
5515 then are unconditionally split in that pass and should not be
5516 matched again afterwards. */
5518 bool
5519 ix86_pre_reload_split (void)
5521 return (can_create_pseudo_p ()
5522 && !(cfun->curr_properties & PROP_rtl_split_insns));
5525 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5526 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5527 TARGET_AVX512VL, or it must be a register-to-register move that can
5528 be done with a zmm register move. */
5530 static const char *
5531 ix86_get_ssemov (rtx *operands, unsigned size,
5532 enum attr_mode insn_mode, machine_mode mode)
5534 char buf[128];
5535 bool misaligned_p = (misaligned_operand (operands[0], mode)
5536 || misaligned_operand (operands[1], mode));
5537 bool evex_reg_p = (size == 64
5538 || EXT_REX_SSE_REG_P (operands[0])
5539 || EXT_REX_SSE_REG_P (operands[1]));
5541 bool egpr_p = (TARGET_APX_EGPR
5542 && (x86_extended_rex2reg_mentioned_p (operands[0])
5543 || x86_extended_rex2reg_mentioned_p (operands[1])));
5544 bool egpr_vl = egpr_p && TARGET_AVX512VL;
5546 machine_mode scalar_mode;
5548 const char *opcode = NULL;
5549 enum
5551 opcode_int,
5552 opcode_float,
5553 opcode_double
5554 } type = opcode_int;
5556 switch (insn_mode)
5558 case MODE_V16SF:
5559 case MODE_V8SF:
5560 case MODE_V4SF:
5561 scalar_mode = E_SFmode;
5562 type = opcode_float;
5563 break;
5564 case MODE_V8DF:
5565 case MODE_V4DF:
5566 case MODE_V2DF:
5567 scalar_mode = E_DFmode;
5568 type = opcode_double;
5569 break;
5570 case MODE_XI:
5571 case MODE_OI:
5572 case MODE_TI:
5573 scalar_mode = GET_MODE_INNER (mode);
5574 break;
5575 default:
5576 gcc_unreachable ();
5579 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5580 we can only use zmm register move without memory operand. */
5581 if (evex_reg_p
5582 && !TARGET_AVX512VL
5583 && GET_MODE_SIZE (mode) < 64)
5585 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5586 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5587 AVX512VL is disabled, LRA can still generate reg to
5588 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5589 modes. */
5590 if (memory_operand (operands[0], mode)
5591 || memory_operand (operands[1], mode))
5592 gcc_unreachable ();
5593 size = 64;
5594 /* We need TARGET_EVEX512 to move into zmm register. */
5595 gcc_assert (TARGET_EVEX512);
5596 switch (type)
5598 case opcode_int:
5599 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5600 opcode = (misaligned_p
5601 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5602 : "vmovdqa64");
5603 else
5604 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5605 break;
5606 case opcode_float:
5607 opcode = misaligned_p ? "vmovups" : "vmovaps";
5608 break;
5609 case opcode_double:
5610 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5611 break;
5614 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5616 switch (scalar_mode)
5618 case E_HFmode:
5619 case E_BFmode:
5620 if (evex_reg_p || egpr_vl)
5621 opcode = (misaligned_p
5622 ? (TARGET_AVX512BW
5623 ? "vmovdqu16"
5624 : "vmovdqu64")
5625 : "vmovdqa64");
5626 else if (egpr_p)
5627 opcode = (misaligned_p
5628 ? (TARGET_AVX512BW
5629 ? "vmovdqu16"
5630 : "%vmovups")
5631 : "%vmovaps");
5632 else
5633 opcode = (misaligned_p
5634 ? (TARGET_AVX512BW
5635 ? "vmovdqu16"
5636 : "%vmovdqu")
5637 : "%vmovdqa");
5638 break;
5639 case E_SFmode:
5640 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5641 break;
5642 case E_DFmode:
5643 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5644 break;
5645 case E_TFmode:
5646 if (evex_reg_p || egpr_vl)
5647 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5648 else if (egpr_p)
5649 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5650 else
5651 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5652 break;
5653 default:
5654 gcc_unreachable ();
5657 else if (SCALAR_INT_MODE_P (scalar_mode))
5659 switch (scalar_mode)
5661 case E_QImode:
5662 if (evex_reg_p || egpr_vl)
5663 opcode = (misaligned_p
5664 ? (TARGET_AVX512BW
5665 ? "vmovdqu8"
5666 : "vmovdqu64")
5667 : "vmovdqa64");
5668 else if (egpr_p)
5669 opcode = (misaligned_p
5670 ? (TARGET_AVX512BW
5671 ? "vmovdqu8"
5672 : "%vmovups")
5673 : "%vmovaps");
5674 else
5675 opcode = (misaligned_p
5676 ? (TARGET_AVX512BW
5677 ? "vmovdqu8"
5678 : "%vmovdqu")
5679 : "%vmovdqa");
5680 break;
5681 case E_HImode:
5682 if (evex_reg_p || egpr_vl)
5683 opcode = (misaligned_p
5684 ? (TARGET_AVX512BW
5685 ? "vmovdqu16"
5686 : "vmovdqu64")
5687 : "vmovdqa64");
5688 else if (egpr_p)
5689 opcode = (misaligned_p
5690 ? (TARGET_AVX512BW
5691 ? "vmovdqu16"
5692 : "%vmovups")
5693 : "%vmovaps");
5694 else
5695 opcode = (misaligned_p
5696 ? (TARGET_AVX512BW
5697 ? "vmovdqu16"
5698 : "%vmovdqu")
5699 : "%vmovdqa");
5700 break;
5701 case E_SImode:
5702 if (evex_reg_p || egpr_vl)
5703 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5704 else if (egpr_p)
5705 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5706 else
5707 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5708 break;
5709 case E_DImode:
5710 case E_TImode:
5711 case E_OImode:
5712 if (evex_reg_p || egpr_vl)
5713 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5714 else if (egpr_p)
5715 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5716 else
5717 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5718 break;
5719 case E_XImode:
5720 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5721 break;
5722 default:
5723 gcc_unreachable ();
5726 else
5727 gcc_unreachable ();
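/* In the templates below the %x, %t and %g operand modifiers print the
   xmm, ymm and zmm names of the operand, so SIZE selects which register
   width is emitted. */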
5729 switch (size)
5731 case 64:
5732 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5733 opcode);
5734 break;
5735 case 32:
5736 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5737 opcode);
5738 break;
5739 case 16:
5740 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5741 opcode);
5742 break;
5743 default:
5744 gcc_unreachable ();
5746 output_asm_insn (buf, operands);
5747 return "";
5750 /* Return the template of the TYPE_SSEMOV instruction to move
5751 operands[1] into operands[0]. */
5753 const char *
5754 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5756 machine_mode mode = GET_MODE (operands[0]);
5757 if (get_attr_type (insn) != TYPE_SSEMOV
5758 || mode != GET_MODE (operands[1]))
5759 gcc_unreachable ();
5761 enum attr_mode insn_mode = get_attr_mode (insn);
5763 switch (insn_mode)
5765 case MODE_XI:
5766 case MODE_V8DF:
5767 case MODE_V16SF:
5768 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5770 case MODE_OI:
5771 case MODE_V4DF:
5772 case MODE_V8SF:
5773 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5775 case MODE_TI:
5776 case MODE_V2DF:
5777 case MODE_V4SF:
5778 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5780 case MODE_DI:
5781 /* Handle broken assemblers that require movd instead of movq. */
5782 if (GENERAL_REG_P (operands[0]))
5784 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5785 return "%vmovq\t{%1, %q0|%q0, %1}";
5786 else
5787 return "%vmovd\t{%1, %q0|%q0, %1}";
5789 else if (GENERAL_REG_P (operands[1]))
5791 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5792 return "%vmovq\t{%q1, %0|%0, %q1}";
5793 else
5794 return "%vmovd\t{%q1, %0|%0, %q1}";
5796 else
5797 return "%vmovq\t{%1, %0|%0, %1}";
5799 case MODE_SI:
5800 if (GENERAL_REG_P (operands[0]))
5801 return "%vmovd\t{%1, %k0|%k0, %1}";
5802 else if (GENERAL_REG_P (operands[1]))
5803 return "%vmovd\t{%k1, %0|%0, %k1}";
5804 else
5805 return "%vmovd\t{%1, %0|%0, %1}";
5807 case MODE_HI:
5808 if (GENERAL_REG_P (operands[0]))
5809 return "vmovw\t{%1, %k0|%k0, %1}";
5810 else if (GENERAL_REG_P (operands[1]))
5811 return "vmovw\t{%k1, %0|%0, %k1}";
5812 else
5813 return "vmovw\t{%1, %0|%0, %1}";
5815 case MODE_DF:
5816 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5817 return "vmovsd\t{%d1, %0|%0, %d1}";
5818 else
5819 return "%vmovsd\t{%1, %0|%0, %1}";
5821 case MODE_SF:
5822 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5823 return "vmovss\t{%d1, %0|%0, %d1}";
5824 else
5825 return "%vmovss\t{%1, %0|%0, %1}";
5827 case MODE_HF:
5828 case MODE_BF:
5829 if (REG_P (operands[0]) && REG_P (operands[1]))
5830 return "vmovsh\t{%d1, %0|%0, %d1}";
5831 else
5832 return "vmovsh\t{%1, %0|%0, %1}";
5834 case MODE_V1DF:
5835 gcc_assert (!TARGET_AVX);
5836 return "movlpd\t{%1, %0|%0, %1}";
5838 case MODE_V2SF:
5839 if (TARGET_AVX && REG_P (operands[0]))
5840 return "vmovlps\t{%1, %d0|%d0, %1}";
5841 else
5842 return "%vmovlps\t{%1, %0|%0, %1}";
5844 default:
5845 gcc_unreachable ();
5849 /* Returns true if OP contains a symbol reference. */
5851 bool
5852 symbolic_reference_mentioned_p (rtx op)
5854 const char *fmt;
5855 int i;
5857 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5858 return true;
5860 fmt = GET_RTX_FORMAT (GET_CODE (op));
5861 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5863 if (fmt[i] == 'E')
5865 int j;
5867 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5868 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5869 return true;
5872 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5873 return true;
5876 return false;
5879 /* Return true if it is appropriate to emit `ret' instructions in the
5880 body of a function. Do this only if the epilogue is simple, needing a
5881 couple of insns. Prior to reloading, we can't tell how many registers
5882 must be saved, so return false then. Return false if there is no frame
5883 marker to de-allocate. */
5885 bool
5886 ix86_can_use_return_insn_p (void)
5888 if (ix86_function_ms_hook_prologue (current_function_decl))
5889 return false;
5891 if (ix86_function_naked (current_function_decl))
5892 return false;
5894 /* Don't use `ret' instruction in interrupt handler. */
5895 if (! reload_completed
5896 || frame_pointer_needed
5897 || cfun->machine->func_type != TYPE_NORMAL)
5898 return 0;
5900 /* Don't allow more than 32k pop, since that's all we can do
5901 with one instruction. */
5902 if (crtl->args.pops_args && crtl->args.size >= 32768)
5903 return 0;
5905 struct ix86_frame &frame = cfun->machine->frame;
5906 return (frame.stack_pointer_offset == UNITS_PER_WORD
5907 && (frame.nregs + frame.nsseregs) == 0);
5910 /* Return stack frame size. get_frame_size () returns used stack slots
5911 during compilation, which may be optimized out later. If stack frame
5912 is needed, stack_frame_required should be true. */
5914 static HOST_WIDE_INT
5915 ix86_get_frame_size (void)
5917 if (cfun->machine->stack_frame_required)
5918 return get_frame_size ();
5919 else
5920 return 0;
5923 /* Value should be nonzero if functions must have frame pointers.
5924 Zero means the frame pointer need not be set up (and parms may
5925 be accessed via the stack pointer) in functions that seem suitable. */
5927 static bool
5928 ix86_frame_pointer_required (void)
5930 /* If we accessed previous frames, then the generated code expects
5931 to be able to access the saved ebp value in our frame. */
5932 if (cfun->machine->accesses_prev_frame)
5933 return true;
5935 /* Several x86 OSes need a frame pointer for other reasons,
5936 usually pertaining to setjmp. */
5937 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5938 return true;
5940 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
5941 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5942 return true;
5944 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
5945 allocation is 4GB. */
5946 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5947 return true;
5949 /* SSE saves require a frame pointer when the stack is misaligned. */
5950 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5951 return true;
5953 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5954 turns off the frame pointer by default. Turn it back on now if
5955 we've not got a leaf function. */
5956 if (TARGET_OMIT_LEAF_FRAME_POINTER
5957 && (!crtl->is_leaf
5958 || ix86_current_function_calls_tls_descriptor))
5959 return true;
5961 /* Several versions of mcount for the x86 assume that there is a
5962 frame, so we cannot allow profiling without a frame pointer. */
5963 if (crtl->profile && !flag_fentry)
5964 return true;
5966 return false;
5969 /* Record that the current function accesses previous call frames. */
5971 void
5972 ix86_setup_frame_addresses (void)
5974 cfun->machine->accesses_prev_frame = 1;
5977 #ifndef USE_HIDDEN_LINKONCE
5978 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5979 # define USE_HIDDEN_LINKONCE 1
5980 # else
5981 # define USE_HIDDEN_LINKONCE 0
5982 # endif
5983 #endif
5985 /* Label count for call and return thunks. It is used to make unique
5986 labels in call and return thunks. */
5987 static int indirectlabelno;
5989 /* True if call thunk function is needed. */
5990 static bool indirect_thunk_needed = false;
5992 /* Bit mask of the integer registers which contain the branch target,
5993 used by call thunk functions. */
5994 static HARD_REG_SET indirect_thunks_used;
5996 /* True if return thunk function is needed. */
5997 static bool indirect_return_needed = false;
5999 /* True if return thunk function via CX is needed. */
6000 static bool indirect_return_via_cx;
6002 #ifndef INDIRECT_LABEL
6003 # define INDIRECT_LABEL "LIND"
6004 #endif
6006 /* Indicate what prefix is needed for an indirect branch. */
6007 enum indirect_thunk_prefix
6009 indirect_thunk_prefix_none,
6010 indirect_thunk_prefix_nt
6013 /* Return the prefix needed for an indirect branch INSN. */
6015 enum indirect_thunk_prefix
6016 indirect_thunk_need_prefix (rtx_insn *insn)
6018 enum indirect_thunk_prefix need_prefix;
6019 if ((cfun->machine->indirect_branch_type
6020 == indirect_branch_thunk_extern)
6021 && ix86_notrack_prefixed_insn_p (insn))
6023 /* NOTRACK prefix is only used with external thunk so that it
6024 can be properly updated to support CET at run-time. */
6025 need_prefix = indirect_thunk_prefix_nt;
6027 else
6028 need_prefix = indirect_thunk_prefix_none;
6029 return need_prefix;
6032 /* Fills in the label name that should be used for the indirect thunk. */
6034 static void
6035 indirect_thunk_name (char name[32], unsigned int regno,
6036 enum indirect_thunk_prefix need_prefix,
6037 bool ret_p)
6039 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
6040 gcc_unreachable ();
6042 if (USE_HIDDEN_LINKONCE)
6044 const char *prefix;
6046 if (need_prefix == indirect_thunk_prefix_nt
6047 && regno != INVALID_REGNUM)
6049 /* NOTRACK prefix is only used with external thunk via
6050 register so that NOTRACK prefix can be added to indirect
6051 branch via register to support CET at run-time. */
6052 prefix = "_nt";
6054 else
6055 prefix = "";
6057 const char *ret = ret_p ? "return" : "indirect";
6059 if (regno != INVALID_REGNUM)
6061 const char *reg_prefix;
6062 if (LEGACY_INT_REGNO_P (regno))
6063 reg_prefix = TARGET_64BIT ? "r" : "e";
6064 else
6065 reg_prefix = "";
6066 sprintf (name, "__x86_%s_thunk%s_%s%s",
6067 ret, prefix, reg_prefix, reg_names[regno]);
6069 else
6070 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
6072 else
6074 if (regno != INVALID_REGNUM)
6075 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
6076 else
6078 if (ret_p)
6079 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
6080 else
6081 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
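/* For reference, with USE_HIDDEN_LINKONCE the sprintf formats above yield
   names such as (64-bit examples):

     __x86_indirect_thunk          branch target on the stack
     __x86_indirect_thunk_rax      branch target in %rax
     __x86_indirect_thunk_nt_rax   external thunk, NOTRACK prefix
     __x86_return_thunk            return thunk
     __x86_return_thunk_rcx        return thunk via %rcx  */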
6086 /* Output a call and return thunk for indirect branch.  If REGNO != -1,
6087 the function address is in REGNO and the call and return thunk looks like:
6089 	call	L2
6090 L1:
6091 	pause
6092 	lfence
6093 	jmp	L1
6094 L2:
6095 	mov	%REG, (%sp)
6096 	ret
6098 Otherwise, the function address is on the top of stack and the
6099 call and return thunk looks like:
6101 	call	L2
6102 L1:
6103 	pause
6104 	lfence
6105 	jmp	L1
6106 L2:
6107 	lea	WORD_SIZE(%sp), %sp
6108 	ret  */
6111 static void
6112 output_indirect_thunk (unsigned int regno)
6114 char indirectlabel1[32];
6115 char indirectlabel2[32];
6117 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6118 indirectlabelno++);
6119 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6120 indirectlabelno++);
6122 /* Call */
6123 fputs ("\tcall\t", asm_out_file);
6124 assemble_name_raw (asm_out_file, indirectlabel2);
6125 fputc ('\n', asm_out_file);
6127 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6129 /* AMD and Intel CPUs each prefer a different instruction as a loop filler.
6130 Using both pause + lfence is a compromise solution. */
6131 fprintf (asm_out_file, "\tpause\n\tlfence\n");
6133 /* Jump. */
6134 fputs ("\tjmp\t", asm_out_file);
6135 assemble_name_raw (asm_out_file, indirectlabel1);
6136 fputc ('\n', asm_out_file);
6138 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6140 /* The above call insn pushed a word to stack. Adjust CFI info. */
6141 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6143 if (! dwarf2out_do_cfi_asm ())
6145 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6146 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6147 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6148 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6150 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6151 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6152 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6153 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6154 dwarf2out_emit_cfi (xcfi);
6157 if (regno != INVALID_REGNUM)
6159 /* MOV. */
6160 rtx xops[2];
6161 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6162 xops[1] = gen_rtx_REG (word_mode, regno);
6163 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6165 else
6167 /* LEA. */
6168 rtx xops[2];
6169 xops[0] = stack_pointer_rtx;
6170 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6171 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6174 fputs ("\tret\n", asm_out_file);
6175 if ((ix86_harden_sls & harden_sls_return))
6176 fputs ("\tint3\n", asm_out_file);
6179 /* Output a function with a call and return thunk for indirect branch.
6180 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6181 Otherwise, the function address is on the top of stack. Thunk is
6182 used for function return if RET_P is true. */
6184 static void
6185 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6186 unsigned int regno, bool ret_p)
6188 char name[32];
6189 tree decl;
6191 /* Create __x86_indirect_thunk. */
6192 indirect_thunk_name (name, regno, need_prefix, ret_p);
6193 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6194 get_identifier (name),
6195 build_function_type_list (void_type_node, NULL_TREE));
6196 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6197 NULL_TREE, void_type_node);
6198 TREE_PUBLIC (decl) = 1;
6199 TREE_STATIC (decl) = 1;
6200 DECL_IGNORED_P (decl) = 1;
6202 #if TARGET_MACHO
6203 if (TARGET_MACHO)
6205 switch_to_section (darwin_sections[picbase_thunk_section]);
6206 fputs ("\t.weak_definition\t", asm_out_file);
6207 assemble_name (asm_out_file, name);
6208 fputs ("\n\t.private_extern\t", asm_out_file);
6209 assemble_name (asm_out_file, name);
6210 putc ('\n', asm_out_file);
6211 ASM_OUTPUT_LABEL (asm_out_file, name);
6212 DECL_WEAK (decl) = 1;
6214 else
6215 #endif
6216 if (USE_HIDDEN_LINKONCE)
6218 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6220 targetm.asm_out.unique_section (decl, 0);
6221 switch_to_section (get_named_section (decl, NULL, 0));
6223 targetm.asm_out.globalize_label (asm_out_file, name);
6224 fputs ("\t.hidden\t", asm_out_file);
6225 assemble_name (asm_out_file, name);
6226 putc ('\n', asm_out_file);
6227 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6229 else
6231 switch_to_section (text_section);
6232 ASM_OUTPUT_LABEL (asm_out_file, name);
6235 DECL_INITIAL (decl) = make_node (BLOCK);
6236 current_function_decl = decl;
6237 allocate_struct_function (decl, false);
6238 init_function_start (decl);
6239 /* We're about to hide the function body from callees of final_* by
6240 emitting it directly; tell them we're a thunk, if they care. */
6241 cfun->is_thunk = true;
6242 first_function_block_is_cold = false;
6243 /* Make sure unwind info is emitted for the thunk if needed. */
6244 final_start_function (emit_barrier (), asm_out_file, 1);
6246 output_indirect_thunk (regno);
6248 final_end_function ();
6249 init_insn_lengths ();
6250 free_after_compilation (cfun);
6251 set_cfun (NULL);
6252 current_function_decl = NULL;
6255 static int pic_labels_used;
6257 /* Fills in the label name that should be used for a pc thunk for
6258 the given register. */
6260 static void
6261 get_pc_thunk_name (char name[32], unsigned int regno)
6263 gcc_assert (!TARGET_64BIT);
6265 if (USE_HIDDEN_LINKONCE)
6266 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6267 else
6268 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6272 /* This function generates code for -fpic that loads %ebx with
6273 the return address of the caller and then returns. */
6275 static void
6276 ix86_code_end (void)
6278 rtx xops[2];
6279 unsigned int regno;
6281 if (indirect_return_needed)
6282 output_indirect_thunk_function (indirect_thunk_prefix_none,
6283 INVALID_REGNUM, true);
6284 if (indirect_return_via_cx)
6285 output_indirect_thunk_function (indirect_thunk_prefix_none,
6286 CX_REG, true);
6287 if (indirect_thunk_needed)
6288 output_indirect_thunk_function (indirect_thunk_prefix_none,
6289 INVALID_REGNUM, false);
6291 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6293 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6294 output_indirect_thunk_function (indirect_thunk_prefix_none,
6295 regno, false);
6298 for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
6300 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6301 output_indirect_thunk_function (indirect_thunk_prefix_none,
6302 regno, false);
6305 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6307 char name[32];
6308 tree decl;
6310 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6311 output_indirect_thunk_function (indirect_thunk_prefix_none,
6312 regno, false);
6314 if (!(pic_labels_used & (1 << regno)))
6315 continue;
6317 get_pc_thunk_name (name, regno);
6319 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6320 get_identifier (name),
6321 build_function_type_list (void_type_node, NULL_TREE));
6322 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6323 NULL_TREE, void_type_node);
6324 TREE_PUBLIC (decl) = 1;
6325 TREE_STATIC (decl) = 1;
6326 DECL_IGNORED_P (decl) = 1;
6328 #if TARGET_MACHO
6329 if (TARGET_MACHO)
6331 switch_to_section (darwin_sections[picbase_thunk_section]);
6332 fputs ("\t.weak_definition\t", asm_out_file);
6333 assemble_name (asm_out_file, name);
6334 fputs ("\n\t.private_extern\t", asm_out_file);
6335 assemble_name (asm_out_file, name);
6336 putc ('\n', asm_out_file);
6337 ASM_OUTPUT_LABEL (asm_out_file, name);
6338 DECL_WEAK (decl) = 1;
6340 else
6341 #endif
6342 if (USE_HIDDEN_LINKONCE)
6344 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6346 targetm.asm_out.unique_section (decl, 0);
6347 switch_to_section (get_named_section (decl, NULL, 0));
6349 targetm.asm_out.globalize_label (asm_out_file, name);
6350 fputs ("\t.hidden\t", asm_out_file);
6351 assemble_name (asm_out_file, name);
6352 putc ('\n', asm_out_file);
6353 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6355 else
6357 switch_to_section (text_section);
6358 ASM_OUTPUT_LABEL (asm_out_file, name);
6361 DECL_INITIAL (decl) = make_node (BLOCK);
6362 current_function_decl = decl;
6363 allocate_struct_function (decl, false);
6364 init_function_start (decl);
6365 /* We're about to hide the function body from callees of final_* by
6366 emitting it directly; tell them we're a thunk, if they care. */
6367 cfun->is_thunk = true;
6368 first_function_block_is_cold = false;
6369 /* Make sure unwind info is emitted for the thunk if needed. */
6370 final_start_function (emit_barrier (), asm_out_file, 1);
6372 /* Pad stack IP move with 4 instructions (two NOPs count
6373 as one instruction). */
6374 if (TARGET_PAD_SHORT_FUNCTION)
6376 int i = 8;
6378 while (i--)
6379 fputs ("\tnop\n", asm_out_file);
6382 xops[0] = gen_rtx_REG (Pmode, regno);
6383 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6384 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6385 fputs ("\tret\n", asm_out_file);
6386 final_end_function ();
6387 init_insn_lengths ();
6388 free_after_compilation (cfun);
6389 set_cfun (NULL);
6390 current_function_decl = NULL;
6393 if (flag_split_stack)
6394 file_end_indicate_split_stack ();
6397 /* Emit code for the SET_GOT patterns. */
6399 const char *
6400 output_set_got (rtx dest, rtx label)
6402 rtx xops[3];
6404 xops[0] = dest;
6406 if (TARGET_VXWORKS_RTP && flag_pic)
6408 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6409 xops[2] = gen_rtx_MEM (Pmode,
6410 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6411 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6413 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6414 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6415 an unadorned address. */
6416 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6417 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6418 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6419 return "";
6422 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6424 if (flag_pic)
6426 char name[32];
6427 get_pc_thunk_name (name, REGNO (dest));
6428 pic_labels_used |= 1 << REGNO (dest);
6430 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6431 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6432 output_asm_insn ("%!call\t%X2", xops);
6434 #if TARGET_MACHO
6435 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6436 This is what will be referenced by the Mach-O PIC subsystem. */
6437 if (machopic_should_output_picbase_label () || !label)
6438 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6440 /* When we are restoring the pic base at the site of a nonlocal label,
6441 and we decided to emit the pic base above, we will still output a
6442 local label used for calculating the correction offset (even though
6443 the offset will be 0 in that case). */
6444 if (label)
6445 targetm.asm_out.internal_label (asm_out_file, "L",
6446 CODE_LABEL_NUMBER (label));
6447 #endif
6449 else
6451 if (TARGET_MACHO)
6452 /* We don't need a pic base, we're not producing pic. */
6453 gcc_unreachable ();
6455 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6456 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6457 targetm.asm_out.internal_label (asm_out_file, "L",
6458 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6461 if (!TARGET_MACHO)
6462 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6464 return "";
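/* A minimal sketch of what the non-VxWorks -fpic path above emits when DEST
   is %ebx and the hidden-linkonce thunk is used:

     call   __x86.get_pc_thunk.bx
     addl   $_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk itself (emitted from ix86_code_end) just loads the return
   address from the top of the stack into %ebx and returns.  */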
6467 /* Generate a "push" pattern for input ARG. */
6470 gen_push (rtx arg, bool ppx_p)
6472 struct machine_function *m = cfun->machine;
6474 if (m->fs.cfa_reg == stack_pointer_rtx)
6475 m->fs.cfa_offset += UNITS_PER_WORD;
6476 m->fs.sp_offset += UNITS_PER_WORD;
6478 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6479 arg = gen_rtx_REG (word_mode, REGNO (arg));
6481 rtx stack = gen_rtx_MEM (word_mode,
6482 gen_rtx_PRE_DEC (Pmode,
6483 stack_pointer_rtx));
6484 return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
6488 gen_pushfl (void)
6490 struct machine_function *m = cfun->machine;
6491 rtx flags, mem;
6493 if (m->fs.cfa_reg == stack_pointer_rtx)
6494 m->fs.cfa_offset += UNITS_PER_WORD;
6495 m->fs.sp_offset += UNITS_PER_WORD;
6497 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6499 mem = gen_rtx_MEM (word_mode,
6500 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
6502 return gen_pushfl2 (word_mode, mem, flags);
6505 /* Generate a "pop" pattern for input ARG. */
6508 gen_pop (rtx arg, bool ppx_p)
6510 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6511 arg = gen_rtx_REG (word_mode, REGNO (arg));
6513 rtx stack = gen_rtx_MEM (word_mode,
6514 gen_rtx_POST_INC (Pmode,
6515 stack_pointer_rtx));
6517 return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
6521 gen_popfl (void)
6523 rtx flags, mem;
6525 flags = gen_rtx_REG (CCmode, FLAGS_REG);
6527 mem = gen_rtx_MEM (word_mode,
6528 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
6530 return gen_popfl1 (word_mode, flags, mem);
6533 /* Generate a "push2" pattern for input ARG. */
6535 gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
6537 struct machine_function *m = cfun->machine;
6538 const int offset = UNITS_PER_WORD * 2;
6540 if (m->fs.cfa_reg == stack_pointer_rtx)
6541 m->fs.cfa_offset += offset;
6542 m->fs.sp_offset += offset;
6544 if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
6545 reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
6547 if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
6548 reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
6550 return ppx_p ? gen_push2p_di (mem, reg1, reg2):
6551 gen_push2_di (mem, reg1, reg2);
6554 /* Return >= 0 if there is an unused call-clobbered register available
6555 for the entire function. */
6557 static unsigned int
6558 ix86_select_alt_pic_regnum (void)
6560 if (ix86_use_pseudo_pic_reg ())
6561 return INVALID_REGNUM;
6563 if (crtl->is_leaf
6564 && !crtl->profile
6565 && !ix86_current_function_calls_tls_descriptor)
6567 int i, drap;
6568 /* Can't use the same register for both PIC and DRAP. */
6569 if (crtl->drap_reg)
6570 drap = REGNO (crtl->drap_reg);
6571 else
6572 drap = -1;
6573 for (i = 2; i >= 0; --i)
6574 if (i != drap && !df_regs_ever_live_p (i))
6575 return i;
6578 return INVALID_REGNUM;
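/* Note: the loop above walks hard registers 2, 1, 0, i.e. it prefers %ecx,
   then %edx, then %eax as the alternate PIC register, skipping whichever
   of them is reserved for DRAP.  */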
6581 /* Return true if REGNO is used by the epilogue. */
6583 bool
6584 ix86_epilogue_uses (int regno)
6586 /* If there are no caller-saved registers, we preserve all registers,
6587 except for MMX and x87 registers which aren't supported when saving
6588 and restoring registers. Don't explicitly save SP register since
6589 it is always preserved. */
6590 return (epilogue_completed
6591 && (cfun->machine->call_saved_registers
6592 == TYPE_NO_CALLER_SAVED_REGISTERS)
6593 && !fixed_regs[regno]
6594 && !STACK_REGNO_P (regno)
6595 && !MMX_REGNO_P (regno));
6598 /* Return nonzero if register REGNO can be used as a scratch register
6599 in peephole2. */
6601 static bool
6602 ix86_hard_regno_scratch_ok (unsigned int regno)
6604 /* If there are no caller-saved registers, we can't use any register
6605 as a scratch register after epilogue and use REGNO as scratch
6606 register only if it has been used before to avoid saving and
6607 restoring it. */
6608 return ((cfun->machine->call_saved_registers
6609 != TYPE_NO_CALLER_SAVED_REGISTERS)
6610 || (!epilogue_completed
6611 && df_regs_ever_live_p (regno)));
6614 /* Return TRUE if we need to save REGNO. */
6616 bool
6617 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6619 rtx reg;
6621 switch (cfun->machine->call_saved_registers)
6623 case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
6624 break;
6626 case TYPE_NO_CALLER_SAVED_REGISTERS:
6627 /* If there are no caller-saved registers, we preserve all
6628 registers, except for MMX and x87 registers which aren't
6629 supported when saving and restoring registers. Don't
6630 explicitly save SP register since it is always preserved.
6632 Don't preserve registers used for function return value. */
6633 reg = crtl->return_rtx;
6634 if (reg)
6636 unsigned int i = REGNO (reg);
6637 unsigned int nregs = REG_NREGS (reg);
6638 while (nregs-- > 0)
6639 if ((i + nregs) == regno)
6640 return false;
6643 return (df_regs_ever_live_p (regno)
6644 && !fixed_regs[regno]
6645 && !STACK_REGNO_P (regno)
6646 && !MMX_REGNO_P (regno)
6647 && (regno != HARD_FRAME_POINTER_REGNUM
6648 || !frame_pointer_needed));
6650 case TYPE_NO_CALLEE_SAVED_REGISTERS:
6651 return false;
6654 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6655 && pic_offset_table_rtx)
6657 if (ix86_use_pseudo_pic_reg ())
6659 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6660 _mcount in prologue. */
6661 if (!TARGET_64BIT && flag_pic && crtl->profile)
6662 return true;
6664 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6665 || crtl->profile
6666 || crtl->calls_eh_return
6667 || crtl->uses_const_pool
6668 || cfun->has_nonlocal_label)
6669 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6672 if (crtl->calls_eh_return && maybe_eh_return)
6674 unsigned i;
6675 for (i = 0; ; i++)
6677 unsigned test = EH_RETURN_DATA_REGNO (i);
6678 if (test == INVALID_REGNUM)
6679 break;
6680 if (test == regno)
6681 return true;
6685 if (ignore_outlined && cfun->machine->call_ms2sysv)
6687 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6688 + xlogue_layout::MIN_REGS;
6689 if (xlogue_layout::is_stub_managed_reg (regno, count))
6690 return false;
6693 if (crtl->drap_reg
6694 && regno == REGNO (crtl->drap_reg)
6695 && !cfun->machine->no_drap_save_restore)
6696 return true;
6698 return (df_regs_ever_live_p (regno)
6699 && !call_used_or_fixed_reg_p (regno)
6700 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6703 /* Return the number of saved general purpose registers. */
6705 static int
6706 ix86_nsaved_regs (void)
6708 int nregs = 0;
6709 int regno;
6711 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6712 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6713 nregs ++;
6714 return nregs;
6717 /* Return the number of saved SSE registers. */
6719 static int
6720 ix86_nsaved_sseregs (void)
6722 int nregs = 0;
6723 int regno;
6725 if (!TARGET_64BIT_MS_ABI)
6726 return 0;
6727 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6728 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6729 nregs ++;
6730 return nregs;
6733 /* Given FROM and TO register numbers, say whether this elimination is
6734 allowed. If stack alignment is needed, we can only replace argument
6735 pointer with hard frame pointer, or replace frame pointer with stack
6736 pointer. Otherwise, frame pointer elimination is automatically
6737 handled and all other eliminations are valid. */
6739 static bool
6740 ix86_can_eliminate (const int from, const int to)
6742 if (stack_realign_fp)
6743 return ((from == ARG_POINTER_REGNUM
6744 && to == HARD_FRAME_POINTER_REGNUM)
6745 || (from == FRAME_POINTER_REGNUM
6746 && to == STACK_POINTER_REGNUM));
6747 else
6748 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6751 /* Return the offset between two registers, one to be eliminated, and the other
6752 its replacement, at the start of a routine. */
6754 HOST_WIDE_INT
6755 ix86_initial_elimination_offset (int from, int to)
6757 struct ix86_frame &frame = cfun->machine->frame;
6759 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6760 return frame.hard_frame_pointer_offset;
6761 else if (from == FRAME_POINTER_REGNUM
6762 && to == HARD_FRAME_POINTER_REGNUM)
6763 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6764 else
6766 gcc_assert (to == STACK_POINTER_REGNUM);
6768 if (from == ARG_POINTER_REGNUM)
6769 return frame.stack_pointer_offset;
6771 gcc_assert (from == FRAME_POINTER_REGNUM);
6772 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6776 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6777 void
6778 warn_once_call_ms2sysv_xlogues (const char *feature)
6780 static bool warned_once = false;
6781 if (!warned_once)
6783 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6784 feature);
6785 warned_once = true;
6789 /* Return the probing interval for -fstack-clash-protection. */
6791 static HOST_WIDE_INT
6792 get_probe_interval (void)
6794 if (flag_stack_clash_protection)
6795 return (HOST_WIDE_INT_1U
6796 << param_stack_clash_protection_probe_interval);
6797 else
6798 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
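/* With the default settings both branches above give a 4 KiB interval
   (1 << 12); for -fstack-clash-protection it can be changed with
   --param stack-clash-protection-probe-interval, which is given as a
   power of two.  */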
6801 /* When using -fsplit-stack, the allocation routines set a field in
6802 the TCB to the bottom of the stack plus this much space, measured
6803 in bytes. */
6805 #define SPLIT_STACK_AVAILABLE 256
6807 /* Return true if push2/pop2 can be generated. */
6809 static bool
6810 ix86_can_use_push2pop2 (void)
6812 /* Use push2/pop2 only if the incoming stack is 16-byte aligned. */
6813 unsigned int incoming_stack_boundary
6814 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
6815 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
6816 return incoming_stack_boundary % 128 == 0;
6819 /* Helper function to determine whether push2/pop2 can be used in prologue or
6820 epilogue for register save/restore. */
6821 static bool
6822 ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
6824 if (!ix86_can_use_push2pop2 ())
6825 return false;
6826 int aligned = cfun->machine->fs.sp_offset % 16 == 0;
6827 return TARGET_APX_PUSH2POP2
6828 && !cfun->machine->frame.save_regs_using_mov
6829 && cfun->machine->func_type == TYPE_NORMAL
6830 && (nregs + aligned) >= 3;
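/* A rough sketch of the intent, assuming APX push2/pop2: with three
   registers to save and SP off 16-byte alignment by one word (the usual
   case, since the call pushed the return address), the prologue emits one
   ordinary push to restore the alignment and then one push2 for the
   remaining pair, e.g.

     push   %r15
     push2  %r14, %r13          ; operand order illustrative only

   With (nregs + aligned) < 3 the pairing never pays off, so plain pushes
   are used instead.  */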
6833 /* Fill structure ix86_frame about frame of currently computed function. */
6835 static void
6836 ix86_compute_frame_layout (void)
6838 struct ix86_frame *frame = &cfun->machine->frame;
6839 struct machine_function *m = cfun->machine;
6840 unsigned HOST_WIDE_INT stack_alignment_needed;
6841 HOST_WIDE_INT offset;
6842 unsigned HOST_WIDE_INT preferred_alignment;
6843 HOST_WIDE_INT size = ix86_get_frame_size ();
6844 HOST_WIDE_INT to_allocate;
6846 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6847 * ms_abi functions that call a sysv function. We now need to prune away
6848 * cases where it should be disabled. */
6849 if (TARGET_64BIT && m->call_ms2sysv)
6851 gcc_assert (TARGET_64BIT_MS_ABI);
6852 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6853 gcc_assert (!TARGET_SEH);
6854 gcc_assert (TARGET_SSE);
6855 gcc_assert (!ix86_using_red_zone ());
6857 if (crtl->calls_eh_return)
6859 gcc_assert (!reload_completed);
6860 m->call_ms2sysv = false;
6861 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6864 else if (ix86_static_chain_on_stack)
6866 gcc_assert (!reload_completed);
6867 m->call_ms2sysv = false;
6868 warn_once_call_ms2sysv_xlogues ("static call chains");
6871 /* Finally, compute which registers the stub will manage. */
6872 else
6874 unsigned count = xlogue_layout::count_stub_managed_regs ();
6875 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6876 m->call_ms2sysv_pad_in = 0;
6880 frame->nregs = ix86_nsaved_regs ();
6881 frame->nsseregs = ix86_nsaved_sseregs ();
6883 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6884 except for function prologues, leaf functions and when the default
6885 incoming stack boundary is overridden at the command line or via the
6886 force_align_arg_pointer attribute.
6888 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6889 at call sites, including profile function calls.
6891 For APX push2/pop2, the stack also requires 128b alignment. */
6892 if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
6893 && crtl->preferred_stack_boundary < 128)
6894 || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6895 && crtl->preferred_stack_boundary < 128)
6896 && (!crtl->is_leaf || cfun->calls_alloca != 0
6897 || ix86_current_function_calls_tls_descriptor
6898 || (TARGET_MACHO && crtl->profile)
6899 || ix86_incoming_stack_boundary < 128)))
6901 crtl->preferred_stack_boundary = 128;
6902 if (crtl->stack_alignment_needed < 128)
6903 crtl->stack_alignment_needed = 128;
6906 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6907 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6909 gcc_assert (!size || stack_alignment_needed);
6910 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6911 gcc_assert (preferred_alignment <= stack_alignment_needed);
6913 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6914 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6915 if (TARGET_64BIT && m->call_ms2sysv)
6917 gcc_assert (stack_alignment_needed >= 16);
6918 gcc_assert (!frame->nsseregs);
6921 /* For SEH we have to limit the amount of code movement into the prologue.
6922 At present we do this via a BLOCKAGE, at which point there's very little
6923 scheduling that can be done, which means that there's very little point
6924 in doing anything except PUSHs. */
6925 if (TARGET_SEH)
6926 m->use_fast_prologue_epilogue = false;
6927 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6929 int count = frame->nregs;
6930 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6932 /* The fast prologue uses move instead of push to save registers. This
6933 is significantly longer, but also executes faster as modern hardware
6934 can execute the moves in parallel, but can't do that for push/pop.
6936 Be careful about choosing which prologue to emit: when a function takes
6937 many instructions to execute we may use the slow version, as well as when
6938 the function is known to be outside a hot spot (this is known with
6939 feedback only).  Weight the size of the function by the number of registers
6940 to save, as it is cheap to use one or two push instructions but very
6941 slow to use many of them.
6943 Calling this hook multiple times with the same frame requirements
6944 must produce the same layout, since the RA might otherwise be
6945 unable to reach a fixed point or might fail its final sanity checks.
6946 This means that once we've assumed that a function does or doesn't
6947 have a particular size, we have to stick to that assumption
6948 regardless of how the function has changed since. */
6949 if (count)
6950 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6951 if (node->frequency < NODE_FREQUENCY_NORMAL
6952 || (flag_branch_probabilities
6953 && node->frequency < NODE_FREQUENCY_HOT))
6954 m->use_fast_prologue_epilogue = false;
6955 else
6957 if (count != frame->expensive_count)
6959 frame->expensive_count = count;
6960 frame->expensive_p = expensive_function_p (count);
6962 m->use_fast_prologue_epilogue = !frame->expensive_p;
6966 frame->save_regs_using_mov
6967 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6969 /* Skip return address and error code in exception handler. */
6970 offset = INCOMING_FRAME_SP_OFFSET;
6972 /* Skip pushed static chain. */
6973 if (ix86_static_chain_on_stack)
6974 offset += UNITS_PER_WORD;
6976 /* Skip saved base pointer. */
6977 if (frame_pointer_needed)
6978 offset += UNITS_PER_WORD;
6979 frame->hfp_save_offset = offset;
6981 /* The traditional frame pointer location is at the top of the frame. */
6982 frame->hard_frame_pointer_offset = offset;
6984 /* Register save area */
6985 offset += frame->nregs * UNITS_PER_WORD;
6986 frame->reg_save_offset = offset;
6988 /* Calculate the size of the va-arg area (not including padding, if any). */
6989 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6991 /* Also adjust stack_realign_offset for the largest alignment of
6992 stack slot actually used. */
6993 if (stack_realign_fp
6994 || (cfun->machine->max_used_stack_alignment != 0
6995 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6997 /* We may need a 16-byte aligned stack for the remainder of the
6998 register save area, but the stack frame for the local function
6999 may require a greater alignment if using AVX/2/512. In order
7000 to avoid wasting space, we first calculate the space needed for
7001 the rest of the register saves, add that to the stack pointer,
7002 and then realign the stack to the boundary of the start of the
7003 frame for the local function. */
7004 HOST_WIDE_INT space_needed = 0;
7005 HOST_WIDE_INT sse_reg_space_needed = 0;
7007 if (TARGET_64BIT)
7009 if (m->call_ms2sysv)
7011 m->call_ms2sysv_pad_in = 0;
7012 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
7015 else if (frame->nsseregs)
7016 /* The only ABI that has saved SSE registers (Win64) also has a
7017 16-byte aligned default stack. However, many programs violate
7018 the ABI, and Wine64 forces stack realignment to compensate. */
7019 space_needed = frame->nsseregs * 16;
7021 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
7023 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
7024 rounding to be pedantic. */
7025 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
7027 else
7028 space_needed = frame->va_arg_size;
7030 /* Record the allocation size required prior to the realignment AND. */
7031 frame->stack_realign_allocate = space_needed;
7033 /* The re-aligned stack starts at frame->stack_realign_offset. Values
7034 before this point are not directly comparable with values below
7035 this point. Use sp_valid_at to determine if the stack pointer is
7036 valid for a given offset, fp_valid_at for the frame pointer, or
7037 choose_baseaddr to have a base register chosen for you.
7039 Note that the result of (frame->stack_realign_offset
7040 & (stack_alignment_needed - 1)) may not equal zero. */
7041 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
7042 frame->stack_realign_offset = offset - space_needed;
7043 frame->sse_reg_save_offset = frame->stack_realign_offset
7044 + sse_reg_space_needed;
7046 else
7048 frame->stack_realign_offset = offset;
7050 if (TARGET_64BIT && m->call_ms2sysv)
7052 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
7053 offset += xlogue_layout::get_instance ().get_stack_space_used ();
7056 /* Align and set SSE register save area. */
7057 else if (frame->nsseregs)
7059 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
7060 required and the DRAP re-alignment boundary is at least 16 bytes,
7061 then we want the SSE register save area properly aligned. */
7062 if (ix86_incoming_stack_boundary >= 128
7063 || (stack_realign_drap && stack_alignment_needed >= 16))
7064 offset = ROUND_UP (offset, 16);
7065 offset += frame->nsseregs * 16;
7067 frame->sse_reg_save_offset = offset;
7068 offset += frame->va_arg_size;
7071 /* Align start of frame for local function. When a function call
7072 is removed, it may become a leaf function. But if argument may
7073 be passed on stack, we need to align the stack when there is no
7074 tail call. */
7075 if (m->call_ms2sysv
7076 || frame->va_arg_size != 0
7077 || size != 0
7078 || !crtl->is_leaf
7079 || (!crtl->tail_call_emit
7080 && cfun->machine->outgoing_args_on_stack)
7081 || cfun->calls_alloca
7082 || ix86_current_function_calls_tls_descriptor)
7083 offset = ROUND_UP (offset, stack_alignment_needed);
7085 /* Frame pointer points here. */
7086 frame->frame_pointer_offset = offset;
7088 offset += size;
7090 /* Add outgoing arguments area. Can be skipped if we eliminated
7091 all the function calls as dead code.
7092 Skipping is however impossible when function calls alloca. Alloca
7093 expander assumes that last crtl->outgoing_args_size
7094 of stack frame are unused. */
7095 if (ACCUMULATE_OUTGOING_ARGS
7096 && (!crtl->is_leaf || cfun->calls_alloca
7097 || ix86_current_function_calls_tls_descriptor))
7099 offset += crtl->outgoing_args_size;
7100 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7102 else
7103 frame->outgoing_arguments_size = 0;
7105 /* Align stack boundary. Only needed if we're calling another function
7106 or using alloca. */
7107 if (!crtl->is_leaf || cfun->calls_alloca
7108 || ix86_current_function_calls_tls_descriptor)
7109 offset = ROUND_UP (offset, preferred_alignment);
7111 /* We've reached end of stack frame. */
7112 frame->stack_pointer_offset = offset;
7114 /* Size prologue needs to allocate. */
7115 to_allocate = offset - frame->sse_reg_save_offset;
7117 if ((!to_allocate && frame->nregs <= 1)
7118 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
7119 /* If static stack checking is enabled and done with probes,
7120 the registers need to be saved before allocating the frame. */
7121 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
7122 /* If stack clash probing needs a loop, then it needs a
7123 scratch register. But the returned register is only guaranteed
7124 to be safe to use after register saves are complete. So if
7125 stack clash protections are enabled and the allocated frame is
7126 larger than the probe interval, then use pushes to save
7127 callee saved registers. */
7128 || (flag_stack_clash_protection
7129 && !ix86_target_stack_probe ()
7130 && to_allocate > get_probe_interval ()))
7131 frame->save_regs_using_mov = false;
7133 if (ix86_using_red_zone ()
7134 && crtl->sp_is_unchanging
7135 && crtl->is_leaf
7136 && !ix86_pc_thunk_call_expanded
7137 && !ix86_current_function_calls_tls_descriptor)
7139 frame->red_zone_size = to_allocate;
7140 if (frame->save_regs_using_mov)
7141 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7142 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7143 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7145 else
7146 frame->red_zone_size = 0;
7147 frame->stack_pointer_offset -= frame->red_zone_size;
7149 /* The SEH frame pointer location is near the bottom of the frame.
7150 This is enforced by the fact that the difference between the
7151 stack pointer and the frame pointer is limited to 240 bytes in
7152 the unwind data structure. */
7153 if (TARGET_SEH)
7155 /* Force the frame pointer to point at or below the lowest register save
7156 area, see the SEH code in config/i386/winnt.cc for the rationale. */
7157 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
7159 /* If we can leave the frame pointer where it is, do so; however return
7160 the establisher frame for __builtin_frame_address (0) or else if the
7161 frame overflows the SEH maximum frame size.
7163 Note that the value returned by __builtin_frame_address (0) is quite
7164 constrained, because setjmp is piggybacked on the SEH machinery with
7165 recent versions of MinGW:
7167 # elif defined(__SEH__)
7168 # if defined(__aarch64__) || defined(_ARM64_)
7169 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
7170 # elif (__MINGW_GCC_VERSION < 40702)
7171 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
7172 # else
7173 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
7174 # endif
7176 and the second argument passed to _setjmp, if not null, is forwarded
7177 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
7178 built an ExceptionRecord on the fly describing the setjmp buffer). */
7179 const HOST_WIDE_INT diff
7180 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
7181 if (diff <= 255 && !crtl->accesses_prior_frames)
7183 /* The resulting diff will be a multiple of 16 lower than 255,
7184 i.e. at most 240 as required by the unwind data structure. */
7185 frame->hard_frame_pointer_offset += (diff & 15);
7187 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
7189 /* Ideally we'd determine what portion of the local stack frame
7190 (within the constraint of the lowest 240) is most heavily used.
7191 But without that complication, simply bias the frame pointer
7192 by 128 bytes so as to maximize the amount of the local stack
7193 frame that is addressable with 8-bit offsets. */
7194 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
7196 else
7197 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
7201 /* This is semi-inlined memory_address_length, but simplified
7202 since we know that we're always dealing with reg+offset, and
7203 to avoid having to create and discard all that rtl. */
7205 static inline int
7206 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
7208 int len = 4;
7210 if (offset == 0)
7212 /* EBP and R13 cannot be encoded without an offset. */
7213 len = (regno == BP_REG || regno == R13_REG);
7215 else if (IN_RANGE (offset, -128, 127))
7216 len = 1;
7218 /* ESP and R12 must be encoded with a SIB byte. */
7219 if (regno == SP_REG || regno == R12_REG)
7220 len++;
7222 return len;
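/* Worked examples of the lengths computed above (displacement plus any SIB
   byte, not counting opcode/modrm):

     0(%rbx)   -> 0   no displacement needed
     0(%rbp)   -> 1   EBP/R13 always need at least a disp8
     8(%rbp)   -> 1   disp8
     8(%rsp)   -> 2   disp8 + SIB byte for ESP/R12
     256(%rbx) -> 4   disp32
     256(%rsp) -> 5   disp32 + SIB byte  */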
7225 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7226 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7228 static bool
7229 sp_valid_at (HOST_WIDE_INT cfa_offset)
7231 const struct machine_frame_state &fs = cfun->machine->fs;
7232 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7234 /* Validate that the cfa_offset isn't in a "no-man's land". */
7235 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7236 return false;
7238 return fs.sp_valid;
7241 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7242 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7244 static inline bool
7245 fp_valid_at (HOST_WIDE_INT cfa_offset)
7247 const struct machine_frame_state &fs = cfun->machine->fs;
7248 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7250 /* Validate that the cfa_offset isn't in a "no-man's land". */
7251 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7252 return false;
7254 return fs.fp_valid;
7257 /* Choose a base register based upon alignment requested, speed and/or
7258 size. */
7260 static void
7261 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7262 HOST_WIDE_INT &base_offset,
7263 unsigned int align_reqested, unsigned int *align)
7265 const struct machine_function *m = cfun->machine;
7266 unsigned int hfp_align;
7267 unsigned int drap_align;
7268 unsigned int sp_align;
7269 bool hfp_ok = fp_valid_at (cfa_offset);
7270 bool drap_ok = m->fs.drap_valid;
7271 bool sp_ok = sp_valid_at (cfa_offset);
7273 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7275 /* Filter out any registers that don't meet the requested alignment
7276 criteria. */
7277 if (align_reqested)
7279 if (m->fs.realigned)
7280 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7281 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7282 notes (which we would need to use a realigned stack pointer),
7283 so disable on SEH targets. */
7284 else if (m->fs.sp_realigned)
7285 sp_align = crtl->stack_alignment_needed;
7287 hfp_ok = hfp_ok && hfp_align >= align_reqested;
7288 drap_ok = drap_ok && drap_align >= align_reqested;
7289 sp_ok = sp_ok && sp_align >= align_reqested;
7292 if (m->use_fast_prologue_epilogue)
7294 /* Choose the base register most likely to allow the most scheduling
7295 opportunities. Generally FP is valid throughout the function,
7296 while DRAP must be reloaded within the epilogue. But choose either
7297 over the SP due to increased encoding size. */
7299 if (hfp_ok)
7301 base_reg = hard_frame_pointer_rtx;
7302 base_offset = m->fs.fp_offset - cfa_offset;
7304 else if (drap_ok)
7306 base_reg = crtl->drap_reg;
7307 base_offset = 0 - cfa_offset;
7309 else if (sp_ok)
7311 base_reg = stack_pointer_rtx;
7312 base_offset = m->fs.sp_offset - cfa_offset;
7315 else
7317 HOST_WIDE_INT toffset;
7318 int len = 16, tlen;
7320 /* Choose the base register with the smallest address encoding.
7321 With a tie, choose FP > DRAP > SP. */
7322 if (sp_ok)
7324 base_reg = stack_pointer_rtx;
7325 base_offset = m->fs.sp_offset - cfa_offset;
7326 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7328 if (drap_ok)
7330 toffset = 0 - cfa_offset;
7331 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7332 if (tlen <= len)
7334 base_reg = crtl->drap_reg;
7335 base_offset = toffset;
7336 len = tlen;
7339 if (hfp_ok)
7341 toffset = m->fs.fp_offset - cfa_offset;
7342 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7343 if (tlen <= len)
7345 base_reg = hard_frame_pointer_rtx;
7346 base_offset = toffset;
7351 /* Set the align return value. */
7352 if (align)
7354 if (base_reg == stack_pointer_rtx)
7355 *align = sp_align;
7356 else if (base_reg == crtl->drap_reg)
7357 *align = drap_align;
7358 else if (base_reg == hard_frame_pointer_rtx)
7359 *align = hfp_align;
7363 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7364 the alignment of address. If ALIGN is non-null, it should point to
7365 an alignment value (in bits) that is preferred or zero and will
7366 receive the alignment of the base register that was selected,
7367 irrespective of whether or not CFA_OFFSET is a multiple of that
7368 alignment value. If it is possible for the base register offset to be
7369 non-immediate then SCRATCH_REGNO should specify a scratch register to
7370 use.
7372 The valid base registers are taken from CFUN->MACHINE->FS. */
7374 static rtx
7375 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7376 unsigned int scratch_regno = INVALID_REGNUM)
7378 rtx base_reg = NULL;
7379 HOST_WIDE_INT base_offset = 0;
7381 /* If a specific alignment is requested, try to get a base register
7382 with that alignment first. */
7383 if (align && *align)
7384 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7386 if (!base_reg)
7387 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7389 gcc_assert (base_reg != NULL);
7391 rtx base_offset_rtx = GEN_INT (base_offset);
7393 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7395 gcc_assert (scratch_regno != INVALID_REGNUM);
7397 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7398 emit_move_insn (scratch_reg, base_offset_rtx);
7400 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7403 return plus_constant (Pmode, base_reg, base_offset);
7406 /* Emit code to save registers in the prologue. */
7408 static void
7409 ix86_emit_save_regs (void)
7411 int regno;
7412 rtx_insn *insn;
7414 if (!TARGET_APX_PUSH2POP2
7415 || !ix86_can_use_push2pop2 ()
7416 || cfun->machine->func_type != TYPE_NORMAL)
7418 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7419 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7421 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7422 TARGET_APX_PPX));
7423 RTX_FRAME_RELATED_P (insn) = 1;
7426 else
7428 int regno_list[2];
7429 regno_list[0] = regno_list[1] = -1;
7430 int loaded_regnum = 0;
7431 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
7433 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7434 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7436 if (aligned)
7438 regno_list[loaded_regnum++] = regno;
7439 if (loaded_regnum == 2)
7441 gcc_assert (regno_list[0] != -1
7442 && regno_list[1] != -1
7443 && regno_list[0] != regno_list[1]);
7444 const int offset = UNITS_PER_WORD * 2;
7445 rtx mem = gen_rtx_MEM (TImode,
7446 gen_rtx_PRE_DEC (Pmode,
7447 stack_pointer_rtx));
7448 insn = emit_insn (gen_push2 (mem,
7449 gen_rtx_REG (word_mode,
7450 regno_list[0]),
7451 gen_rtx_REG (word_mode,
7452 regno_list[1]),
7453 TARGET_APX_PPX));
7454 RTX_FRAME_RELATED_P (insn) = 1;
7455 rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
7457 for (int i = 0; i < 2; i++)
7459 rtx dwarf_reg = gen_rtx_REG (word_mode,
7460 regno_list[i]);
7461 rtx sp_offset = plus_constant (Pmode,
7462 stack_pointer_rtx,
7463 + UNITS_PER_WORD
7464 * (1 - i));
7465 rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
7466 sp_offset),
7467 dwarf_reg);
7468 RTX_FRAME_RELATED_P (tmp) = 1;
7469 XVECEXP (dwarf, 0, i + 1) = tmp;
7471 rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
7472 plus_constant (Pmode,
7473 stack_pointer_rtx,
7474 -offset));
7475 RTX_FRAME_RELATED_P (sp_tmp) = 1;
7476 XVECEXP (dwarf, 0, 0) = sp_tmp;
7477 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
7479 loaded_regnum = 0;
7480 regno_list[0] = regno_list[1] = -1;
7483 else
7485 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
7486 TARGET_APX_PPX));
7487 RTX_FRAME_RELATED_P (insn) = 1;
7488 aligned = true;
7491 if (loaded_regnum == 1)
7493 insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
7494 regno_list[0]),
7495 TARGET_APX_PPX));
7496 RTX_FRAME_RELATED_P (insn) = 1;
7501 /* Emit a single register save at CFA - CFA_OFFSET. */
7503 static void
7504 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7505 HOST_WIDE_INT cfa_offset)
7507 struct machine_function *m = cfun->machine;
7508 rtx reg = gen_rtx_REG (mode, regno);
7509 rtx mem, addr, base, insn;
7510 unsigned int align = GET_MODE_ALIGNMENT (mode);
7512 addr = choose_baseaddr (cfa_offset, &align);
7513 mem = gen_frame_mem (mode, addr);
7515 /* The location alignment depends upon the base register. */
7516 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7517 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7518 set_mem_align (mem, align);
7520 insn = emit_insn (gen_rtx_SET (mem, reg));
7521 RTX_FRAME_RELATED_P (insn) = 1;
7523 base = addr;
7524 if (GET_CODE (base) == PLUS)
7525 base = XEXP (base, 0);
7526 gcc_checking_assert (REG_P (base));
7528 /* When saving registers into a re-aligned local stack frame, avoid
7529 any tricky guessing by dwarf2out. */
7530 if (m->fs.realigned)
7532 gcc_checking_assert (stack_realign_drap);
7534 if (regno == REGNO (crtl->drap_reg))
7536 /* A bit of a hack. We force the DRAP register to be saved in
7537 the re-aligned stack frame, which provides us with a copy
7538 of the CFA that will last past the prologue. Install it. */
7539 gcc_checking_assert (cfun->machine->fs.fp_valid);
7540 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7541 cfun->machine->fs.fp_offset - cfa_offset);
7542 mem = gen_rtx_MEM (mode, addr);
7543 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7545 else
7547 /* The frame pointer is a stable reference within the
7548 aligned frame. Use it. */
7549 gcc_checking_assert (cfun->machine->fs.fp_valid);
7550 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7551 cfun->machine->fs.fp_offset - cfa_offset);
7552 mem = gen_rtx_MEM (mode, addr);
7553 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7557 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7558 && cfa_offset >= m->fs.sp_realigned_offset)
7560 gcc_checking_assert (stack_realign_fp);
7561 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7564 /* The memory may not be relative to the current CFA register,
7565 which means that we may need to generate a new pattern for
7566 use by the unwind info. */
7567 else if (base != m->fs.cfa_reg)
7569 addr = plus_constant (Pmode, m->fs.cfa_reg,
7570 m->fs.cfa_offset - cfa_offset);
7571 mem = gen_rtx_MEM (mode, addr);
7572 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7576 /* Emit code to save registers using MOV insns.
7577 First register is stored at CFA - CFA_OFFSET. */
7578 static void
7579 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7581 unsigned int regno;
7583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7584 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7586 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7587 cfa_offset -= UNITS_PER_WORD;
7591 /* Emit code to save SSE registers using MOV insns.
7592 First register is stored at CFA - CFA_OFFSET. */
7593 static void
7594 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7596 unsigned int regno;
7598 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7599 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7601 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7602 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7606 static GTY(()) rtx queued_cfa_restores;
7608 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7609 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7610 Don't add the note if the previously saved value will be left untouched
7611 within stack red-zone till return, as unwinders can find the same value
7612 in the register and on the stack. */
7614 static void
7615 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7617 if (!crtl->shrink_wrapped
7618 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7619 return;
7621 if (insn)
7623 add_reg_note (insn, REG_CFA_RESTORE, reg);
7624 RTX_FRAME_RELATED_P (insn) = 1;
7626 else
7627 queued_cfa_restores
7628 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7631 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7633 static void
7634 ix86_add_queued_cfa_restore_notes (rtx insn)
7636 rtx last;
7637 if (!queued_cfa_restores)
7638 return;
7639 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7641 XEXP (last, 1) = REG_NOTES (insn);
7642 REG_NOTES (insn) = queued_cfa_restores;
7643 queued_cfa_restores = NULL_RTX;
7644 RTX_FRAME_RELATED_P (insn) = 1;
7647 /* Expand prologue or epilogue stack adjustment.
7648 The pattern exists to put a dependency on all ebp-based memory accesses.
7649 STYLE should be negative if instructions should be marked as frame related,
7650 zero if %r11 register is live and cannot be freely used and positive
7651 otherwise. */
7653 static rtx
7654 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7655 int style, bool set_cfa)
7657 struct machine_function *m = cfun->machine;
7658 rtx addend = offset;
7659 rtx insn;
7660 bool add_frame_related_expr = false;
7662 if (!x86_64_immediate_operand (offset, Pmode))
7664 /* r11 is used by indirect sibcall return as well, set before the
7665 epilogue and used after the epilogue. */
7666 if (style)
7667 addend = gen_rtx_REG (Pmode, R11_REG);
7668 else
7670 gcc_assert (src != hard_frame_pointer_rtx
7671 && dest != hard_frame_pointer_rtx);
7672 addend = hard_frame_pointer_rtx;
7674 emit_insn (gen_rtx_SET (addend, offset));
7675 if (style < 0)
7676 add_frame_related_expr = true;
7679 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7680 (Pmode, dest, src, addend));
7681 if (style >= 0)
7682 ix86_add_queued_cfa_restore_notes (insn);
7684 if (set_cfa)
7686 rtx r;
7688 gcc_assert (m->fs.cfa_reg == src);
7689 m->fs.cfa_offset += INTVAL (offset);
7690 m->fs.cfa_reg = dest;
7692 r = gen_rtx_PLUS (Pmode, src, offset);
7693 r = gen_rtx_SET (dest, r);
7694 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7695 RTX_FRAME_RELATED_P (insn) = 1;
7697 else if (style < 0)
7699 RTX_FRAME_RELATED_P (insn) = 1;
7700 if (add_frame_related_expr)
7702 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7703 r = gen_rtx_SET (dest, r);
7704 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7708 if (dest == stack_pointer_rtx)
7710 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7711 bool valid = m->fs.sp_valid;
7712 bool realigned = m->fs.sp_realigned;
7714 if (src == hard_frame_pointer_rtx)
7716 valid = m->fs.fp_valid;
7717 realigned = false;
7718 ooffset = m->fs.fp_offset;
7720 else if (src == crtl->drap_reg)
7722 valid = m->fs.drap_valid;
7723 realigned = false;
7724 ooffset = 0;
7726 else
7728 /* Else there are two possibilities: SP itself, which we set
7729 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7730 taken care of by hand along the eh_return path. */
7731 gcc_checking_assert (src == stack_pointer_rtx
7732 || offset == const0_rtx);
7735 m->fs.sp_offset = ooffset - INTVAL (offset);
7736 m->fs.sp_valid = valid;
7737 m->fs.sp_realigned = realigned;
7739 return insn;
7742 /* Find an available register to be used as dynamic realign argument
7743 pointer register.  Such a register will be written in the prologue and
7744 used at the beginning of the body, so it must not be
7745 1. parameter passing register.
7746 2. GOT pointer.
7747 We reuse static-chain register if it is available. Otherwise, we
7748 use DI for i386 and R13 for x86-64. We chose R13 since it has
7749 shorter encoding.
7751 Return: the regno of chosen register. */
7753 static unsigned int
7754 find_drap_reg (void)
7756 tree decl = cfun->decl;
7758 /* Always use callee-saved register if there are no caller-saved
7759 registers. */
7760 if (TARGET_64BIT)
7762 /* Use R13 for a nested function or a function that needs a static chain.
7763 Since a function with a tail call may use any caller-saved
7764 register in the epilogue, DRAP must not use a caller-saved
7765 register in such a case. */
7766 if (DECL_STATIC_CHAIN (decl)
7767 || (cfun->machine->call_saved_registers
7768 == TYPE_NO_CALLER_SAVED_REGISTERS)
7769 || crtl->tail_call_emit)
7770 return R13_REG;
7772 return R10_REG;
7774 else
7776 /* Use DI for a nested function or a function that needs a static chain.
7777 Since a function with a tail call may use any caller-saved
7778 register in the epilogue, DRAP must not use a caller-saved
7779 register in such a case. */
7780 if (DECL_STATIC_CHAIN (decl)
7781 || (cfun->machine->call_saved_registers
7782 == TYPE_NO_CALLER_SAVED_REGISTERS)
7783 || crtl->tail_call_emit
7784 || crtl->calls_eh_return)
7785 return DI_REG;
7787 /* Reuse static chain register if it isn't used for parameter
7788 passing. */
7789 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7791 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7792 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7793 return CX_REG;
7795 return DI_REG;
7799 /* Return minimum incoming stack alignment. */
7801 static unsigned int
7802 ix86_minimum_incoming_stack_boundary (bool sibcall)
7804 unsigned int incoming_stack_boundary;
7806 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7807 if (cfun->machine->func_type != TYPE_NORMAL)
7808 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7809 /* Prefer the one specified at command line. */
7810 else if (ix86_user_incoming_stack_boundary)
7811 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7812 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7813 if -mstackrealign is used, this isn't a sibcall check, and the
7814 estimated stack alignment is 128 bits. */
7815 else if (!sibcall
7816 && ix86_force_align_arg_pointer
7817 && crtl->stack_alignment_estimated == 128)
7818 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7819 else
7820 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7822 /* Incoming stack alignment can be changed on individual functions
7823 via force_align_arg_pointer attribute. We use the smallest
7824 incoming stack boundary. */
7825 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7826 && lookup_attribute ("force_align_arg_pointer",
7827 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7828 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7830 /* The incoming stack frame has to be aligned at least at
7831 parm_stack_boundary. */
7832 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7833 incoming_stack_boundary = crtl->parm_stack_boundary;
7835 /* The stack at the entry of main is aligned by the runtime. We use
7836 the smallest incoming stack boundary. */
7837 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7838 && DECL_NAME (current_function_decl)
7839 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7840 && DECL_FILE_SCOPE_P (current_function_decl))
7841 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7843 return incoming_stack_boundary;
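/* Illustrative examples (not part of the original source): in 64-bit
   mode an interrupt handler starts from a 128-bit boundary; a function
   carrying the force_align_arg_pointer attribute can be clamped down
   to MIN_STACK_BOUNDARY; and main is never assumed to need more than
   MAIN_STACK_BOUNDARY, since the runtime aligns its stack.  */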
7846 /* Update incoming stack boundary and estimated stack alignment. */
7848 static void
7849 ix86_update_stack_boundary (void)
7851 ix86_incoming_stack_boundary
7852 = ix86_minimum_incoming_stack_boundary (false);
7854 /* x86_64 vararg needs 16-byte stack alignment for the register save area. */
7855 if (TARGET_64BIT
7856 && cfun->stdarg
7857 && crtl->stack_alignment_estimated < 128)
7858 crtl->stack_alignment_estimated = 128;
7860 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7861 if (ix86_tls_descriptor_calls_expanded_in_cfun
7862 && crtl->preferred_stack_boundary < 128)
7863 crtl->preferred_stack_boundary = 128;
7866 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7867 needed or an rtx for DRAP otherwise. */
7869 static rtx
7870 ix86_get_drap_rtx (void)
7872 /* We must use DRAP if there are outgoing arguments on the stack or
7873 the stack pointer register is clobbered by an asm statement and
7874 ACCUMULATE_OUTGOING_ARGS is false. */
7875 if (ix86_force_drap
7876 || ((cfun->machine->outgoing_args_on_stack
7877 || crtl->sp_is_clobbered_by_asm)
7878 && !ACCUMULATE_OUTGOING_ARGS))
7879 crtl->need_drap = true;
7881 if (stack_realign_drap)
7883 /* Assign DRAP to vDRAP and return vDRAP. */
7884 unsigned int regno = find_drap_reg ();
7885 rtx drap_vreg;
7886 rtx arg_ptr;
7887 rtx_insn *seq, *insn;
7889 arg_ptr = gen_rtx_REG (Pmode, regno);
7890 crtl->drap_reg = arg_ptr;
7892 start_sequence ();
7893 drap_vreg = copy_to_reg (arg_ptr);
7894 seq = get_insns ();
7895 end_sequence ();
7897 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7898 if (!optimize)
7900 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7901 RTX_FRAME_RELATED_P (insn) = 1;
7903 return drap_vreg;
7905 else
7906 return NULL;
7909 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7911 static rtx
7912 ix86_internal_arg_pointer (void)
7914 return virtual_incoming_args_rtx;
7917 struct scratch_reg {
7918 rtx reg;
7919 bool saved;
7922 /* Return a short-lived scratch register for use on function entry.
7923 In 32-bit mode, it is valid only after the registers are saved
7924 in the prologue. This register must be released by means of
7925 release_scratch_register_on_entry once it is dead. */
7927 static void
7928 get_scratch_register_on_entry (struct scratch_reg *sr)
7930 int regno;
7932 sr->saved = false;
7934 if (TARGET_64BIT)
7936 /* We always use R11 in 64-bit mode. */
7937 regno = R11_REG;
7939 else
7941 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7942 bool fastcall_p
7943 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7944 bool thiscall_p
7945 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7946 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7947 int regparm = ix86_function_regparm (fntype, decl);
7948 int drap_regno
7949 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7951 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7952 for the static chain register. */
7953 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7954 && drap_regno != AX_REG)
7955 regno = AX_REG;
7956 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7957 for the static chain register. */
7958 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7959 regno = AX_REG;
7960 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7961 regno = DX_REG;
7962 /* ecx is the static chain register. */
7963 else if (regparm < 3 && !fastcall_p && !thiscall_p
7964 && !static_chain_p
7965 && drap_regno != CX_REG)
7966 regno = CX_REG;
7967 else if (ix86_save_reg (BX_REG, true, false))
7968 regno = BX_REG;
7969 /* esi is the static chain register. */
7970 else if (!(regparm == 3 && static_chain_p)
7971 && ix86_save_reg (SI_REG, true, false))
7972 regno = SI_REG;
7973 else if (ix86_save_reg (DI_REG, true, false))
7974 regno = DI_REG;
7975 else
7977 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7978 sr->saved = true;
7982 sr->reg = gen_rtx_REG (Pmode, regno);
7983 if (sr->saved)
7985 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7986 RTX_FRAME_RELATED_P (insn) = 1;
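/* Illustrative summary (not part of the original source): in 64-bit
   mode the scratch is always %r11, which is call-clobbered and never
   used for argument passing.  In 32-bit mode, a typical cdecl function
   with regparm 0 and no DRAP in %eax gets %eax; only when no suitable
   register is free does the code push %eax (or %edx if %eax holds
   DRAP) and set sr->saved so it is restored later.  */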
7990 /* Release a scratch register obtained from the preceding function.
7992 If RELEASE_VIA_POP is true, we just pop the register off the stack
7993 to release it. This is what non-Linux systems use with -fstack-check.
7995 Otherwise we use OFFSET to locate the saved register and the
7996 allocated stack space becomes part of the local frame and is
7997 deallocated by the epilogue. */
7999 static void
8000 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
8001 bool release_via_pop)
8003 if (sr->saved)
8005 if (release_via_pop)
8007 struct machine_function *m = cfun->machine;
8008 rtx x, insn = emit_insn (gen_pop (sr->reg));
8010 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8011 RTX_FRAME_RELATED_P (insn) = 1;
8012 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8013 x = gen_rtx_SET (stack_pointer_rtx, x);
8014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8015 m->fs.sp_offset -= UNITS_PER_WORD;
8017 else
8019 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
8020 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
8021 emit_insn (x);
8026 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
8028 If INT_REGISTERS_SAVED is true, then integer registers have already been
8029 pushed on the stack.
8031 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
8032 beyond SIZE bytes.
8034 This assumes no knowledge of the current probing state, i.e. it is never
8035 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
8036 a suitable probe. */
8038 static void
8039 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
8040 const bool int_registers_saved,
8041 const bool protection_area)
8043 struct machine_function *m = cfun->machine;
8045 /* If this function does not statically allocate stack space, then
8046 no probes are needed. */
8047 if (!size)
8049 /* However, the allocation of space via pushes for register
8050 saves could be viewed as allocating space, but without the
8051 need to probe. */
8052 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
8053 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8054 else
8055 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8056 return;
8059 /* If we are a noreturn function, then we have to consider the
8060 possibility that we're called via a jump rather than a call.
8062 Thus we don't have the implicit probe generated by saving the
8063 return address into the stack at the call. Thus, the stack
8064 pointer could be anywhere in the guard page. The safe thing
8065 to do is emit a probe now.
8067 The probe can be avoided if we have already emitted any callee
8068 register saves into the stack or have a frame pointer (which will
8069 have been saved as well). Those saves will function as implicit
8070 probes.
8072 ?!? This should be revamped to work like aarch64 and s390 where
8073 we track the offset from the most recent probe. Normally that
8074 offset would be zero. For a noreturn function we would reset
8075 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
8076 we just probe when we cross PROBE_INTERVAL. */
8077 if (TREE_THIS_VOLATILE (cfun->decl)
8078 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
8080 /* We can safely use any register here since we're just going to push
8081 its value and immediately pop it back. But we do try and avoid
8082 argument passing registers so as not to introduce dependencies in
8083 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
8084 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
8085 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
8086 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
8087 m->fs.sp_offset -= UNITS_PER_WORD;
8088 if (m->fs.cfa_reg == stack_pointer_rtx)
8090 m->fs.cfa_offset -= UNITS_PER_WORD;
8091 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8092 x = gen_rtx_SET (stack_pointer_rtx, x);
8093 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
8094 RTX_FRAME_RELATED_P (insn_push) = 1;
8095 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8096 x = gen_rtx_SET (stack_pointer_rtx, x);
8097 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
8098 RTX_FRAME_RELATED_P (insn_pop) = 1;
8100 emit_insn (gen_blockage ());
8103 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8104 const int dope = 4 * UNITS_PER_WORD;
8106 /* If there is protection area, take it into account in the size. */
8107 if (protection_area)
8108 size += probe_interval + dope;
8110 /* If we allocate less than the size of the guard statically,
8111 then no probing is necessary, but we do need to allocate
8112 the stack. */
8113 else if (size < (1 << param_stack_clash_protection_guard_size))
8115 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8116 GEN_INT (-size), -1,
8117 m->fs.cfa_reg == stack_pointer_rtx);
8118 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8119 return;
8122 /* We're allocating a large enough stack frame that we need to
8123 emit probes. Either emit them inline or in a loop depending
8124 on the size. */
8125 if (size <= 4 * probe_interval)
8127 HOST_WIDE_INT i;
8128 for (i = probe_interval; i <= size; i += probe_interval)
8130 /* Allocate PROBE_INTERVAL bytes. */
8131 rtx insn
8132 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8133 GEN_INT (-probe_interval), -1,
8134 m->fs.cfa_reg == stack_pointer_rtx);
8135 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
8137 /* And probe at *sp. */
8138 emit_stack_probe (stack_pointer_rtx);
8139 emit_insn (gen_blockage ());
8142 /* We need to allocate space for the residual, but we do not need
8143 to probe the residual... */
8144 HOST_WIDE_INT residual = (i - probe_interval - size);
8145 if (residual)
8147 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8148 GEN_INT (residual), -1,
8149 m->fs.cfa_reg == stack_pointer_rtx);
8151 /* ...except if there is a protection area to maintain. */
8152 if (protection_area)
8153 emit_stack_probe (stack_pointer_rtx);
8156 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
8158 else
8160 /* We expect the GP registers to be saved when probes are used
8161 as the probing sequences might need a scratch register and
8162 the routine to allocate one assumes the integer registers
8163 have already been saved. */
8164 gcc_assert (int_registers_saved);
8166 struct scratch_reg sr;
8167 get_scratch_register_on_entry (&sr);
8169 /* If we needed to save a register, then account for any space
8170 that was pushed (we are not going to pop the register when
8171 we do the restore). */
8172 if (sr.saved)
8173 size -= UNITS_PER_WORD;
8175 /* Step 1: round SIZE down to a multiple of the interval. */
8176 HOST_WIDE_INT rounded_size = size & -probe_interval;
8178 /* Step 2: compute final value of the loop counter. Use lea if
8179 possible. */
8180 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
8181 rtx insn;
8182 if (address_no_seg_operand (addr, Pmode))
8183 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
8184 else
8186 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8187 insn = emit_insn (gen_rtx_SET (sr.reg,
8188 gen_rtx_PLUS (Pmode, sr.reg,
8189 stack_pointer_rtx)));
8191 if (m->fs.cfa_reg == stack_pointer_rtx)
8193 add_reg_note (insn, REG_CFA_DEF_CFA,
8194 plus_constant (Pmode, sr.reg,
8195 m->fs.cfa_offset + rounded_size));
8196 RTX_FRAME_RELATED_P (insn) = 1;
8199 /* Step 3: the loop. */
8200 rtx size_rtx = GEN_INT (rounded_size);
8201 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
8202 size_rtx));
8203 if (m->fs.cfa_reg == stack_pointer_rtx)
8205 m->fs.cfa_offset += rounded_size;
8206 add_reg_note (insn, REG_CFA_DEF_CFA,
8207 plus_constant (Pmode, stack_pointer_rtx,
8208 m->fs.cfa_offset));
8209 RTX_FRAME_RELATED_P (insn) = 1;
8211 m->fs.sp_offset += rounded_size;
8212 emit_insn (gen_blockage ());
8214 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
8215 is equal to ROUNDED_SIZE. */
8217 if (size != rounded_size)
8219 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8220 GEN_INT (rounded_size - size), -1,
8221 m->fs.cfa_reg == stack_pointer_rtx);
8223 if (protection_area)
8224 emit_stack_probe (stack_pointer_rtx);
8227 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8229 /* This does not deallocate the space reserved for the scratch
8230 register. That will be deallocated in the epilogue. */
8231 release_scratch_register_on_entry (&sr, size, false);
8234 /* Adjust back to account for the protection area. */
8235 if (protection_area)
8236 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8237 GEN_INT (probe_interval + dope), -1,
8238 m->fs.cfa_reg == stack_pointer_rtx);
8240 /* Make sure nothing is scheduled before we are done. */
8241 emit_insn (gen_blockage ());
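/* Worked example (not part of the original source, numbers chosen for
   illustration): with SIZE = 0x4800 and a 0x1000-byte probe interval,
   SIZE exceeds 4 * 0x1000, so the loop path is taken.  Ignoring a
   possible scratch-register push, ROUNDED_SIZE = 0x4000 is allocated
   and probed in 0x1000-byte steps, and the remaining 0x800 bytes are
   then allocated without a probe (plus a trailing probe only if a
   protection area must be maintained).  */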
8244 /* Adjust the stack pointer up to REG while probing it. */
8246 const char *
8247 output_adjust_stack_and_probe (rtx reg)
8249 static int labelno = 0;
8250 char loop_lab[32];
8251 rtx xops[2];
8253 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8255 /* Loop. */
8256 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8258 /* SP = SP - PROBE_INTERVAL. */
8259 xops[0] = stack_pointer_rtx;
8260 xops[1] = GEN_INT (get_probe_interval ());
8261 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8263 /* Probe at SP. */
8264 xops[1] = const0_rtx;
8265 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
8267 /* Test if SP == LAST_ADDR. */
8268 xops[0] = stack_pointer_rtx;
8269 xops[1] = reg;
8270 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8272 /* Branch. */
8273 fputs ("\tjne\t", asm_out_file);
8274 assemble_name_raw (asm_out_file, loop_lab);
8275 fputc ('\n', asm_out_file);
8277 return "";
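/* Illustrative sketch (not part of the original source): assuming
   64-bit code, a 4096-byte probe interval and %r11 holding the target
   stack pointer value, the templates above print roughly

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0

   in AT&T syntax; the Intel-syntax variants come from the same
   templates.  */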
8280 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
8281 inclusive. These are offsets from the current stack pointer.
8283 INT_REGISTERS_SAVED is true if integer registers have already been
8284 pushed on the stack. */
8286 static void
8287 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
8288 const bool int_registers_saved)
8290 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8292 /* See if we have a constant small number of probes to generate. If so,
8293 that's the easy case. The run-time loop is made up of 6 insns in the
8294 generic case while the compile-time loop is made up of n insns for n #
8295 of intervals. */
8296 if (size <= 6 * probe_interval)
8298 HOST_WIDE_INT i;
8300 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8301 it exceeds SIZE. If only one probe is needed, this will not
8302 generate any code. Then probe at FIRST + SIZE. */
8303 for (i = probe_interval; i < size; i += probe_interval)
8304 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8305 -(first + i)));
8307 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8308 -(first + size)));
8311 /* Otherwise, do the same as above, but in a loop. Note that we must be
8312 extra careful with variables wrapping around because we might be at
8313 the very top (or the very bottom) of the address space and we have
8314 to be able to handle this case properly; in particular, we use an
8315 equality test for the loop condition. */
8316 else
8318 /* We expect the GP registers to be saved when probes are used
8319 as the probing sequences might need a scratch register and
8320 the routine to allocate one assumes the integer registers
8321 have already been saved. */
8322 gcc_assert (int_registers_saved);
8324 HOST_WIDE_INT rounded_size, last;
8325 struct scratch_reg sr;
8327 get_scratch_register_on_entry (&sr);
8330 /* Step 1: round SIZE to the previous multiple of the interval. */
8332 rounded_size = ROUND_DOWN (size, probe_interval);
8335 /* Step 2: compute initial and final value of the loop counter. */
8337 /* TEST_OFFSET = FIRST. */
8338 emit_move_insn (sr.reg, GEN_INT (-first));
8340 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8341 last = first + rounded_size;
8344 /* Step 3: the loop
8348 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8349 probe at TEST_ADDR
8351 while (TEST_ADDR != LAST_ADDR)
8353 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8354 until it is equal to ROUNDED_SIZE. */
8356 emit_insn
8357 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8360 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8361 that SIZE is equal to ROUNDED_SIZE. */
8363 if (size != rounded_size)
8364 emit_stack_probe (plus_constant (Pmode,
8365 gen_rtx_PLUS (Pmode,
8366 stack_pointer_rtx,
8367 sr.reg),
8368 rounded_size - size));
8370 release_scratch_register_on_entry (&sr, size, true);
8373 /* Make sure nothing is scheduled before we are done. */
8374 emit_insn (gen_blockage ());
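/* Worked example (not part of the original source, numbers chosen for
   illustration): with FIRST = 0, SIZE = 0x3000 and a 0x1000-byte probe
   interval, SIZE is within 6 intervals, so the inline case emits
   probes at sp - 0x1000, sp - 0x2000 and finally sp - 0x3000.  Larger
   sizes switch to the scratch-register loop above.  */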
8377 /* Probe a range of stack addresses from REG to END, inclusive. These are
8378 offsets from the current stack pointer. */
8380 const char *
8381 output_probe_stack_range (rtx reg, rtx end)
8383 static int labelno = 0;
8384 char loop_lab[32];
8385 rtx xops[3];
8387 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8389 /* Loop. */
8390 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8392 /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL. */
8393 xops[0] = reg;
8394 xops[1] = GEN_INT (get_probe_interval ());
8395 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8397 /* Probe at TEST_ADDR. */
8398 xops[0] = stack_pointer_rtx;
8399 xops[1] = reg;
8400 xops[2] = const0_rtx;
8401 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8403 /* Test if TEST_ADDR == LAST_ADDR. */
8404 xops[0] = reg;
8405 xops[1] = end;
8406 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8408 /* Branch. */
8409 fputs ("\tjne\t", asm_out_file);
8410 assemble_name_raw (asm_out_file, loop_lab);
8411 fputc ('\n', asm_out_file);
8413 return "";
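/* Illustrative sketch (not part of the original source): assuming
   64-bit code, a 4096-byte probe interval, %r11 as the offset register
   and -24576 as the final offset, the templates above print roughly

	.LPSRL1:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	$-24576, %r11
		jne	.LPSRL1

   in AT&T syntax.  */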
8416 /* Set stack_frame_required to false if a stack frame isn't required.
8417 Update STACK_ALIGNMENT to the largest alignment, in bits, of any stack
8418 slot used, if a stack frame is required and CHECK_STACK_SLOT is true. */
8420 static void
8421 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8422 bool check_stack_slot)
8424 HARD_REG_SET set_up_by_prologue, prologue_used;
8425 basic_block bb;
8427 CLEAR_HARD_REG_SET (prologue_used);
8428 CLEAR_HARD_REG_SET (set_up_by_prologue);
8429 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8430 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8431 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8432 HARD_FRAME_POINTER_REGNUM);
8434 /* The preferred stack alignment is the minimum stack alignment. */
8435 if (stack_alignment > crtl->preferred_stack_boundary)
8436 stack_alignment = crtl->preferred_stack_boundary;
8438 bool require_stack_frame = false;
8440 FOR_EACH_BB_FN (bb, cfun)
8442 rtx_insn *insn;
8443 FOR_BB_INSNS (bb, insn)
8444 if (NONDEBUG_INSN_P (insn)
8445 && requires_stack_frame_p (insn, prologue_used,
8446 set_up_by_prologue))
8448 require_stack_frame = true;
8450 if (check_stack_slot)
8452 /* Find the maximum stack alignment. */
8453 subrtx_iterator::array_type array;
8454 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8455 if (MEM_P (*iter)
8456 && (reg_mentioned_p (stack_pointer_rtx,
8457 *iter)
8458 || reg_mentioned_p (frame_pointer_rtx,
8459 *iter)))
8461 unsigned int alignment = MEM_ALIGN (*iter);
8462 if (alignment > stack_alignment)
8463 stack_alignment = alignment;
8469 cfun->machine->stack_frame_required = require_stack_frame;
8472 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8473 will guide prologue/epilogue to be generated in correct form. */
8475 static void
8476 ix86_finalize_stack_frame_flags (void)
8478 /* Check if stack realignment is really needed after reload, and
8479 store the result in cfun. */
8480 unsigned int incoming_stack_boundary
8481 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8482 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8483 unsigned int stack_alignment
8484 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8485 ? crtl->max_used_stack_slot_alignment
8486 : crtl->stack_alignment_needed);
8487 unsigned int stack_realign
8488 = (incoming_stack_boundary < stack_alignment);
8489 bool recompute_frame_layout_p = false;
8491 if (crtl->stack_realign_finalized)
8493 /* After stack_realign_needed is finalized, we can no longer
8494 change it. */
8495 gcc_assert (crtl->stack_realign_needed == stack_realign);
8496 return;
8499 /* It is always safe to compute max_used_stack_alignment. We
8500 compute it only if 128-bit aligned load/store may be generated
8501 on misaligned stack slot which will lead to segfault. */
8502 bool check_stack_slot
8503 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8504 ix86_find_max_used_stack_alignment (stack_alignment,
8505 check_stack_slot);
8507 /* If the only reason for frame_pointer_needed is that we conservatively
8508 assumed stack realignment might be needed or -fno-omit-frame-pointer
8509 is used, but in the end nothing that needed the stack alignment had
8510 been spilled nor stack access, clear frame_pointer_needed and say we
8511 don't need stack realignment.
8513 When vector register is used for piecewise move and store, we don't
8514 increase stack_alignment_needed as there is no register spill for
8515 piecewise move and store. Since stack_realign_needed is set to true
8516 by checking stack_alignment_estimated which is updated by pseudo
8517 vector register usage, we also need to check stack_realign_needed to
8518 eliminate frame pointer. */
8519 if ((stack_realign
8520 || (!flag_omit_frame_pointer && optimize)
8521 || crtl->stack_realign_needed)
8522 && frame_pointer_needed
8523 && crtl->is_leaf
8524 && crtl->sp_is_unchanging
8525 && !ix86_current_function_calls_tls_descriptor
8526 && !crtl->accesses_prior_frames
8527 && !cfun->calls_alloca
8528 && !crtl->calls_eh_return
8529 /* See ira_setup_eliminable_regset for the rationale. */
8530 && !(STACK_CHECK_MOVING_SP
8531 && flag_stack_check
8532 && flag_exceptions
8533 && cfun->can_throw_non_call_exceptions)
8534 && !ix86_frame_pointer_required ()
8535 && ix86_get_frame_size () == 0
8536 && ix86_nsaved_sseregs () == 0
8537 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8539 if (cfun->machine->stack_frame_required)
8541 /* Stack frame is required. If stack alignment needed is less
8542 than incoming stack boundary, don't realign stack. */
8543 stack_realign = incoming_stack_boundary < stack_alignment;
8544 if (!stack_realign)
8546 crtl->max_used_stack_slot_alignment
8547 = incoming_stack_boundary;
8548 crtl->stack_alignment_needed
8549 = incoming_stack_boundary;
8550 /* Also update preferred_stack_boundary for leaf
8551 functions. */
8552 crtl->preferred_stack_boundary
8553 = incoming_stack_boundary;
8556 else
8558 /* If drap has been set, but it actually isn't live at the
8559 start of the function, there is no reason to set it up. */
8560 if (crtl->drap_reg)
8562 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8563 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8564 REGNO (crtl->drap_reg)))
8566 crtl->drap_reg = NULL_RTX;
8567 crtl->need_drap = false;
8570 else
8571 cfun->machine->no_drap_save_restore = true;
8573 frame_pointer_needed = false;
8574 stack_realign = false;
8575 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8576 crtl->stack_alignment_needed = incoming_stack_boundary;
8577 crtl->stack_alignment_estimated = incoming_stack_boundary;
8578 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8579 crtl->preferred_stack_boundary = incoming_stack_boundary;
8580 df_finish_pass (true);
8581 df_scan_alloc (NULL);
8582 df_scan_blocks ();
8583 df_compute_regs_ever_live (true);
8584 df_analyze ();
8586 if (flag_var_tracking)
8588 /* Since frame pointer is no longer available, replace it with
8589 stack pointer - UNITS_PER_WORD in debug insns. */
8590 df_ref ref, next;
8591 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8592 ref; ref = next)
8594 next = DF_REF_NEXT_REG (ref);
8595 if (!DF_REF_INSN_INFO (ref))
8596 continue;
8598 /* Make sure the next ref is for a different instruction,
8599 so that we're not affected by the rescan. */
8600 rtx_insn *insn = DF_REF_INSN (ref);
8601 while (next && DF_REF_INSN (next) == insn)
8602 next = DF_REF_NEXT_REG (next);
8604 if (DEBUG_INSN_P (insn))
8606 bool changed = false;
8607 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8609 rtx *loc = DF_REF_LOC (ref);
8610 if (*loc == hard_frame_pointer_rtx)
8612 *loc = plus_constant (Pmode,
8613 stack_pointer_rtx,
8614 -UNITS_PER_WORD);
8615 changed = true;
8618 if (changed)
8619 df_insn_rescan (insn);
8624 recompute_frame_layout_p = true;
8627 else if (crtl->max_used_stack_slot_alignment >= 128
8628 && cfun->machine->stack_frame_required)
8630 /* We don't need to realign the stack. max_used_stack_alignment is
8631 used to decide how the stack frame should be aligned. This is
8632 independent of any psABI and of 32-bit vs 64-bit. */
8633 cfun->machine->max_used_stack_alignment
8634 = stack_alignment / BITS_PER_UNIT;
8637 if (crtl->stack_realign_needed != stack_realign)
8638 recompute_frame_layout_p = true;
8639 crtl->stack_realign_needed = stack_realign;
8640 crtl->stack_realign_finalized = true;
8641 if (recompute_frame_layout_p)
8642 ix86_compute_frame_layout ();
8645 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8647 static void
8648 ix86_elim_entry_set_got (rtx reg)
8650 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8651 rtx_insn *c_insn = BB_HEAD (bb);
8652 if (!NONDEBUG_INSN_P (c_insn))
8653 c_insn = next_nonnote_nondebug_insn (c_insn);
8654 if (c_insn && NONJUMP_INSN_P (c_insn))
8656 rtx pat = PATTERN (c_insn);
8657 if (GET_CODE (pat) == PARALLEL)
8659 rtx set = XVECEXP (pat, 0, 0);
8660 if (GET_CODE (set) == SET
8661 && GET_CODE (SET_SRC (set)) == UNSPEC
8662 && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
8663 && REGNO (SET_DEST (set)) == REGNO (reg))
8664 delete_insn (c_insn);
8669 static rtx
8670 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8672 rtx addr, mem;
8674 if (offset)
8675 addr = plus_constant (Pmode, frame_reg, offset);
8676 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8677 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8680 static inline rtx
8681 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8683 return gen_frame_set (reg, frame_reg, offset, false);
8686 static inline rtx
8687 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8689 return gen_frame_set (reg, frame_reg, offset, true);
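/* Illustrative example (not part of the original source): assuming a
   DImode register rbx, rax as the frame base and an offset of -16,
   gen_frame_store produces roughly

	(set (mem:DI (plus:DI (reg:DI ax) (const_int -16)))
	     (reg:DI bx))

   while gen_frame_load swaps the source and destination.  */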
8692 static void
8693 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8695 struct machine_function *m = cfun->machine;
8696 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8697 + m->call_ms2sysv_extra_regs;
8698 rtvec v = rtvec_alloc (ncregs + 1);
8699 unsigned int align, i, vi = 0;
8700 rtx_insn *insn;
8701 rtx sym, addr;
8702 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8703 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8705 /* AL should only be live with sysv_abi. */
8706 gcc_assert (!ix86_eax_live_at_start_p ());
8707 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8709 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
8710 regardless of whether we've actually realigned the stack or not. */
8711 align = GET_MODE_ALIGNMENT (V4SFmode);
8712 addr = choose_baseaddr (frame.stack_realign_offset
8713 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8714 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8716 emit_insn (gen_rtx_SET (rax, addr));
8718 /* Get the stub symbol. */
8719 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8720 : XLOGUE_STUB_SAVE);
8721 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8723 for (i = 0; i < ncregs; ++i)
8725 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8726 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8727 r.regno);
8728 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8731 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8733 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8734 RTX_FRAME_RELATED_P (insn) = true;
8737 /* Generate and return an insn body to AND X with Y. */
8739 static rtx_insn *
8740 gen_and2_insn (rtx x, rtx y)
8742 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8744 gcc_assert (insn_operand_matches (icode, 0, x));
8745 gcc_assert (insn_operand_matches (icode, 1, x));
8746 gcc_assert (insn_operand_matches (icode, 2, y));
8748 return GEN_FCN (icode) (x, x, y);
8751 /* Expand the prologue into a bunch of separate insns. */
8753 void
8754 ix86_expand_prologue (void)
8756 struct machine_function *m = cfun->machine;
8757 rtx insn, t;
8758 HOST_WIDE_INT allocate;
8759 bool int_registers_saved;
8760 bool sse_registers_saved;
8761 bool save_stub_call_needed;
8762 rtx static_chain = NULL_RTX;
8764 ix86_last_zero_store_uid = 0;
8765 if (ix86_function_naked (current_function_decl))
8767 if (flag_stack_usage_info)
8768 current_function_static_stack_size = 0;
8769 return;
8772 ix86_finalize_stack_frame_flags ();
8774 /* DRAP should not coexist with stack_realign_fp */
8775 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8777 memset (&m->fs, 0, sizeof (m->fs));
8779 /* Initialize CFA state for before the prologue. */
8780 m->fs.cfa_reg = stack_pointer_rtx;
8781 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8783 /* Track SP offset to the CFA. We continue tracking this after we've
8784 swapped the CFA register away from SP. In the case of re-alignment
8785 this is fudged; we're interested in offsets within the local frame. */
8786 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8787 m->fs.sp_valid = true;
8788 m->fs.sp_realigned = false;
8790 const struct ix86_frame &frame = cfun->machine->frame;
8792 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8794 /* We should have already generated an error for any use of
8795 ms_hook on a nested function. */
8796 gcc_checking_assert (!ix86_static_chain_on_stack);
8798 /* Check whether profiling is active and we shall use the
8799 profiling-before-prologue variant. If so, sorry. */
8800 if (crtl->profile && flag_fentry != 0)
8801 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8802 "with %<-mfentry%> for 32-bit");
8804 /* In ix86_asm_output_function_label we emitted:
8805 8b ff movl.s %edi,%edi
8806 55 push %ebp
8807 8b ec movl.s %esp,%ebp
8809 This matches the hookable function prologue in Win32 API
8810 functions in Microsoft Windows XP Service Pack 2 and newer.
8811 Wine uses this to enable Windows apps to hook the Win32 API
8812 functions provided by Wine.
8814 What that means is that we've already set up the frame pointer. */
8816 if (frame_pointer_needed
8817 && !(crtl->drap_reg && crtl->stack_realign_needed))
8819 rtx push, mov;
8821 /* We've decided to use the frame pointer already set up.
8822 Describe this to the unwinder by pretending that both
8823 push and mov insns happen right here.
8825 Putting the unwind info here at the end of the ms_hook
8826 is done so that we can make absolutely certain we get
8827 the required byte sequence at the start of the function,
8828 rather than relying on an assembler that can produce
8829 the exact encoding required.
8831 However it does mean (in the unpatched case) that we have
8832 a 1 insn window where the asynchronous unwind info is
8833 incorrect. However, if we placed the unwind info at
8834 its correct location we would have incorrect unwind info
8835 in the patched case. Which is probably all moot since
8836 I don't expect Wine generates dwarf2 unwind info for the
8837 system libraries that use this feature. */
8839 insn = emit_insn (gen_blockage ());
8841 push = gen_push (hard_frame_pointer_rtx);
8842 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8843 stack_pointer_rtx);
8844 RTX_FRAME_RELATED_P (push) = 1;
8845 RTX_FRAME_RELATED_P (mov) = 1;
8847 RTX_FRAME_RELATED_P (insn) = 1;
8848 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8849 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8851 /* Note that gen_push incremented m->fs.cfa_offset, even
8852 though we didn't emit the push insn here. */
8853 m->fs.cfa_reg = hard_frame_pointer_rtx;
8854 m->fs.fp_offset = m->fs.cfa_offset;
8855 m->fs.fp_valid = true;
8857 else
8859 /* The frame pointer is not needed so pop %ebp again.
8860 This leaves us with a pristine state. */
8861 emit_insn (gen_pop (hard_frame_pointer_rtx));
8865 /* The first insn of a function that accepts its static chain on the
8866 stack is to push the register that would be filled in by a direct
8867 call. This insn will be skipped by the trampoline. */
8868 else if (ix86_static_chain_on_stack)
8870 static_chain = ix86_static_chain (cfun->decl, false);
8871 insn = emit_insn (gen_push (static_chain));
8872 emit_insn (gen_blockage ());
8874 /* We don't want to interpret this push insn as a register save,
8875 only as a stack adjustment. The real copy of the register as
8876 a save will be done later, if needed. */
8877 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8878 t = gen_rtx_SET (stack_pointer_rtx, t);
8879 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8880 RTX_FRAME_RELATED_P (insn) = 1;
8883 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8884 DRAP is needed and stack realignment is really needed after reload. */
8885 if (stack_realign_drap)
8887 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8889 /* Can't use DRAP in interrupt function. */
8890 if (cfun->machine->func_type != TYPE_NORMAL)
8891 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8892 "in interrupt service routine. This may be worked "
8893 "around by avoiding functions with aggregate return.");
8895 /* Only need to push parameter pointer reg if it is caller saved. */
8896 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8898 /* Push arg pointer reg */
8899 insn = emit_insn (gen_push (crtl->drap_reg));
8900 RTX_FRAME_RELATED_P (insn) = 1;
8903 /* Grab the argument pointer. */
8904 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8905 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8906 RTX_FRAME_RELATED_P (insn) = 1;
8907 m->fs.cfa_reg = crtl->drap_reg;
8908 m->fs.cfa_offset = 0;
8910 /* Align the stack. */
8911 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8912 GEN_INT (-align_bytes)));
8913 RTX_FRAME_RELATED_P (insn) = 1;
8915 /* Replicate the return address on the stack so that return
8916 address can be reached via (argp - 1) slot. This is needed
8917 to implement macro RETURN_ADDR_RTX and intrinsic function
8918 expand_builtin_return_addr etc. */
8919 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8920 t = gen_frame_mem (word_mode, t);
8921 insn = emit_insn (gen_push (t));
8922 RTX_FRAME_RELATED_P (insn) = 1;
8924 /* For the purposes of frame and register save area addressing,
8925 we've started over with a new frame. */
8926 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8927 m->fs.realigned = true;
8929 if (static_chain)
8931 /* Replicate static chain on the stack so that static chain
8932 can be reached via (argp - 2) slot. This is needed for
8933 nested function with stack realignment. */
8934 insn = emit_insn (gen_push (static_chain));
8935 RTX_FRAME_RELATED_P (insn) = 1;
8939 int_registers_saved = (frame.nregs == 0);
8940 sse_registers_saved = (frame.nsseregs == 0);
8941 save_stub_call_needed = (m->call_ms2sysv);
8942 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8944 if (frame_pointer_needed && !m->fs.fp_valid)
8946 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8947 slower on all targets. Also sdb didn't like it. */
8948 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8949 RTX_FRAME_RELATED_P (insn) = 1;
8951 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8953 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8954 RTX_FRAME_RELATED_P (insn) = 1;
8956 if (m->fs.cfa_reg == stack_pointer_rtx)
8957 m->fs.cfa_reg = hard_frame_pointer_rtx;
8958 m->fs.fp_offset = m->fs.sp_offset;
8959 m->fs.fp_valid = true;
8963 if (!int_registers_saved)
8965 /* If saving registers via PUSH, do so now. */
8966 if (!frame.save_regs_using_mov)
8968 ix86_emit_save_regs ();
8969 int_registers_saved = true;
8970 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8973 /* When using red zone we may start register saving before allocating
8974 the stack frame saving one cycle of the prologue. However, avoid
8975 doing this if we have to probe the stack; at least on x86_64 the
8976 stack probe can turn into a call that clobbers a red zone location. */
8977 else if (ix86_using_red_zone ()
8978 && (! TARGET_STACK_PROBE
8979 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8981 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8982 cfun->machine->red_zone_used = true;
8983 int_registers_saved = true;
8987 if (frame.red_zone_size != 0)
8988 cfun->machine->red_zone_used = true;
8990 if (stack_realign_fp)
8992 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8993 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8995 /* Record last valid frame pointer offset. */
8996 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8998 /* The computation of the size of the re-aligned stack frame means
8999 that we must allocate the size of the register save area before
9000 performing the actual alignment. Otherwise we cannot guarantee
9001 that there's enough storage above the realignment point. */
9002 allocate = frame.reg_save_offset - m->fs.sp_offset
9003 + frame.stack_realign_allocate;
9004 if (allocate)
9005 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9006 GEN_INT (-allocate), -1, false);
9008 /* Align the stack. */
9009 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
9010 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
9011 m->fs.sp_realigned_offset = m->fs.sp_offset
9012 - frame.stack_realign_allocate;
9013 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
9014 Beyond this point, stack access should be done via choose_baseaddr or
9015 by using sp_valid_at and fp_valid_at to determine the correct base
9016 register. Henceforth, any CFA offset should be thought of as logical
9017 and not physical. */
9018 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
9019 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
9020 m->fs.sp_realigned = true;
9022 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
9023 is needed to describe where a register is saved using a realigned
9024 stack pointer, so we need to invalidate the stack pointer for that
9025 target. */
9026 if (TARGET_SEH)
9027 m->fs.sp_valid = false;
9029 /* If SP offset is non-immediate after allocation of the stack frame,
9030 then emit SSE saves or stub call prior to allocating the rest of the
9031 stack frame. This is less efficient for the out-of-line stub because
9032 we can't combine allocations across the call barrier, but it's better
9033 than using a scratch register. */
9034 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
9035 - m->fs.sp_realigned_offset),
9036 Pmode))
9038 if (!sse_registers_saved)
9040 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9041 sse_registers_saved = true;
9043 else if (save_stub_call_needed)
9045 ix86_emit_outlined_ms2sysv_save (frame);
9046 save_stub_call_needed = false;
9051 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9053 if (flag_stack_usage_info)
9055 /* We start to count from ARG_POINTER. */
9056 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9058 /* If it was realigned, take into account the fake frame. */
9059 if (stack_realign_drap)
9061 if (ix86_static_chain_on_stack)
9062 stack_size += UNITS_PER_WORD;
9064 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9065 stack_size += UNITS_PER_WORD;
9067 /* This over-estimates by 1 minimal-stack-alignment-unit but
9068 mitigates that by counting in the new return address slot. */
9069 current_function_dynamic_stack_size
9070 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9073 current_function_static_stack_size = stack_size;
9076 /* On SEH target with very large frame size, allocate an area to save
9077 SSE registers (as the very large allocation won't be described). */
9078 if (TARGET_SEH
9079 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
9080 && !sse_registers_saved)
9082 HOST_WIDE_INT sse_size
9083 = frame.sse_reg_save_offset - frame.reg_save_offset;
9085 gcc_assert (int_registers_saved);
9087 /* No need to do stack checking as the area will be immediately
9088 written. */
9089 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9090 GEN_INT (-sse_size), -1,
9091 m->fs.cfa_reg == stack_pointer_rtx);
9092 allocate -= sse_size;
9093 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9094 sse_registers_saved = true;
9097 /* If stack clash protection is requested, then probe the stack, unless it
9098 is already probed on the target. */
9099 if (allocate >= 0
9100 && flag_stack_clash_protection
9101 && !ix86_target_stack_probe ())
9103 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
9104 allocate = 0;
9107 /* The stack has already been decremented by the instruction calling us
9108 so probe if the size is non-negative to preserve the protection area. */
9109 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9111 const HOST_WIDE_INT probe_interval = get_probe_interval ();
9113 if (STACK_CHECK_MOVING_SP)
9115 if (crtl->is_leaf
9116 && !cfun->calls_alloca
9117 && allocate <= probe_interval)
9120 else
9122 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
9123 allocate = 0;
9127 else
9129 HOST_WIDE_INT size = allocate;
9131 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
9132 size = 0x80000000 - get_stack_check_protect () - 1;
9134 if (TARGET_STACK_PROBE)
9136 if (crtl->is_leaf && !cfun->calls_alloca)
9138 if (size > probe_interval)
9139 ix86_emit_probe_stack_range (0, size, int_registers_saved);
9141 else
9142 ix86_emit_probe_stack_range (0,
9143 size + get_stack_check_protect (),
9144 int_registers_saved);
9146 else
9148 if (crtl->is_leaf && !cfun->calls_alloca)
9150 if (size > probe_interval
9151 && size > get_stack_check_protect ())
9152 ix86_emit_probe_stack_range (get_stack_check_protect (),
9153 (size
9154 - get_stack_check_protect ()),
9155 int_registers_saved);
9157 else
9158 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
9159 int_registers_saved);
9164 if (allocate == 0)
9166 else if (!ix86_target_stack_probe ()
9167 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9169 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9170 GEN_INT (-allocate), -1,
9171 m->fs.cfa_reg == stack_pointer_rtx);
9173 else
9175 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9176 rtx r10 = NULL;
9177 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
9178 bool eax_live = ix86_eax_live_at_start_p ();
9179 bool r10_live = false;
9181 if (TARGET_64BIT)
9182 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9184 if (eax_live)
9186 insn = emit_insn (gen_push (eax));
9187 allocate -= UNITS_PER_WORD;
9188 /* Note that SEH directives need to continue tracking the stack
9189 pointer even after the frame pointer has been set up. */
9190 if (sp_is_cfa_reg || TARGET_SEH)
9192 if (sp_is_cfa_reg)
9193 m->fs.cfa_offset += UNITS_PER_WORD;
9194 RTX_FRAME_RELATED_P (insn) = 1;
9195 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9196 gen_rtx_SET (stack_pointer_rtx,
9197 plus_constant (Pmode,
9198 stack_pointer_rtx,
9199 -UNITS_PER_WORD)));
9203 if (r10_live)
9205 r10 = gen_rtx_REG (Pmode, R10_REG);
9206 insn = emit_insn (gen_push (r10));
9207 allocate -= UNITS_PER_WORD;
9208 if (sp_is_cfa_reg || TARGET_SEH)
9210 if (sp_is_cfa_reg)
9211 m->fs.cfa_offset += UNITS_PER_WORD;
9212 RTX_FRAME_RELATED_P (insn) = 1;
9213 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9214 gen_rtx_SET (stack_pointer_rtx,
9215 plus_constant (Pmode,
9216 stack_pointer_rtx,
9217 -UNITS_PER_WORD)));
9221 emit_move_insn (eax, GEN_INT (allocate));
9222 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
9224 /* Use the fact that AX still contains ALLOCATE. */
9225 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
9226 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
9228 if (sp_is_cfa_reg || TARGET_SEH)
9230 if (sp_is_cfa_reg)
9231 m->fs.cfa_offset += allocate;
9232 RTX_FRAME_RELATED_P (insn) = 1;
9233 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9234 gen_rtx_SET (stack_pointer_rtx,
9235 plus_constant (Pmode, stack_pointer_rtx,
9236 -allocate)));
9238 m->fs.sp_offset += allocate;
9240 /* Use stack_pointer_rtx for relative addressing so that code works for
9241 realigned stack. But this means that we need a blockage to prevent
9242 stores based on the frame pointer from being scheduled before. */
9243 if (r10_live && eax_live)
9245 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9246 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9247 gen_frame_mem (word_mode, t));
9248 t = plus_constant (Pmode, t, UNITS_PER_WORD);
9249 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
9250 gen_frame_mem (word_mode, t));
9251 emit_insn (gen_memory_blockage ());
9253 else if (eax_live || r10_live)
9255 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
9256 emit_move_insn (gen_rtx_REG (word_mode,
9257 (eax_live ? AX_REG : R10_REG)),
9258 gen_frame_mem (word_mode, t));
9259 emit_insn (gen_memory_blockage ());
9262 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9264 /* If we haven't already set up the frame pointer, do so now. */
9265 if (frame_pointer_needed && !m->fs.fp_valid)
9267 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
9268 GEN_INT (frame.stack_pointer_offset
9269 - frame.hard_frame_pointer_offset));
9270 insn = emit_insn (insn);
9271 RTX_FRAME_RELATED_P (insn) = 1;
9272 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
9274 if (m->fs.cfa_reg == stack_pointer_rtx)
9275 m->fs.cfa_reg = hard_frame_pointer_rtx;
9276 m->fs.fp_offset = frame.hard_frame_pointer_offset;
9277 m->fs.fp_valid = true;
9280 if (!int_registers_saved)
9281 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9282 if (!sse_registers_saved)
9283 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9284 else if (save_stub_call_needed)
9285 ix86_emit_outlined_ms2sysv_save (frame);
9287 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
9288 in the prologue. */
9289 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9291 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9292 insn = emit_insn (gen_set_got (pic));
9293 RTX_FRAME_RELATED_P (insn) = 1;
9294 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9295 emit_insn (gen_prologue_use (pic));
9296 /* Delete an already emitted SET_GOT if it exists and is allocated to
9297 REAL_PIC_OFFSET_TABLE_REGNUM. */
9298 ix86_elim_entry_set_got (pic);
9301 if (crtl->drap_reg && !crtl->stack_realign_needed)
9303 /* vDRAP is set up, but after reload it turns out stack realignment
9304 isn't necessary; here we emit prologue code to set up DRAP
9305 without the stack realignment adjustment. */
9306 t = choose_baseaddr (0, NULL);
9307 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9310 /* Prevent instructions from being scheduled into register save push
9311 sequence when access to the redzone area is done through frame pointer.
9312 The offset between the frame pointer and the stack pointer is calculated
9313 relative to the value of the stack pointer at the end of the function
9314 prologue, and moving instructions that access redzone area via frame
9315 pointer inside push sequence violates this assumption. */
9316 if (frame_pointer_needed && frame.red_zone_size)
9317 emit_insn (gen_memory_blockage ());
9319 /* SEH requires that the prologue end within 256 bytes of the start of
9320 the function. Prevent instruction schedules that would extend that.
9321 Further, prevent alloca modifications to the stack pointer from being
9322 combined with prologue modifications. */
9323 if (TARGET_SEH)
9324 emit_insn (gen_prologue_use (stack_pointer_rtx));
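/* Illustrative sketch (not part of the original source): for a plain
   64-bit function that needs a frame pointer and a small local frame,
   the prologue expanded above typically assembles to something like

	pushq	%rbp
	movq	%rsp, %rbp
	subq	$32, %rsp

   the exact sequence of register saves, realignment and stack-probe
   code depends on the frame layout and options handled in the cases
   above.  */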
9327 /* Emit code to restore REG using a POP or POPP insn. */
9329 static void
9330 ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
9332 struct machine_function *m = cfun->machine;
9333 rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
9335 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9336 m->fs.sp_offset -= UNITS_PER_WORD;
9338 if (m->fs.cfa_reg == crtl->drap_reg
9339 && REGNO (reg) == REGNO (crtl->drap_reg))
9341 /* Previously we'd represented the CFA as an expression
9342 like *(%ebp - 8). We've just popped that value from
9343 the stack, which means we need to reset the CFA to
9344 the drap register. This will remain until we restore
9345 the stack pointer. */
9346 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9347 RTX_FRAME_RELATED_P (insn) = 1;
9349 /* This means that the DRAP register is valid for addressing too. */
9350 m->fs.drap_valid = true;
9351 return;
9354 if (m->fs.cfa_reg == stack_pointer_rtx)
9356 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9357 x = gen_rtx_SET (stack_pointer_rtx, x);
9358 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9359 RTX_FRAME_RELATED_P (insn) = 1;
9361 m->fs.cfa_offset -= UNITS_PER_WORD;
9364 /* When the frame pointer is the CFA, and we pop it, we are
9365 swapping back to the stack pointer as the CFA. This happens
9366 for stack frames that don't allocate other data, so we assume
9367 the stack pointer is now pointing at the return address, i.e.
9368 the function entry state, which makes the offset be 1 word. */
9369 if (reg == hard_frame_pointer_rtx)
9371 m->fs.fp_valid = false;
9372 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9374 m->fs.cfa_reg = stack_pointer_rtx;
9375 m->fs.cfa_offset -= UNITS_PER_WORD;
9377 add_reg_note (insn, REG_CFA_DEF_CFA,
9378 plus_constant (Pmode, stack_pointer_rtx,
9379 m->fs.cfa_offset));
9380 RTX_FRAME_RELATED_P (insn) = 1;
9385 /* Emit code to restore REG using a POP2 insn. */
9386 static void
9387 ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
9389 struct machine_function *m = cfun->machine;
9390 const int offset = UNITS_PER_WORD * 2;
9391 rtx_insn *insn;
9393 rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
9394 stack_pointer_rtx));
9396 if (ppx_p)
9397 insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
9398 else
9399 insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
9401 RTX_FRAME_RELATED_P (insn) = 1;
9403 rtx dwarf = NULL_RTX;
9404 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
9405 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
9406 REG_NOTES (insn) = dwarf;
9407 m->fs.sp_offset -= offset;
9409 if (m->fs.cfa_reg == crtl->drap_reg
9410 && (REGNO (reg1) == REGNO (crtl->drap_reg)
9411 || REGNO (reg2) == REGNO (crtl->drap_reg)))
9413 /* Previously we'd represented the CFA as an expression
9414 like *(%ebp - 8). We've just popped that value from
9415 the stack, which means we need to reset the CFA to
9416 the drap register. This will remain until we restore
9417 the stack pointer. */
9418 add_reg_note (insn, REG_CFA_DEF_CFA,
9419 REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
9420 RTX_FRAME_RELATED_P (insn) = 1;
9422 /* This means that the DRAP register is valid for addressing too. */
9423 m->fs.drap_valid = true;
9424 return;
9427 if (m->fs.cfa_reg == stack_pointer_rtx)
9429 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
9430 x = gen_rtx_SET (stack_pointer_rtx, x);
9431 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9432 RTX_FRAME_RELATED_P (insn) = 1;
9434 m->fs.cfa_offset -= offset;
9437 /* When the frame pointer is the CFA, and we pop it, we are
9438 swapping back to the stack pointer as the CFA. This happens
9439 for stack frames that don't allocate other data, so we assume
9440 the stack pointer is now pointing at the return address, i.e.
9441 the function entry state, which makes the offset be 1 word. */
9442 if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
9444 m->fs.fp_valid = false;
9445 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9447 m->fs.cfa_reg = stack_pointer_rtx;
9448 m->fs.cfa_offset -= offset;
9450 add_reg_note (insn, REG_CFA_DEF_CFA,
9451 plus_constant (Pmode, stack_pointer_rtx,
9452 m->fs.cfa_offset));
9453 RTX_FRAME_RELATED_P (insn) = 1;
9458 /* Emit code to restore saved registers using POP insns. */
9460 static void
9461 ix86_emit_restore_regs_using_pop (bool ppx_p)
9463 unsigned int regno;
9465 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9466 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9467 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
9470 /* Emit code to restore saved registers using POP2 insns. */
9472 static void
9473 ix86_emit_restore_regs_using_pop2 (void)
9475 int regno;
9476 int regno_list[2];
9477 regno_list[0] = regno_list[1] = -1;
9478 int loaded_regnum = 0;
9479 bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
9481 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9482 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9484 if (aligned)
9486 regno_list[loaded_regnum++] = regno;
9487 if (loaded_regnum == 2)
9489 gcc_assert (regno_list[0] != -1
9490 && regno_list[1] != -1
9491 && regno_list[0] != regno_list[1]);
9493 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
9494 regno_list[0]),
9495 gen_rtx_REG (word_mode,
9496 regno_list[1]),
9497 TARGET_APX_PPX);
9498 loaded_regnum = 0;
9499 regno_list[0] = regno_list[1] = -1;
9502 else
9504 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
9505 TARGET_APX_PPX);
9506 aligned = true;
9510 if (loaded_regnum == 1)
9511 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
9512 TARGET_APX_PPX);
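/* Illustrative example (not part of the original source): if %rbx,
   %r12 and %r13 were saved and the stack is 16-byte aligned at this
   point, the loop pairs %rbx and %r12 into a single POP2 and restores
   %r13 with an ordinary POP.  If the stack starts out misaligned, the
   first register is popped singly to restore 16-byte alignment before
   pairing begins.  */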
9515 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
9516 omits the emit and only attaches the notes. */
9518 static void
9519 ix86_emit_leave (rtx_insn *insn)
9521 struct machine_function *m = cfun->machine;
9523 if (!insn)
9524 insn = emit_insn (gen_leave (word_mode));
9526 ix86_add_queued_cfa_restore_notes (insn);
9528 gcc_assert (m->fs.fp_valid);
9529 m->fs.sp_valid = true;
9530 m->fs.sp_realigned = false;
9531 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9532 m->fs.fp_valid = false;
9534 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9536 m->fs.cfa_reg = stack_pointer_rtx;
9537 m->fs.cfa_offset = m->fs.sp_offset;
9539 add_reg_note (insn, REG_CFA_DEF_CFA,
9540 plus_constant (Pmode, stack_pointer_rtx,
9541 m->fs.sp_offset));
9542 RTX_FRAME_RELATED_P (insn) = 1;
9544 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9545 m->fs.fp_offset);
9548 /* Emit code to restore saved registers using MOV insns.
9549 First register is restored from CFA - CFA_OFFSET. */
9550 static void
9551 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9552 bool maybe_eh_return)
9554 struct machine_function *m = cfun->machine;
9555 unsigned int regno;
9557 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9558 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9560 rtx reg = gen_rtx_REG (word_mode, regno);
9561 rtx mem;
9562 rtx_insn *insn;
9564 mem = choose_baseaddr (cfa_offset, NULL);
9565 mem = gen_frame_mem (word_mode, mem);
9566 insn = emit_move_insn (reg, mem);
9568 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9570 /* Previously we'd represented the CFA as an expression
9571 like *(%ebp - 8). We've just popped that value from
9572 the stack, which means we need to reset the CFA to
9573 the drap register. This will remain until we restore
9574 the stack pointer. */
9575 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9576 RTX_FRAME_RELATED_P (insn) = 1;
9578 /* This means that the DRAP register is valid for addressing. */
9579 m->fs.drap_valid = true;
9581 else
9582 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9584 cfa_offset -= UNITS_PER_WORD;
9588 /* Emit code to restore saved registers using MOV insns.
9589 First register is restored from CFA - CFA_OFFSET. */
9590 static void
9591 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9592 bool maybe_eh_return)
9594 unsigned int regno;
9596 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9597 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9599 rtx reg = gen_rtx_REG (V4SFmode, regno);
9600 rtx mem;
9601 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9603 mem = choose_baseaddr (cfa_offset, &align);
9604 mem = gen_rtx_MEM (V4SFmode, mem);
9606 /* The location alignment depends upon the base register. */
9607 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9608 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9609 set_mem_align (mem, align);
9610 emit_insn (gen_rtx_SET (reg, mem));
9612 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9614 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9618 static void
9619 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9620 bool use_call, int style)
9622 struct machine_function *m = cfun->machine;
9623 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9624 + m->call_ms2sysv_extra_regs;
9625 rtvec v;
9626 unsigned int elems_needed, align, i, vi = 0;
9627 rtx_insn *insn;
9628 rtx sym, tmp;
9629 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9630 rtx r10 = NULL_RTX;
9631 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9632 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9633 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9634 rtx rsi_frame_load = NULL_RTX;
9635 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9636 enum xlogue_stub stub;
9638 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9640 /* If using a realigned stack, we should never start with padding. */
9641 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9643 /* Setup RSI as the stub's base pointer. */
9644 align = GET_MODE_ALIGNMENT (V4SFmode);
9645 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9646 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9648 emit_insn (gen_rtx_SET (rsi, tmp));
9650 /* Get a symbol for the stub. */
9651 if (frame_pointer_needed)
9652 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9653 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9654 else
9655 stub = use_call ? XLOGUE_STUB_RESTORE
9656 : XLOGUE_STUB_RESTORE_TAIL;
9657 sym = xlogue.get_stub_rtx (stub);
9659 elems_needed = ncregs;
9660 if (use_call)
9661 elems_needed += 1;
9662 else
9663 elems_needed += frame_pointer_needed ? 5 : 3;
9664 v = rtvec_alloc (elems_needed);
9666 /* We call the epilogue stub when we need to pop incoming args or we are
9667 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9668 epilogue stub and it is the tail-call. */
9669 if (use_call)
9670 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9671 else
9673 RTVEC_ELT (v, vi++) = ret_rtx;
9674 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9675 if (frame_pointer_needed)
9677 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9678 gcc_assert (m->fs.fp_valid);
9679 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9681 tmp = plus_constant (DImode, rbp, 8);
9682 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9683 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9684 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9685 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9687 else
9689 /* If no hard frame pointer, we set R10 to the SP restore value. */
9690 gcc_assert (!m->fs.fp_valid);
9691 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9692 gcc_assert (m->fs.sp_valid);
9694 r10 = gen_rtx_REG (DImode, R10_REG);
9695 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9696 emit_insn (gen_rtx_SET (r10, tmp));
9698 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9702 /* Generate frame load insns and restore notes. */
9703 for (i = 0; i < ncregs; ++i)
9705 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9706 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9707 rtx reg, frame_load;
9709 reg = gen_rtx_REG (mode, r.regno);
9710 frame_load = gen_frame_load (reg, rsi, r.offset);
9712 /* Save RSI frame load insn & note to add last. */
9713 if (r.regno == SI_REG)
9715 gcc_assert (!rsi_frame_load);
9716 rsi_frame_load = frame_load;
9717 rsi_restore_offset = r.offset;
9719 else
9721 RTVEC_ELT (v, vi++) = frame_load;
9722 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9726 /* Add RSI frame load & restore note at the end. */
9727 gcc_assert (rsi_frame_load);
9728 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9729 RTVEC_ELT (v, vi++) = rsi_frame_load;
9730 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9731 rsi_restore_offset);
9733 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9734 if (!use_call && !frame_pointer_needed)
9736 gcc_assert (m->fs.sp_valid);
9737 gcc_assert (!m->fs.sp_realigned);
9739 /* At this point, R10 should point to frame.stack_realign_offset. */
9740 if (m->fs.cfa_reg == stack_pointer_rtx)
9741 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9742 m->fs.sp_offset = frame.stack_realign_offset;
9745 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9746 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9747 if (use_call)
9748 insn = emit_insn (tmp);
9749 else
9751 insn = emit_jump_insn (tmp);
9752 JUMP_LABEL (insn) = ret_rtx;
9754 if (frame_pointer_needed)
9755 ix86_emit_leave (insn);
9756 else
9758 /* Need CFA adjust note. */
9759 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9760 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9764 RTX_FRAME_RELATED_P (insn) = true;
9765 ix86_add_queued_cfa_restore_notes (insn);
9767 /* If we're not doing a tail-call, we need to adjust the stack. */
9768 if (use_call && m->fs.sp_valid)
9770 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9771 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9772 GEN_INT (dealloc), style,
9773 m->fs.cfa_reg == stack_pointer_rtx);
9777 /* Restore function stack, frame, and registers. */
9779 void
9780 ix86_expand_epilogue (int style)
9782 struct machine_function *m = cfun->machine;
9783 struct machine_frame_state frame_state_save = m->fs;
9784 bool restore_regs_via_mov;
9785 bool using_drap;
9786 bool restore_stub_is_tail = false;
9788 if (ix86_function_naked (current_function_decl))
9790 /* The program should not reach this point. */
9791 emit_insn (gen_ud2 ());
9792 return;
9795 ix86_finalize_stack_frame_flags ();
9796 const struct ix86_frame &frame = cfun->machine->frame;
9798 m->fs.sp_realigned = stack_realign_fp;
9799 m->fs.sp_valid = stack_realign_fp
9800 || !frame_pointer_needed
9801 || crtl->sp_is_unchanging;
9802 gcc_assert (!m->fs.sp_valid
9803 || m->fs.sp_offset == frame.stack_pointer_offset);
9805 /* The FP must be valid if the frame pointer is present. */
9806 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9807 gcc_assert (!m->fs.fp_valid
9808 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9810 /* We must have *some* valid pointer to the stack frame. */
9811 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9813 /* The DRAP is never valid at this point. */
9814 gcc_assert (!m->fs.drap_valid);
9816 /* See the comment about red zone and frame
9817 pointer usage in ix86_expand_prologue. */
9818 if (frame_pointer_needed && frame.red_zone_size)
9819 emit_insn (gen_memory_blockage ());
9821 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9822 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9824 /* Determine the CFA offset of the end of the red-zone. */
9825 m->fs.red_zone_offset = 0;
9826 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9828 /* The red-zone begins below the return address and the error code in
9829 an exception handler. */
9830 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9832 /* When the register save area is in the aligned portion of
9833 the stack, determine the maximum runtime displacement that
9834 matches up with the aligned frame. */
9835 if (stack_realign_drap)
9836 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9837 + UNITS_PER_WORD);
9840 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9842 /* Special care must be taken for the normal return case of a function
9843 using eh_return: the eax and edx registers are marked as saved, but
9844 not restored along this path. Adjust the save location to match. */
9845 if (crtl->calls_eh_return && style != 2)
9846 reg_save_offset -= 2 * UNITS_PER_WORD;
9848 /* EH_RETURN requires the use of moves to function properly. */
9849 if (crtl->calls_eh_return)
9850 restore_regs_via_mov = true;
9851 /* SEH requires the use of pops to identify the epilogue. */
9852 else if (TARGET_SEH)
9853 restore_regs_via_mov = false;
9854 /* If we're only restoring one register and sp cannot be used, then
9855 use a move instruction to restore the register, since it's
9856 less work than reloading sp and popping the register. */
9857 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9858 restore_regs_via_mov = true;
9859 else if (TARGET_EPILOGUE_USING_MOVE
9860 && cfun->machine->use_fast_prologue_epilogue
9861 && (frame.nregs > 1
9862 || m->fs.sp_offset != reg_save_offset))
9863 restore_regs_via_mov = true;
9864 else if (frame_pointer_needed
9865 && !frame.nregs
9866 && m->fs.sp_offset != reg_save_offset)
9867 restore_regs_via_mov = true;
9868 else if (frame_pointer_needed
9869 && TARGET_USE_LEAVE
9870 && cfun->machine->use_fast_prologue_epilogue
9871 && frame.nregs == 1)
9872 restore_regs_via_mov = true;
9873 else
9874 restore_regs_via_mov = false;
9876 if (restore_regs_via_mov || frame.nsseregs)
9878 /* Ensure that the entire register save area is addressable via
9879 the stack pointer, if we will restore SSE regs via sp. */
9880 if (TARGET_64BIT
9881 && m->fs.sp_offset > 0x7fffffff
9882 && sp_valid_at (frame.stack_realign_offset + 1)
9883 && (frame.nsseregs + frame.nregs) != 0)
9885 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9886 GEN_INT (m->fs.sp_offset
9887 - frame.sse_reg_save_offset),
9888 style,
9889 m->fs.cfa_reg == stack_pointer_rtx);
9893 /* If there are any SSE registers to restore, then we have to do it
9894 via moves, since there's obviously no pop for SSE regs. */
9895 if (frame.nsseregs)
9896 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9897 style == 2);
9899 if (m->call_ms2sysv)
9901 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9903 /* We cannot use a tail-call for the stub if:
9904 1. We have to pop incoming args,
9905 2. We have additional int regs to restore, or
9906 3. A sibling call will be the tail-call, or
9907 4. We are emitting an eh_return_internal epilogue.
9909 TODO: Item 4 has not yet been tested!
9911 If any of the above are true, we will call the stub rather than
9912 jump to it. */
9913 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9914 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9917 /* If using an out-of-line stub that is a tail-call, then...*/
9918 if (m->call_ms2sysv && restore_stub_is_tail)
9920 /* TODO: paranoid tests. (remove eventually) */
9921 gcc_assert (m->fs.sp_valid);
9922 gcc_assert (!m->fs.sp_realigned);
9923 gcc_assert (!m->fs.fp_valid);
9924 gcc_assert (!m->fs.realigned);
9925 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9926 gcc_assert (!crtl->drap_reg);
9927 gcc_assert (!frame.nregs);
9929 else if (restore_regs_via_mov)
9931 rtx t;
9933 if (frame.nregs)
9934 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9936 /* eh_return epilogues need %ecx added to the stack pointer. */
9937 if (style == 2)
9939 rtx sa = EH_RETURN_STACKADJ_RTX;
9940 rtx_insn *insn;
9942 /* Stack realignment doesn't work with eh_return. */
9943 if (crtl->stack_realign_needed)
9944 sorry ("Stack realignment not supported with "
9945 "%<__builtin_eh_return%>");
9947 /* regparm nested functions don't work with eh_return. */
9948 if (ix86_static_chain_on_stack)
9949 sorry ("regparm nested function not supported with "
9950 "%<__builtin_eh_return%>");
9952 if (frame_pointer_needed)
9954 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9955 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9956 emit_insn (gen_rtx_SET (sa, t));
9958 /* NB: eh_return epilogues must restore the frame pointer
9959 in word_mode since the upper 32 bits of RBP register
9960 can have any values. */
9961 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9962 rtx frame_reg = gen_rtx_REG (word_mode,
9963 HARD_FRAME_POINTER_REGNUM);
9964 insn = emit_move_insn (frame_reg, t);
9966 /* Note that we use SA as a temporary CFA, as the return
9967 address is at the proper place relative to it. We
9968 pretend this happens at the FP restore insn because
9969 prior to this insn the FP would be stored at the wrong
9970 offset relative to SA, and after this insn we have no
9971 other reasonable register to use for the CFA. We don't
9972 bother resetting the CFA to the SP for the duration of
9973 the return insn, unless the control flow instrumentation
9974 is done. In this case the SP is used later and we have
9975 to reset CFA to SP. */
9976 add_reg_note (insn, REG_CFA_DEF_CFA,
9977 plus_constant (Pmode, sa, UNITS_PER_WORD));
9978 ix86_add_queued_cfa_restore_notes (insn);
9979 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9980 RTX_FRAME_RELATED_P (insn) = 1;
9982 m->fs.cfa_reg = sa;
9983 m->fs.cfa_offset = UNITS_PER_WORD;
9984 m->fs.fp_valid = false;
9986 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9987 const0_rtx, style,
9988 flag_cf_protection);
9990 else
9992 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9993 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9994 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9995 ix86_add_queued_cfa_restore_notes (insn);
9997 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9998 if (m->fs.cfa_offset != UNITS_PER_WORD)
10000 m->fs.cfa_offset = UNITS_PER_WORD;
10001 add_reg_note (insn, REG_CFA_DEF_CFA,
10002 plus_constant (Pmode, stack_pointer_rtx,
10003 UNITS_PER_WORD));
10004 RTX_FRAME_RELATED_P (insn) = 1;
10007 m->fs.sp_offset = UNITS_PER_WORD;
10008 m->fs.sp_valid = true;
10009 m->fs.sp_realigned = false;
10012 else
10014 /* SEH requires that the function end with (1) a stack adjustment
10015 if necessary, (2) a sequence of pops, and (3) a return or
10016 jump instruction. Prevent insns from the function body from
10017 being scheduled into this sequence. */
10018 if (TARGET_SEH)
10020 /* Prevent a catch region from being adjacent to the standard
10021 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
10022 nor several other flags that would be interesting to test are
10023 set up yet. */
10024 if (flag_non_call_exceptions)
10025 emit_insn (gen_nops (const1_rtx));
10026 else
10027 emit_insn (gen_blockage ());
10030 /* First step is to deallocate the stack frame so that we can
10031 pop the registers. If the stack pointer was realigned, it needs
10032 to be restored now. Also do it on SEH targets for very large
10033 frames, as the emitted instructions aren't allowed by the ABI
10034 in epilogues. */
10035 if (!m->fs.sp_valid || m->fs.sp_realigned
10036 || (TARGET_SEH
10037 && (m->fs.sp_offset - reg_save_offset
10038 >= SEH_MAX_FRAME_SIZE)))
10040 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10041 GEN_INT (m->fs.fp_offset
10042 - reg_save_offset),
10043 style, false);
10045 else if (m->fs.sp_offset != reg_save_offset)
10047 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10048 GEN_INT (m->fs.sp_offset
10049 - reg_save_offset),
10050 style,
10051 m->fs.cfa_reg == stack_pointer_rtx);
10054 if (TARGET_APX_PUSH2POP2
10055 && ix86_can_use_push2pop2 ()
10056 && m->func_type == TYPE_NORMAL)
10057 ix86_emit_restore_regs_using_pop2 ();
10058 else
10059 ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
10062 /* If we used a frame pointer and haven't already got rid of it,
10063 then do so now. */
10064 if (m->fs.fp_valid)
10066 /* If the stack pointer is valid and pointing at the frame
10067 pointer store address, then we only need a pop. */
10068 if (sp_valid_at (frame.hfp_save_offset)
10069 && m->fs.sp_offset == frame.hfp_save_offset)
10070 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10071 /* Leave results in shorter dependency chains on CPUs that are
10072 able to grok it fast. */
10073 else if (TARGET_USE_LEAVE
10074 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
10075 || !cfun->machine->use_fast_prologue_epilogue)
10076 ix86_emit_leave (NULL);
10077 else
10079 pro_epilogue_adjust_stack (stack_pointer_rtx,
10080 hard_frame_pointer_rtx,
10081 const0_rtx, style, !using_drap);
10082 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10086 if (using_drap)
10088 int param_ptr_offset = UNITS_PER_WORD;
10089 rtx_insn *insn;
10091 gcc_assert (stack_realign_drap);
10093 if (ix86_static_chain_on_stack)
10094 param_ptr_offset += UNITS_PER_WORD;
10095 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10096 param_ptr_offset += UNITS_PER_WORD;
10098 insn = emit_insn (gen_rtx_SET
10099 (stack_pointer_rtx,
10100 plus_constant (Pmode, crtl->drap_reg,
10101 -param_ptr_offset)));
10102 m->fs.cfa_reg = stack_pointer_rtx;
10103 m->fs.cfa_offset = param_ptr_offset;
10104 m->fs.sp_offset = param_ptr_offset;
10105 m->fs.realigned = false;
10107 add_reg_note (insn, REG_CFA_DEF_CFA,
10108 plus_constant (Pmode, stack_pointer_rtx,
10109 param_ptr_offset));
10110 RTX_FRAME_RELATED_P (insn) = 1;
10112 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
10113 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10116 /* At this point the stack pointer must be valid, and we must have
10117 restored all of the registers. We may not have deallocated the
10118 entire stack frame. We've delayed this until now because it may
10119 be possible to merge the local stack deallocation with the
10120 deallocation forced by ix86_static_chain_on_stack. */
10121 gcc_assert (m->fs.sp_valid);
10122 gcc_assert (!m->fs.sp_realigned);
10123 gcc_assert (!m->fs.fp_valid);
10124 gcc_assert (!m->fs.realigned);
10125 if (m->fs.sp_offset != UNITS_PER_WORD)
10127 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10128 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10129 style, true);
10131 else
10132 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10134 /* Sibcall epilogues don't want a return instruction. */
10135 if (style == 0)
10137 m->fs = frame_state_save;
10138 return;
10141 if (cfun->machine->func_type != TYPE_NORMAL)
10142 emit_jump_insn (gen_interrupt_return ());
10143 else if (crtl->args.pops_args && crtl->args.size)
10145 rtx popc = GEN_INT (crtl->args.pops_args);
10147 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10148 address, do an explicit add, and jump indirectly to the caller. */
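   /* Illustrative sketch of the sequence emitted below (operand names are
      examples only):
          pop   %ecx          ; pull the return address into %ecx
          add   $popc, %esp   ; pop the >= 64K bytes of incoming arguments
          jmp   *%ecx         ; return to the caller indirectly  */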
10150 if (crtl->args.pops_args >= 65536)
10152 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10153 rtx_insn *insn;
10155 /* There is no "pascal" calling convention in any 64bit ABI. */
10156 gcc_assert (!TARGET_64BIT);
10158 insn = emit_insn (gen_pop (ecx));
10159 m->fs.cfa_offset -= UNITS_PER_WORD;
10160 m->fs.sp_offset -= UNITS_PER_WORD;
10162 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10163 x = gen_rtx_SET (stack_pointer_rtx, x);
10164 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10165 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10166 RTX_FRAME_RELATED_P (insn) = 1;
10168 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10169 popc, -1, true);
10170 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10172 else
10173 emit_jump_insn (gen_simple_return_pop_internal (popc));
10175 else if (!m->call_ms2sysv || !restore_stub_is_tail)
10177 /* In case of a return from EH, a simple return cannot be used,
10178 as the return address will be compared with a shadow stack
10179 return address. Use an indirect jump instead. */
10180 if (style == 2 && flag_cf_protection)
10182 /* Register used in indirect jump must be in word_mode. But
10183 Pmode may not be the same as word_mode for x32. */
10184 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
10185 rtx_insn *insn;
10187 insn = emit_insn (gen_pop (ecx));
10188 m->fs.cfa_offset -= UNITS_PER_WORD;
10189 m->fs.sp_offset -= UNITS_PER_WORD;
10191 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10192 x = gen_rtx_SET (stack_pointer_rtx, x);
10193 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10194 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
10195 RTX_FRAME_RELATED_P (insn) = 1;
10197 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10199 else
10200 emit_jump_insn (gen_simple_return_internal ());
10203 /* Restore the state back to the state from the prologue,
10204 so that it's correct for the next epilogue. */
10205 m->fs = frame_state_save;
10208 /* Reset from the function's potential modifications. */
10210 static void
10211 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
10213 if (pic_offset_table_rtx
10214 && !ix86_use_pseudo_pic_reg ())
10215 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10217 if (TARGET_MACHO)
10219 rtx_insn *insn = get_last_insn ();
10220 rtx_insn *deleted_debug_label = NULL;
10222 /* Mach-O doesn't support labels at the end of objects, so if
10223 it looks like we might want one, take special action.
10224 First, collect any sequence of deleted debug labels. */
10225 while (insn
10226 && NOTE_P (insn)
10227 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10229 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10230 notes; only set their CODE_LABEL_NUMBER to -1, as
10231 otherwise there would be code generation differences
10232 between -g and -g0. */
10233 if (NOTE_P (insn) && NOTE_KIND (insn)
10234 == NOTE_INSN_DELETED_DEBUG_LABEL)
10235 deleted_debug_label = insn;
10236 insn = PREV_INSN (insn);
10239 /* If we have:
10240 label:
10241 barrier
10242 then this needs to be detected, so skip past the barrier. */
10244 if (insn && BARRIER_P (insn))
10245 insn = PREV_INSN (insn);
10247 /* Up to now we've only seen notes or barriers. */
10248 if (insn)
10250 if (LABEL_P (insn)
10251 || (NOTE_P (insn)
10252 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
10253 /* Trailing label. */
10254 fputs ("\tnop\n", file);
10255 else if (cfun && ! cfun->is_thunk)
10257 /* See if we have a completely empty function body, skipping
10258 the special case of the picbase thunk emitted as asm. */
10259 while (insn && ! INSN_P (insn))
10260 insn = PREV_INSN (insn);
10261 /* If we don't find any insns, we've got an empty function body;
10262 I.e. completely empty - without a return or branch. This is
10263 taken as the case where a function body has been removed
10264 because it contains an inline __builtin_unreachable(). GCC
10265 declares that reaching __builtin_unreachable() means UB so
10266 we're not obliged to do anything special; however, we want
10267 non-zero-sized function bodies. To meet this, and help the
10268 user out, let's trap the case. */
10269 if (insn == NULL)
10270 fputs ("\tud2\n", file);
10273 else if (deleted_debug_label)
10274 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10275 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10276 CODE_LABEL_NUMBER (insn) = -1;
10280 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
10282 void
10283 ix86_print_patchable_function_entry (FILE *file,
10284 unsigned HOST_WIDE_INT patch_area_size,
10285 bool record_p)
10287 if (cfun->machine->function_label_emitted)
10289 /* NB: When ix86_print_patchable_function_entry is called after
10290 the function label has been emitted, we have inserted or queued
10291 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
10292 place. There is nothing to do here. */
10293 return;
10296 default_print_patchable_function_entry (file, patch_area_size,
10297 record_p);
10300 /* Output patchable area. NB: default_print_patchable_function_entry
10301 isn't available in i386.md. */
10303 void
10304 ix86_output_patchable_area (unsigned int patch_area_size,
10305 bool record_p)
10307 default_print_patchable_function_entry (asm_out_file,
10308 patch_area_size,
10309 record_p);
10312 /* Return a scratch register to use in the split stack prologue. The
10313 split stack prologue is used for -fsplit-stack. It is the first
10314 instructions in the function, even before the regular prologue.
10315 The scratch register can be any caller-saved register which is not
10316 used for parameters or for the static chain. */
10318 static unsigned int
10319 split_stack_prologue_scratch_regno (void)
10321 if (TARGET_64BIT)
10322 return R11_REG;
10323 else
10325 bool is_fastcall, is_thiscall;
10326 int regparm;
10328 is_fastcall = (lookup_attribute ("fastcall",
10329 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10330 != NULL);
10331 is_thiscall = (lookup_attribute ("thiscall",
10332 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10333 != NULL);
10334 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10336 if (is_fastcall)
10338 if (DECL_STATIC_CHAIN (cfun->decl))
10340 sorry ("%<-fsplit-stack%> does not support fastcall with "
10341 "nested function");
10342 return INVALID_REGNUM;
10344 return AX_REG;
10346 else if (is_thiscall)
10348 if (!DECL_STATIC_CHAIN (cfun->decl))
10349 return DX_REG;
10350 return AX_REG;
10352 else if (regparm < 3)
10354 if (!DECL_STATIC_CHAIN (cfun->decl))
10355 return CX_REG;
10356 else
10358 if (regparm >= 2)
10360 sorry ("%<-fsplit-stack%> does not support 2 register "
10361 "parameters for a nested function");
10362 return INVALID_REGNUM;
10364 return DX_REG;
10367 else
10369 /* FIXME: We could make this work by pushing a register
10370 around the addition and comparison. */
10371 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
10372 return INVALID_REGNUM;
10377 /* A SYMBOL_REF for the function which allocates new stack space for
10378 -fsplit-stack. */
10380 static GTY(()) rtx split_stack_fn;
10382 /* A SYMBOL_REF for the variant of __morestack used with the large
10383 model. */
10385 static GTY(()) rtx split_stack_fn_large;
10387 /* Return location of the stack guard value in the TLS block. */
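   /* Hedged note: on typical Linux/glibc configurations this yields a
      segment-relative memory reference such as %fs:<offset> in 64-bit mode
      or %gs:<offset> in 32-bit mode, built from DEFAULT_TLS_SEG_REG and
      TARGET_THREAD_SPLIT_STACK_OFFSET; the exact segment and offset are
      target-dependent.  */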
10390 ix86_split_stack_guard (void)
10392 int offset;
10393 addr_space_t as = DEFAULT_TLS_SEG_REG;
10394 rtx r;
10396 gcc_assert (flag_split_stack);
10398 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
10399 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
10400 #else
10401 gcc_unreachable ();
10402 #endif
10404 r = GEN_INT (offset);
10405 r = gen_const_mem (Pmode, r);
10406 set_mem_addr_space (r, as);
10408 return r;
10411 /* Handle -fsplit-stack. These are the first instructions in the
10412 function, even before the regular prologue. */
10414 void
10415 ix86_expand_split_stack_prologue (void)
10417 HOST_WIDE_INT allocate;
10418 unsigned HOST_WIDE_INT args_size;
10419 rtx_code_label *label;
10420 rtx limit, current, allocate_rtx, call_fusage;
10421 rtx_insn *call_insn;
10422 unsigned int scratch_regno = INVALID_REGNUM;
10423 rtx scratch_reg = NULL_RTX;
10424 rtx_code_label *varargs_label = NULL;
10425 rtx fn;
10427 gcc_assert (flag_split_stack && reload_completed);
10429 ix86_finalize_stack_frame_flags ();
10430 struct ix86_frame &frame = cfun->machine->frame;
10431 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10433 /* This is the label we will branch to if we have enough stack
10434 space. We expect the basic block reordering pass to reverse this
10435 branch if optimizing, so that we branch in the unlikely case. */
10436 label = gen_label_rtx ();
10438 /* We need to compare the stack pointer minus the frame size with
10439 the stack boundary in the TCB. The stack boundary always gives
10440 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10441 can compare directly. Otherwise we need to do an addition. */
10443 limit = ix86_split_stack_guard ();
10445 if (allocate >= SPLIT_STACK_AVAILABLE
10446 || flag_force_indirect_call)
10448 scratch_regno = split_stack_prologue_scratch_regno ();
10449 if (scratch_regno == INVALID_REGNUM)
10450 return;
10453 if (allocate >= SPLIT_STACK_AVAILABLE)
10455 rtx offset;
10457 /* We need a scratch register to hold the stack pointer minus
10458 the required frame size. Since this is the very start of the
10459 function, the scratch register can be any caller-saved
10460 register which is not used for parameters. */
10461 offset = GEN_INT (- allocate);
10463 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10464 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10466 /* We don't use gen_add in this case because it will
10467 want to split to lea, but when not optimizing the insn
10468 will not be split after this point. */
10469 emit_insn (gen_rtx_SET (scratch_reg,
10470 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10471 offset)));
10473 else
10475 emit_move_insn (scratch_reg, offset);
10476 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10478 current = scratch_reg;
10480 else
10481 current = stack_pointer_rtx;
10483 ix86_expand_branch (GEU, current, limit, label);
10484 rtx_insn *jump_insn = get_last_insn ();
10485 JUMP_LABEL (jump_insn) = label;
10487 /* Mark the jump as very likely to be taken. */
10488 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10490 if (split_stack_fn == NULL_RTX)
10492 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10493 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10495 fn = split_stack_fn;
10497 /* Get more stack space. We pass in the desired stack space and the
10498 size of the arguments to copy to the new stack. In 32-bit mode
10499 we push the parameters; __morestack will return on a new stack
10500 anyhow. In 64-bit mode we pass the parameters in r10 and
10501 r11. */
10502 allocate_rtx = GEN_INT (allocate);
10503 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10504 call_fusage = NULL_RTX;
10505 rtx pop = NULL_RTX;
10506 if (TARGET_64BIT)
10508 rtx reg10, reg11;
10510 reg10 = gen_rtx_REG (DImode, R10_REG);
10511 reg11 = gen_rtx_REG (DImode, R11_REG);
10513 /* If this function uses a static chain, it will be in %r10.
10514 Preserve it across the call to __morestack. */
10515 if (DECL_STATIC_CHAIN (cfun->decl))
10517 rtx rax;
10519 rax = gen_rtx_REG (word_mode, AX_REG);
10520 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10521 use_reg (&call_fusage, rax);
10524 if (flag_force_indirect_call
10525 || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10527 HOST_WIDE_INT argval;
10529 if (split_stack_fn_large == NULL_RTX)
10531 split_stack_fn_large
10532 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10533 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10536 fn = split_stack_fn_large;
10538 if (ix86_cmodel == CM_LARGE_PIC)
10540 rtx_code_label *label;
10541 rtx x;
10543 gcc_assert (Pmode == DImode);
10545 label = gen_label_rtx ();
10546 emit_label (label);
10547 LABEL_PRESERVE_P (label) = 1;
10548 emit_insn (gen_set_rip_rex64 (reg10, label));
10549 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10550 emit_insn (gen_add2_insn (reg10, reg11));
10551 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
10552 x = gen_rtx_CONST (Pmode, x);
10553 emit_move_insn (reg11, x);
10554 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10555 x = gen_const_mem (Pmode, x);
10556 fn = copy_to_suggested_reg (x, reg11, Pmode);
10558 else if (ix86_cmodel == CM_LARGE)
10559 fn = copy_to_suggested_reg (fn, reg11, Pmode);
10561 /* When using the large model we need to load the address
10562 into a register, and we've run out of registers. So we
10563 switch to a different calling convention, and we call a
10564 different function: __morestack_large_model. We pass the
10565 argument size in the upper 32 bits of r10 and pass the
10566 frame size in the lower 32 bits. */
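   /* Worked example with hypothetical values: for args_size == 0x20 and
      allocate == 0x120, argval == ((0x20 << 16) << 16) + 0x120
      == 0x0000002000000120, i.e. %r10 carries the argument size in its
      upper half and the frame size in its lower half.  */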
10567 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10568 gcc_assert ((args_size & 0xffffffff) == args_size);
10570 argval = ((args_size << 16) << 16) + allocate;
10571 emit_move_insn (reg10, GEN_INT (argval));
10573 else
10575 emit_move_insn (reg10, allocate_rtx);
10576 emit_move_insn (reg11, GEN_INT (args_size));
10577 use_reg (&call_fusage, reg11);
10580 use_reg (&call_fusage, reg10);
10582 else
10584 if (flag_force_indirect_call && flag_pic)
10586 rtx x;
10588 gcc_assert (Pmode == SImode);
10590 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10592 emit_insn (gen_set_got (scratch_reg));
10593 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
10594 UNSPEC_GOT);
10595 x = gen_rtx_CONST (Pmode, x);
10596 x = gen_rtx_PLUS (Pmode, scratch_reg, x);
10597 x = gen_const_mem (Pmode, x);
10598 fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
10601 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10602 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10603 insn = emit_insn (gen_push (allocate_rtx));
10604 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10605 pop = GEN_INT (2 * UNITS_PER_WORD);
10608 if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
10610 scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
10612 if (GET_MODE (fn) != word_mode)
10613 fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
10615 fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
10618 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10619 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10620 pop, false);
10621 add_function_usage_to (call_insn, call_fusage);
10622 if (!TARGET_64BIT)
10623 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10624 /* Indicate that this function can't jump to non-local gotos. */
10625 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10627 /* In order to make call/return prediction work right, we now need
10628 to execute a return instruction. See
10629 libgcc/config/i386/morestack.S for the details on how this works.
10631 For flow purposes gcc must not see this as a return
10632 instruction--we need control flow to continue at the subsequent
10633 label. Therefore, we use an unspec. */
10634 gcc_assert (crtl->args.pops_args < 65536);
10635 rtx_insn *ret_insn
10636 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10638 if ((flag_cf_protection & CF_BRANCH))
10640 /* Insert ENDBR since __morestack will jump back here via indirect
10641 call. */
10642 rtx cet_eb = gen_nop_endbr ();
10643 emit_insn_after (cet_eb, ret_insn);
10646 /* If we are in 64-bit mode and this function uses a static chain,
10647 we saved %r10 in %rax before calling __morestack. */
10648 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10649 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10650 gen_rtx_REG (word_mode, AX_REG));
10652 /* If this function calls va_start, we need to store a pointer to
10653 the arguments on the old stack, because they may not have been
10654 all copied to the new stack. At this point the old stack can be
10655 found at the frame pointer value used by __morestack, because
10656 __morestack has set that up before calling back to us. Here we
10657 store that pointer in a scratch register, and in
10658 ix86_expand_prologue we store the scratch register in a stack
10659 slot. */
10660 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10662 rtx frame_reg;
10663 int words;
10665 scratch_regno = split_stack_prologue_scratch_regno ();
10666 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10667 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10669 /* 64-bit:
10670 fp -> old fp value
10671 return address within this function
10672 return address of caller of this function
10673 stack arguments
10674 So we add three words to get to the stack arguments.
10676 32-bit:
10677 fp -> old fp value
10678 return address within this function
10679 first argument to __morestack
10680 second argument to __morestack
10681 return address of caller of this function
10682 stack arguments
10683 So we add five words to get to the stack arguments.
10685 words = TARGET_64BIT ? 3 : 5;
10686 emit_insn (gen_rtx_SET (scratch_reg,
10687 plus_constant (Pmode, frame_reg,
10688 words * UNITS_PER_WORD)));
10690 varargs_label = gen_label_rtx ();
10691 emit_jump_insn (gen_jump (varargs_label));
10692 JUMP_LABEL (get_last_insn ()) = varargs_label;
10694 emit_barrier ();
10697 emit_label (label);
10698 LABEL_NUSES (label) = 1;
10700 /* If this function calls va_start, we now have to set the scratch
10701 register for the case where we do not call __morestack. In this
10702 case we need to set it based on the stack pointer. */
10703 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10705 emit_insn (gen_rtx_SET (scratch_reg,
10706 plus_constant (Pmode, stack_pointer_rtx,
10707 UNITS_PER_WORD)));
10709 emit_label (varargs_label);
10710 LABEL_NUSES (varargs_label) = 1;
10714 /* We may have to tell the dataflow pass that the split stack prologue
10715 is initializing a scratch register. */
10717 static void
10718 ix86_live_on_entry (bitmap regs)
10720 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10722 gcc_assert (flag_split_stack);
10723 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10727 /* Extract the parts of an RTL expression that is a valid memory address
10728 for an instruction. Return false if the structure of the address is
10729 grossly off. */
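   /* Illustrative RTL example (not taken from a testcase): an address such as
          (plus:DI (plus:DI (mult:DI (reg:DI rbx) (const_int 4))
                            (reg:DI rsi))
                   (const_int 12))
      is decomposed below into base = rsi, index = rbx, scale = 4,
      disp = (const_int 12), seg = ADDR_SPACE_GENERIC.  */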
10731 bool
10732 ix86_decompose_address (rtx addr, struct ix86_address *out)
10734 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10735 rtx base_reg, index_reg;
10736 HOST_WIDE_INT scale = 1;
10737 rtx scale_rtx = NULL_RTX;
10738 rtx tmp;
10739 addr_space_t seg = ADDR_SPACE_GENERIC;
10741 /* Allow zero-extended SImode addresses,
10742 they will be emitted with addr32 prefix. */
10743 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10745 if (GET_CODE (addr) == ZERO_EXTEND
10746 && GET_MODE (XEXP (addr, 0)) == SImode)
10748 addr = XEXP (addr, 0);
10749 if (CONST_INT_P (addr))
10750 return false;
10752 else if (GET_CODE (addr) == AND
10753 && const_32bit_mask (XEXP (addr, 1), DImode))
10755 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10756 if (addr == NULL_RTX)
10757 return false;
10759 if (CONST_INT_P (addr))
10760 return false;
10762 else if (GET_CODE (addr) == AND)
10764 /* For an ASHIFT inside an AND, combine will not generate a
10765 canonical zero-extend. Merge the AND mask and the shift count
10766 to check whether it is a canonical zero-extend. */
10767 tmp = XEXP (addr, 0);
10768 rtx mask = XEXP (addr, 1);
10769 if (tmp && GET_CODE(tmp) == ASHIFT)
10771 rtx shift_val = XEXP (tmp, 1);
10772 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10773 && (((unsigned HOST_WIDE_INT) INTVAL(mask)
10774 | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1))
10775 == 0xffffffff))
10777 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10778 DImode);
10785 /* Allow SImode subregs of DImode addresses,
10786 they will be emitted with addr32 prefix. */
10787 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10789 if (SUBREG_P (addr)
10790 && GET_MODE (SUBREG_REG (addr)) == DImode)
10792 addr = SUBREG_REG (addr);
10793 if (CONST_INT_P (addr))
10794 return false;
10798 if (REG_P (addr))
10799 base = addr;
10800 else if (SUBREG_P (addr))
10802 if (REG_P (SUBREG_REG (addr)))
10803 base = addr;
10804 else
10805 return false;
10807 else if (GET_CODE (addr) == PLUS)
10809 rtx addends[4], op;
10810 int n = 0, i;
10812 op = addr;
10815 if (n >= 4)
10816 return false;
10817 addends[n++] = XEXP (op, 1);
10818 op = XEXP (op, 0);
10820 while (GET_CODE (op) == PLUS);
10821 if (n >= 4)
10822 return false;
10823 addends[n] = op;
10825 for (i = n; i >= 0; --i)
10827 op = addends[i];
10828 switch (GET_CODE (op))
10830 case MULT:
10831 if (index)
10832 return false;
10833 index = XEXP (op, 0);
10834 scale_rtx = XEXP (op, 1);
10835 break;
10837 case ASHIFT:
10838 if (index)
10839 return false;
10840 index = XEXP (op, 0);
10841 tmp = XEXP (op, 1);
10842 if (!CONST_INT_P (tmp))
10843 return false;
10844 scale = INTVAL (tmp);
10845 if ((unsigned HOST_WIDE_INT) scale > 3)
10846 return false;
10847 scale = 1 << scale;
10848 break;
10850 case ZERO_EXTEND:
10851 op = XEXP (op, 0);
10852 if (GET_CODE (op) != UNSPEC)
10853 return false;
10854 /* FALLTHRU */
10856 case UNSPEC:
10857 if (XINT (op, 1) == UNSPEC_TP
10858 && TARGET_TLS_DIRECT_SEG_REFS
10859 && seg == ADDR_SPACE_GENERIC)
10860 seg = DEFAULT_TLS_SEG_REG;
10861 else
10862 return false;
10863 break;
10865 case SUBREG:
10866 if (!REG_P (SUBREG_REG (op)))
10867 return false;
10868 /* FALLTHRU */
10870 case REG:
10871 if (!base)
10872 base = op;
10873 else if (!index)
10874 index = op;
10875 else
10876 return false;
10877 break;
10879 case CONST:
10880 case CONST_INT:
10881 case SYMBOL_REF:
10882 case LABEL_REF:
10883 if (disp)
10884 return false;
10885 disp = op;
10886 break;
10888 default:
10889 return false;
10893 else if (GET_CODE (addr) == MULT)
10895 index = XEXP (addr, 0); /* index*scale */
10896 scale_rtx = XEXP (addr, 1);
10898 else if (GET_CODE (addr) == ASHIFT)
10900 /* We're called for lea too, which implements ashift on occasion. */
10901 index = XEXP (addr, 0);
10902 tmp = XEXP (addr, 1);
10903 if (!CONST_INT_P (tmp))
10904 return false;
10905 scale = INTVAL (tmp);
10906 if ((unsigned HOST_WIDE_INT) scale > 3)
10907 return false;
10908 scale = 1 << scale;
10910 else
10911 disp = addr; /* displacement */
10913 if (index)
10915 if (REG_P (index))
10917 else if (SUBREG_P (index)
10918 && REG_P (SUBREG_REG (index)))
10920 else
10921 return false;
10924 /* Extract the integral value of scale. */
10925 if (scale_rtx)
10927 if (!CONST_INT_P (scale_rtx))
10928 return false;
10929 scale = INTVAL (scale_rtx);
10932 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10933 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10935 /* Avoid useless 0 displacement. */
10936 if (disp == const0_rtx && (base || index))
10937 disp = NULL_RTX;
10939 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10940 if (base_reg && index_reg && scale == 1
10941 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10942 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10943 || REGNO (index_reg) == SP_REG))
10945 std::swap (base, index);
10946 std::swap (base_reg, index_reg);
10949 /* Special case: %ebp cannot be encoded as a base without a displacement.
10950 Similarly %r13. */
10951 if (!disp && base_reg
10952 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10953 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10954 || REGNO (base_reg) == BP_REG
10955 || REGNO (base_reg) == R13_REG))
10956 disp = const0_rtx;
10958 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10959 Avoid this by transforming it to [%esi+0].
10960 Reload calls address legitimization without cfun defined, so we need
10961 to test cfun for being non-NULL. */
10962 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10963 && base_reg && !index_reg && !disp
10964 && REGNO (base_reg) == SI_REG)
10965 disp = const0_rtx;
10967 /* Special case: encode reg+reg instead of reg*2. */
10968 if (!base && index && scale == 2)
10969 base = index, base_reg = index_reg, scale = 1;
10971 /* Special case: scaling cannot be encoded without base or displacement. */
10972 if (!base && !disp && index && scale != 1)
10973 disp = const0_rtx;
10975 out->base = base;
10976 out->index = index;
10977 out->disp = disp;
10978 out->scale = scale;
10979 out->seg = seg;
10981 return true;
10984 /* Return cost of the memory address x.
10985 For i386, it is better to use a complex address than let gcc copy
10986 the address into a reg and make a new pseudo. But not if the address
10987 requires two regs - that would mean more pseudos with longer
10988 lifetimes. */
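   /* Informal worked example: for an address of the form
      base + index*scale + disp where both base and index are still pseudo
      registers, the code below computes 1 + 1 (base) + 1 (index) = 3;
      hard registers do not add to the cost, and on K6 a further penalty
      of 10 may be added for the addressing modes listed in the comment
      further down.  */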
10989 static int
10990 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10992 struct ix86_address parts;
10993 int cost = 1;
10994 int ok = ix86_decompose_address (x, &parts);
10996 gcc_assert (ok);
10998 if (parts.base && SUBREG_P (parts.base))
10999 parts.base = SUBREG_REG (parts.base);
11000 if (parts.index && SUBREG_P (parts.index))
11001 parts.index = SUBREG_REG (parts.index);
11003 /* Attempt to minimize the number of registers in the address by increasing
11004 the address cost for each register used. We don't increase the address cost
11005 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
11006 is not invariant itself, it most likely means that the base or index is not
11007 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
11008 which is not profitable for x86. */
11009 if (parts.base
11010 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11011 && (current_pass->type == GIMPLE_PASS
11012 || !pic_offset_table_rtx
11013 || !REG_P (parts.base)
11014 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
11015 cost++;
11017 if (parts.index
11018 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11019 && (current_pass->type == GIMPLE_PASS
11020 || !pic_offset_table_rtx
11021 || !REG_P (parts.index)
11022 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
11023 cost++;
11025 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11026 since its predecode logic can't detect the length of such instructions
11027 and they degenerate to vector decoding. Increase the cost of such
11028 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11029 to split such addresses or even refuse them altogether.
11031 The following addressing modes are affected:
11032 [base+scale*index]
11033 [scale*index+disp]
11034 [base+index]
11036 The first and last cases may be avoidable by explicitly coding a zero
11037 displacement in the memory address, but I don't have an AMD-K6 machine
11038 handy to check this theory. */
11040 if (TARGET_CPU_P (K6)
11041 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11042 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11043 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11044 cost += 10;
11046 return cost;
11049 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11050 this is used to form addresses to local data when -fPIC is in
11051 use. */
11053 static bool
11054 darwin_local_data_pic (rtx disp)
11056 return (GET_CODE (disp) == UNSPEC
11057 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11060 /* True if the function symbol operand X should be loaded from GOT.
11061 If CALL_P is true, X is a call operand.
11063 NB: -mno-direct-extern-access doesn't force load from GOT for
11064 call.
11066 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
11067 statements, since a PIC register could not be available at the
11068 call site. */
11070 bool
11071 ix86_force_load_from_GOT_p (rtx x, bool call_p)
11073 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
11074 && !TARGET_PECOFF && !TARGET_MACHO
11075 && (!flag_pic || this_is_asm_operands)
11076 && ix86_cmodel != CM_LARGE
11077 && ix86_cmodel != CM_LARGE_PIC
11078 && GET_CODE (x) == SYMBOL_REF
11079 && ((!call_p
11080 && (!ix86_direct_extern_access
11081 || (SYMBOL_REF_DECL (x)
11082 && lookup_attribute ("nodirect_extern_access",
11083 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
11084 || (SYMBOL_REF_FUNCTION_P (x)
11085 && (!flag_plt
11086 || (SYMBOL_REF_DECL (x)
11087 && lookup_attribute ("noplt",
11088 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
11089 && !SYMBOL_REF_LOCAL_P (x));
11092 /* Determine if a given RTX is a valid constant. We already know this
11093 satisfies CONSTANT_P. */
11095 static bool
11096 ix86_legitimate_constant_p (machine_mode mode, rtx x)
11098 switch (GET_CODE (x))
11100 case CONST:
11101 x = XEXP (x, 0);
11103 if (GET_CODE (x) == PLUS)
11105 if (!CONST_INT_P (XEXP (x, 1)))
11106 return false;
11107 x = XEXP (x, 0);
11110 if (TARGET_MACHO && darwin_local_data_pic (x))
11111 return true;
11113 /* Only some unspecs are valid as "constants". */
11114 if (GET_CODE (x) == UNSPEC)
11115 switch (XINT (x, 1))
11117 case UNSPEC_GOT:
11118 case UNSPEC_GOTOFF:
11119 case UNSPEC_PLTOFF:
11120 return TARGET_64BIT;
11121 case UNSPEC_TPOFF:
11122 case UNSPEC_NTPOFF:
11123 x = XVECEXP (x, 0, 0);
11124 return (GET_CODE (x) == SYMBOL_REF
11125 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11126 case UNSPEC_DTPOFF:
11127 x = XVECEXP (x, 0, 0);
11128 return (GET_CODE (x) == SYMBOL_REF
11129 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11130 default:
11131 return false;
11134 /* We must have drilled down to a symbol. */
11135 if (GET_CODE (x) == LABEL_REF)
11136 return true;
11137 if (GET_CODE (x) != SYMBOL_REF)
11138 return false;
11139 /* FALLTHRU */
11141 case SYMBOL_REF:
11142 /* TLS symbols are never valid. */
11143 if (SYMBOL_REF_TLS_MODEL (x))
11144 return false;
11146 /* DLLIMPORT symbols are never valid. */
11147 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11148 && SYMBOL_REF_DLLIMPORT_P (x))
11149 return false;
11151 #if TARGET_MACHO
11152 /* mdynamic-no-pic */
11153 if (MACHO_DYNAMIC_NO_PIC_P)
11154 return machopic_symbol_defined_p (x);
11155 #endif
11157 /* External function address should be loaded
11158 via the GOT slot to avoid PLT. */
11159 if (ix86_force_load_from_GOT_p (x))
11160 return false;
11162 break;
11164 CASE_CONST_SCALAR_INT:
11165 if (ix86_endbr_immediate_operand (x, VOIDmode))
11166 return false;
11168 switch (mode)
11170 case E_TImode:
11171 if (TARGET_64BIT)
11172 return true;
11173 /* FALLTHRU */
11174 case E_OImode:
11175 case E_XImode:
11176 if (!standard_sse_constant_p (x, mode)
11177 && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
11178 ? XImode
11179 : (TARGET_AVX
11180 ? OImode
11181 : (TARGET_SSE2
11182 ? TImode : DImode))) < GET_MODE_SIZE (mode))
11183 return false;
11184 default:
11185 break;
11187 break;
11189 case CONST_VECTOR:
11190 if (!standard_sse_constant_p (x, mode))
11191 return false;
11192 break;
11194 case CONST_DOUBLE:
11195 if (mode == E_BFmode)
11196 return false;
11198 default:
11199 break;
11202 /* Otherwise we handle everything else in the move patterns. */
11203 return true;
11206 /* Determine if it's legal to put X into the constant pool. This
11207 is not possible for the address of thread-local symbols, which
11208 is checked above. */
11210 static bool
11211 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
11213 /* We can put any immediate constant in memory. */
11214 switch (GET_CODE (x))
11216 CASE_CONST_ANY:
11217 return false;
11219 default:
11220 break;
11223 return !ix86_legitimate_constant_p (mode, x);
11226 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
11227 otherwise zero. */
11229 static bool
11230 is_imported_p (rtx x)
11232 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
11233 || GET_CODE (x) != SYMBOL_REF)
11234 return false;
11236 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
11240 /* Nonzero if the constant value X is a legitimate general operand
11241 when generating PIC code. It is given that flag_pic is on and
11242 that X satisfies CONSTANT_P. */
11244 bool
11245 legitimate_pic_operand_p (rtx x)
11247 rtx inner;
11249 switch (GET_CODE (x))
11251 case CONST:
11252 inner = XEXP (x, 0);
11253 if (GET_CODE (inner) == PLUS
11254 && CONST_INT_P (XEXP (inner, 1)))
11255 inner = XEXP (inner, 0);
11257 /* Only some unspecs are valid as "constants". */
11258 if (GET_CODE (inner) == UNSPEC)
11259 switch (XINT (inner, 1))
11261 case UNSPEC_GOT:
11262 case UNSPEC_GOTOFF:
11263 case UNSPEC_PLTOFF:
11264 return TARGET_64BIT;
11265 case UNSPEC_TPOFF:
11266 x = XVECEXP (inner, 0, 0);
11267 return (GET_CODE (x) == SYMBOL_REF
11268 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11269 case UNSPEC_MACHOPIC_OFFSET:
11270 return legitimate_pic_address_disp_p (x);
11271 default:
11272 return false;
11274 /* FALLTHRU */
11276 case SYMBOL_REF:
11277 case LABEL_REF:
11278 return legitimate_pic_address_disp_p (x);
11280 default:
11281 return true;
11285 /* Determine if a given CONST RTX is a valid memory displacement
11286 in PIC mode. */
11288 bool
11289 legitimate_pic_address_disp_p (rtx disp)
11291 bool saw_plus;
11293 /* In 64bit mode we can allow direct addresses of symbols and labels
11294 when they are not dynamic symbols. */
11295 if (TARGET_64BIT)
11297 rtx op0 = disp, op1;
11299 switch (GET_CODE (disp))
11301 case LABEL_REF:
11302 return true;
11304 case CONST:
11305 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11306 break;
11307 op0 = XEXP (XEXP (disp, 0), 0);
11308 op1 = XEXP (XEXP (disp, 0), 1);
11309 if (!CONST_INT_P (op1))
11310 break;
11311 if (GET_CODE (op0) == UNSPEC
11312 && (XINT (op0, 1) == UNSPEC_DTPOFF
11313 || XINT (op0, 1) == UNSPEC_NTPOFF)
11314 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
11315 return true;
11316 if (INTVAL (op1) >= 16*1024*1024
11317 || INTVAL (op1) < -16*1024*1024)
11318 break;
11319 if (GET_CODE (op0) == LABEL_REF)
11320 return true;
11321 if (GET_CODE (op0) == CONST
11322 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11323 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11324 return true;
11325 if (GET_CODE (op0) == UNSPEC
11326 && XINT (op0, 1) == UNSPEC_PCREL)
11327 return true;
11328 if (GET_CODE (op0) != SYMBOL_REF)
11329 break;
11330 /* FALLTHRU */
11332 case SYMBOL_REF:
11333 /* TLS references should always be enclosed in UNSPEC.
11334 A dllimported symbol always needs to be resolved. */
11335 if (SYMBOL_REF_TLS_MODEL (op0)
11336 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
11337 return false;
11339 if (TARGET_PECOFF)
11341 if (is_imported_p (op0))
11342 return true;
11344 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
11345 break;
11347 /* Non-external-weak function symbols need to be resolved only
11348 for the large model. Non-external symbols don't need to be
11349 resolved for large and medium models. For the small model,
11350 we don't need to resolve anything here. */
11351 if ((ix86_cmodel != CM_LARGE_PIC
11352 && SYMBOL_REF_FUNCTION_P (op0)
11353 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
11354 || !SYMBOL_REF_EXTERNAL_P (op0)
11355 || ix86_cmodel == CM_SMALL_PIC)
11356 return true;
11358 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
11359 && (SYMBOL_REF_LOCAL_P (op0)
11360 || ((ix86_direct_extern_access
11361 && !(SYMBOL_REF_DECL (op0)
11362 && lookup_attribute ("nodirect_extern_access",
11363 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
11364 && HAVE_LD_PIE_COPYRELOC
11365 && flag_pie
11366 && !SYMBOL_REF_WEAK (op0)
11367 && !SYMBOL_REF_FUNCTION_P (op0)))
11368 && ix86_cmodel != CM_LARGE_PIC)
11369 return true;
11370 break;
11372 default:
11373 break;
11376 if (GET_CODE (disp) != CONST)
11377 return false;
11378 disp = XEXP (disp, 0);
11380 if (TARGET_64BIT)
11382 /* It is unsafe to allow PLUS expressions. This would limit the allowed
11383 distance of GOT tables. We should not need these anyway. */
11384 if (GET_CODE (disp) != UNSPEC
11385 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11386 && XINT (disp, 1) != UNSPEC_GOTOFF
11387 && XINT (disp, 1) != UNSPEC_PCREL
11388 && XINT (disp, 1) != UNSPEC_PLTOFF))
11389 return false;
11391 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11392 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11393 return false;
11394 return true;
11397 saw_plus = false;
11398 if (GET_CODE (disp) == PLUS)
11400 if (!CONST_INT_P (XEXP (disp, 1)))
11401 return false;
11402 disp = XEXP (disp, 0);
11403 saw_plus = true;
11406 if (TARGET_MACHO && darwin_local_data_pic (disp))
11407 return true;
11409 if (GET_CODE (disp) != UNSPEC)
11410 return false;
11412 switch (XINT (disp, 1))
11414 case UNSPEC_GOT:
11415 if (saw_plus)
11416 return false;
11417 /* We need to check for both symbols and labels because VxWorks loads
11418 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11419 details. */
11420 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11421 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11422 case UNSPEC_GOTOFF:
11423 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11424 While the ABI also specifies a 32bit relocation, we don't produce it in
11425 the small PIC model at all. */
11426 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11427 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11428 && !TARGET_64BIT)
11429 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11430 return false;
11431 case UNSPEC_GOTTPOFF:
11432 case UNSPEC_GOTNTPOFF:
11433 case UNSPEC_INDNTPOFF:
11434 if (saw_plus)
11435 return false;
11436 disp = XVECEXP (disp, 0, 0);
11437 return (GET_CODE (disp) == SYMBOL_REF
11438 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11439 case UNSPEC_NTPOFF:
11440 disp = XVECEXP (disp, 0, 0);
11441 return (GET_CODE (disp) == SYMBOL_REF
11442 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11443 case UNSPEC_DTPOFF:
11444 disp = XVECEXP (disp, 0, 0);
11445 return (GET_CODE (disp) == SYMBOL_REF
11446 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11449 return false;
11452 /* Determine if op is suitable RTX for an address register.
11453 Return naked register if a register or a register subreg is
11454 found, otherwise return NULL_RTX. */
11456 static rtx
11457 ix86_validate_address_register (rtx op)
11459 machine_mode mode = GET_MODE (op);
11461 /* Only SImode or DImode registers can form the address. */
11462 if (mode != SImode && mode != DImode)
11463 return NULL_RTX;
11465 if (REG_P (op))
11466 return op;
11467 else if (SUBREG_P (op))
11469 rtx reg = SUBREG_REG (op);
11471 if (!REG_P (reg))
11472 return NULL_RTX;
11474 mode = GET_MODE (reg);
11476 /* Don't allow SUBREGs that span more than a word. They can
11477 lead to spill failures when the register is one word out
11478 of a two word structure. */
11479 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11480 return NULL_RTX;
11482 /* Allow only SUBREGs of non-eliminable hard registers. */
11483 if (register_no_elim_operand (reg, mode))
11484 return reg;
11487 /* Op is not a register. */
11488 return NULL_RTX;
11491 /* Determine which memory address register set insn can use. */
11493 static enum attr_addr
11494 ix86_memory_address_reg_class (rtx_insn* insn)
11496 /* LRA can do some initialization with a NULL insn;
11497 return the maximum register class in this case. */
11498 enum attr_addr addr_rclass = ADDR_GPR32;
11500 if (!insn)
11501 return addr_rclass;
11503 if (asm_noperands (PATTERN (insn)) >= 0
11504 || GET_CODE (PATTERN (insn)) == ASM_INPUT)
11505 return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;
11507 /* Return maximum register class for unrecognized instructions. */
11508 if (INSN_CODE (insn) < 0)
11509 return addr_rclass;
11511 /* Try to recognize the insn before calling get_attr_addr.
11512 Save current recog_data and current alternative. */
11513 struct recog_data_d saved_recog_data = recog_data;
11514 int saved_alternative = which_alternative;
11516 /* Update recog_data for processing of alternatives. */
11517 extract_insn_cached (insn);
11519 /* If the current alternative is not set, loop through the enabled
11520 alternatives and get the most limited register class. */
11521 if (saved_alternative == -1)
11523 alternative_mask enabled = get_enabled_alternatives (insn);
11525 for (int i = 0; i < recog_data.n_alternatives; i++)
11527 if (!TEST_BIT (enabled, i))
11528 continue;
11530 which_alternative = i;
11531 addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
11534 else
11536 which_alternative = saved_alternative;
11537 addr_rclass = get_attr_addr (insn);
11540 recog_data = saved_recog_data;
11541 which_alternative = saved_alternative;
11543 return addr_rclass;
11546 /* Return the base register class that memory addresses of INSN can use. */
11548 enum reg_class
11549 ix86_insn_base_reg_class (rtx_insn* insn)
11551 switch (ix86_memory_address_reg_class (insn))
11553 case ADDR_GPR8:
11554 return LEGACY_GENERAL_REGS;
11555 case ADDR_GPR16:
11556 return GENERAL_GPR16;
11557 case ADDR_GPR32:
11558 break;
11559 default:
11560 gcc_unreachable ();
11563 return BASE_REG_CLASS;
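/* Return true if hard register REGNO is usable as a base register in a
   memory address of INSN, given the address register class that the
   insn's alternatives allow.  */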
11566 bool
11567 ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
11569 switch (ix86_memory_address_reg_class (insn))
11571 case ADDR_GPR8:
11572 return LEGACY_INT_REGNO_P (regno);
11573 case ADDR_GPR16:
11574 return GENERAL_GPR16_REGNO_P (regno);
11575 case ADDR_GPR32:
11576 break;
11577 default:
11578 gcc_unreachable ();
11581 return GENERAL_REGNO_P (regno);
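/* Return the index register class that memory addresses of INSN can use.  */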
11584 enum reg_class
11585 ix86_insn_index_reg_class (rtx_insn* insn)
11587 switch (ix86_memory_address_reg_class (insn))
11589 case ADDR_GPR8:
11590 return LEGACY_INDEX_REGS;
11591 case ADDR_GPR16:
11592 return INDEX_GPR16;
11593 case ADDR_GPR32:
11594 break;
11595 default:
11596 gcc_unreachable ();
11599 return INDEX_REG_CLASS;
11602 /* Recognizes RTL expressions that are valid memory addresses for an
11603 instruction. The MODE argument is the machine mode for the MEM
11604 expression that wants to use this address.
11606 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11607 convert common non-canonical forms to canonical form so that they will
11608 be recognized. */
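/* Illustrative sketch: a canonical address decomposes into
   base + index*scale + disp, e.g. the RTL
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)),
   which ix86_decompose_address splits into the parts validated below.  */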
11610 static bool
11611 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11612 code_helper = ERROR_MARK)
11614 struct ix86_address parts;
11615 rtx base, index, disp;
11616 HOST_WIDE_INT scale;
11617 addr_space_t seg;
11619 if (ix86_decompose_address (addr, &parts) == 0)
11620 /* Decomposition failed. */
11621 return false;
11623 base = parts.base;
11624 index = parts.index;
11625 disp = parts.disp;
11626 scale = parts.scale;
11627 seg = parts.seg;
11629 /* Validate base register. */
11630 if (base)
11632 rtx reg = ix86_validate_address_register (base);
11634 if (reg == NULL_RTX)
11635 return false;
11637 unsigned int regno = REGNO (reg);
11638 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11639 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11640 /* Base is not valid. */
11641 return false;
11644 /* Validate index register. */
11645 if (index)
11647 rtx reg = ix86_validate_address_register (index);
11649 if (reg == NULL_RTX)
11650 return false;
11652 unsigned int regno = REGNO (reg);
11653 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11654 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11655 /* Index is not valid. */
11656 return false;
11659 /* Index and base should have the same mode. */
11660 if (base && index
11661 && GET_MODE (base) != GET_MODE (index))
11662 return false;
11664 /* Address override works only on the (%reg) part of %fs:(%reg). */
11665 if (seg != ADDR_SPACE_GENERIC
11666 && ((base && GET_MODE (base) != word_mode)
11667 || (index && GET_MODE (index) != word_mode)))
11668 return false;
11670 /* Validate scale factor. */
11671 if (scale != 1)
11673 if (!index)
11674 /* Scale without index. */
11675 return false;
11677 if (scale != 2 && scale != 4 && scale != 8)
11678 /* Scale is not a valid multiplier. */
11679 return false;
11682 /* Validate displacement. */
11683 if (disp)
11685 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11686 return false;
11688 if (GET_CODE (disp) == CONST
11689 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11690 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11691 switch (XINT (XEXP (disp, 0), 1))
11693 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11694 when used. While the ABI also specifies 32bit relocations, we
11695 don't produce them at all and use IP-relative addressing instead.
11696 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
11697 should be loaded via the GOT. */
11698 case UNSPEC_GOT:
11699 if (!TARGET_64BIT
11700 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11701 goto is_legitimate_pic;
11702 /* FALLTHRU */
11703 case UNSPEC_GOTOFF:
11704 gcc_assert (flag_pic);
11705 if (!TARGET_64BIT)
11706 goto is_legitimate_pic;
11708 /* 64bit address unspec. */
11709 return false;
11711 case UNSPEC_GOTPCREL:
11712 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11713 goto is_legitimate_pic;
11714 /* FALLTHRU */
11715 case UNSPEC_PCREL:
11716 gcc_assert (flag_pic);
11717 goto is_legitimate_pic;
11719 case UNSPEC_GOTTPOFF:
11720 case UNSPEC_GOTNTPOFF:
11721 case UNSPEC_INDNTPOFF:
11722 case UNSPEC_NTPOFF:
11723 case UNSPEC_DTPOFF:
11724 break;
11726 default:
11727 /* Invalid address unspec. */
11728 return false;
11731 else if (SYMBOLIC_CONST (disp)
11732 && (flag_pic
11733 #if TARGET_MACHO
11734 || (MACHOPIC_INDIRECT
11735 && !machopic_operand_p (disp))
11736 #endif
11740 is_legitimate_pic:
11741 if (TARGET_64BIT && (index || base))
11743 /* foo@dtpoff(%rX) is ok. */
11744 if (GET_CODE (disp) != CONST
11745 || GET_CODE (XEXP (disp, 0)) != PLUS
11746 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11747 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11748 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11749 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11750 /* Non-constant pic memory reference. */
11751 return false;
11753 else if ((!TARGET_MACHO || flag_pic)
11754 && ! legitimate_pic_address_disp_p (disp))
11755 /* Displacement is an invalid pic construct. */
11756 return false;
11757 #if TARGET_MACHO
11758 else if (MACHO_DYNAMIC_NO_PIC_P
11759 && !ix86_legitimate_constant_p (Pmode, disp))
11760 /* Displacement must be referenced via a non_lazy_pointer. */
11761 return false;
11762 #endif
11764 /* This code used to verify that a symbolic pic displacement
11765 includes the pic_offset_table_rtx register.
11767 While this is a good idea, unfortunately these constructs may
11768 be created by the "adds using lea" optimization for incorrect
11769 code like:
11771 int a;
11772 int foo(int i)
11774 return *(&a+i);
11777 This code is nonsensical, but results in addressing the
11778 GOT table with pic_offset_table_rtx as the base. We can't
11779 just refuse it easily, since it gets matched by the
11780 "addsi3" pattern, which later gets split to lea when the
11781 output register differs from the input. While this
11782 could be handled by a separate addsi pattern for this case
11783 that never results in lea, disabling this test seems to be
11784 the easier and correct fix for the crash. */
11786 else if (GET_CODE (disp) != LABEL_REF
11787 && !CONST_INT_P (disp)
11788 && (GET_CODE (disp) != CONST
11789 || !ix86_legitimate_constant_p (Pmode, disp))
11790 && (GET_CODE (disp) != SYMBOL_REF
11791 || !ix86_legitimate_constant_p (Pmode, disp)))
11792 /* Displacement is not constant. */
11793 return false;
11794 else if (TARGET_64BIT
11795 && !x86_64_immediate_operand (disp, VOIDmode))
11796 /* Displacement is out of range. */
11797 return false;
11798 /* In x32 mode, constant addresses are sign extended to 64bit, so
11799 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11800 else if (TARGET_X32 && !(index || base)
11801 && CONST_INT_P (disp)
11802 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11803 return false;
11806 /* Everything looks valid. */
11807 return true;
11810 /* Determine if a given RTX is a valid constant address. */
11812 bool
11813 constant_address_p (rtx x)
11815 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11818 /* Return a unique alias set for the GOT. */
11820 alias_set_type
11821 ix86_GOT_alias_set (void)
11823 static alias_set_type set = -1;
11824 if (set == -1)
11825 set = new_alias_set ();
11826 return set;
11829 /* Return a legitimate reference for ORIG (an address) using the
11830 register REG. If REG is 0, a new pseudo is generated.
11832 There are two types of references that must be handled:
11834 1. Global data references must load the address from the GOT, via
11835 the PIC reg. An insn is emitted to do this load, and the reg is
11836 returned.
11838 2. Static data references, constant pool addresses, and code labels
11839 compute the address as an offset from the GOT, whose base is in
11840 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11841 differentiate them from global data objects. The returned
11842 address is the PIC reg + an unspec constant.
11844 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11845 reg also appears in the address. */
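/* For example (a 32-bit -fpic sketch): a global data reference is
   loaded through the GOT as "movl foo@GOT(%ebx), %reg", while a local
   symbol is addressed directly as "%ebx + foo@GOTOFF".  */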
11848 legitimize_pic_address (rtx orig, rtx reg)
11850 rtx addr = orig;
11851 rtx new_rtx = orig;
11853 #if TARGET_MACHO
11854 if (TARGET_MACHO && !TARGET_64BIT)
11856 if (reg == 0)
11857 reg = gen_reg_rtx (Pmode);
11858 /* Use the generic Mach-O PIC machinery. */
11859 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11861 #endif
11863 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11865 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11866 if (tmp)
11867 return tmp;
11870 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11871 new_rtx = addr;
11872 else if ((!TARGET_64BIT
11873 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11874 && !TARGET_PECOFF
11875 && gotoff_operand (addr, Pmode))
11877 /* This symbol may be referenced via a displacement
11878 from the PIC base address (@GOTOFF). */
11879 if (GET_CODE (addr) == CONST)
11880 addr = XEXP (addr, 0);
11882 if (GET_CODE (addr) == PLUS)
11884 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11885 UNSPEC_GOTOFF);
11886 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11888 else
11889 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11891 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11893 if (TARGET_64BIT)
11894 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11896 if (reg != 0)
11898 gcc_assert (REG_P (reg));
11899 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11900 new_rtx, reg, 1, OPTAB_DIRECT);
11902 else
11903 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11905 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11906 /* We can't always use @GOTOFF for text labels
11907 on VxWorks, see gotoff_operand. */
11908 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11910 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11911 if (tmp)
11912 return tmp;
11914 /* For x64 PE-COFF there is no GOT table,
11915 so we use the address directly. */
11916 if (TARGET_64BIT && TARGET_PECOFF)
11918 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11919 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11921 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11924 UNSPEC_GOTPCREL);
11925 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11926 new_rtx = gen_const_mem (Pmode, new_rtx);
11927 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11929 else
11931 /* This symbol must be referenced via a load
11932 from the Global Offset Table (@GOT). */
11933 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11934 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11936 if (TARGET_64BIT)
11937 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11939 if (reg != 0)
11941 gcc_assert (REG_P (reg));
11942 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11943 new_rtx, reg, 1, OPTAB_DIRECT);
11945 else
11946 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11948 new_rtx = gen_const_mem (Pmode, new_rtx);
11949 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11952 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11954 else
11956 if (CONST_INT_P (addr)
11957 && !x86_64_immediate_operand (addr, VOIDmode))
11958 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11959 else if (GET_CODE (addr) == CONST)
11961 addr = XEXP (addr, 0);
11963 /* We must match stuff we generate before. Assume the only
11964 unspecs that can get here are ours. Not that we could do
11965 anything with them anyway.... */
11966 if (GET_CODE (addr) == UNSPEC
11967 || (GET_CODE (addr) == PLUS
11968 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11969 return orig;
11970 gcc_assert (GET_CODE (addr) == PLUS);
11973 if (GET_CODE (addr) == PLUS)
11975 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11977 /* Check first to see if this is a constant
11978 offset from a @GOTOFF symbol reference. */
11979 if (!TARGET_PECOFF
11980 && gotoff_operand (op0, Pmode)
11981 && CONST_INT_P (op1))
11983 if (!TARGET_64BIT)
11985 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11986 UNSPEC_GOTOFF);
11987 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11988 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11990 if (reg != 0)
11992 gcc_assert (REG_P (reg));
11993 new_rtx = expand_simple_binop (Pmode, PLUS,
11994 pic_offset_table_rtx,
11995 new_rtx, reg, 1,
11996 OPTAB_DIRECT);
11998 else
11999 new_rtx
12000 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12002 else
12004 if (INTVAL (op1) < -16*1024*1024
12005 || INTVAL (op1) >= 16*1024*1024)
12007 if (!x86_64_immediate_operand (op1, Pmode))
12008 op1 = force_reg (Pmode, op1);
12010 new_rtx
12011 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12015 else
12017 rtx base = legitimize_pic_address (op0, reg);
12018 machine_mode mode = GET_MODE (base);
12019 new_rtx
12020 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12022 if (CONST_INT_P (new_rtx))
12024 if (INTVAL (new_rtx) < -16*1024*1024
12025 || INTVAL (new_rtx) >= 16*1024*1024)
12027 if (!x86_64_immediate_operand (new_rtx, mode))
12028 new_rtx = force_reg (mode, new_rtx);
12030 new_rtx
12031 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12033 else
12034 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12036 else
12038 /* For %rip addressing, we have to use
12039 just disp32, neither base nor index. */
12040 if (TARGET_64BIT
12041 && (GET_CODE (base) == SYMBOL_REF
12042 || GET_CODE (base) == LABEL_REF))
12043 base = force_reg (mode, base);
12044 if (GET_CODE (new_rtx) == PLUS
12045 && CONSTANT_P (XEXP (new_rtx, 1)))
12047 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12048 new_rtx = XEXP (new_rtx, 1);
12050 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12055 return new_rtx;
12058 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12060 static rtx
12061 get_thread_pointer (machine_mode tp_mode, bool to_reg)
12063 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12065 if (GET_MODE (tp) != tp_mode)
12067 gcc_assert (GET_MODE (tp) == SImode);
12068 gcc_assert (tp_mode == DImode);
12070 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12073 if (to_reg)
12074 tp = copy_to_mode_reg (tp_mode, tp);
12076 return tp;
12079 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12081 static GTY(()) rtx ix86_tls_symbol;
12083 static rtx
12084 ix86_tls_get_addr (void)
12086 if (!ix86_tls_symbol)
12088 const char *sym
12089 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12090 ? "___tls_get_addr" : "__tls_get_addr");
12092 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12095 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
12097 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
12098 UNSPEC_PLTOFF);
12099 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
12100 gen_rtx_CONST (Pmode, unspec));
12103 return ix86_tls_symbol;
12106 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12108 static GTY(()) rtx ix86_tls_module_base_symbol;
12111 ix86_tls_module_base (void)
12113 if (!ix86_tls_module_base_symbol)
12115 ix86_tls_module_base_symbol
12116 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
12118 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12119 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12122 return ix86_tls_module_base_symbol;
12125 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12126 false if we expect this to be used for a memory address and true if
12127 we expect to load the address into a register. */
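/* As a rough sketch of the accesses generated below: global-dynamic and
   local-dynamic call __tls_get_addr, initial-exec loads the offset from
   the GOT (foo@gottpoff) and adds the thread pointer, and local-exec
   uses a thread-pointer-relative reference such as foo@ntpoff directly.  */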
12130 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12132 rtx dest, base, off;
12133 rtx pic = NULL_RTX, tp = NULL_RTX;
12134 machine_mode tp_mode = Pmode;
12135 int type;
12137 /* Fall back to the global dynamic model if the toolchain cannot support
12138 local dynamic. */
12139 if (TARGET_SUN_TLS && !TARGET_64BIT
12140 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
12141 && model == TLS_MODEL_LOCAL_DYNAMIC)
12142 model = TLS_MODEL_GLOBAL_DYNAMIC;
12144 switch (model)
12146 case TLS_MODEL_GLOBAL_DYNAMIC:
12147 if (!TARGET_64BIT)
12149 if (flag_pic && !TARGET_PECOFF)
12150 pic = pic_offset_table_rtx;
12151 else
12153 pic = gen_reg_rtx (Pmode);
12154 emit_insn (gen_set_got (pic));
12158 if (TARGET_GNU2_TLS)
12160 dest = gen_reg_rtx (ptr_mode);
12161 if (TARGET_64BIT)
12162 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
12163 else
12164 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12166 tp = get_thread_pointer (ptr_mode, true);
12167 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12168 if (GET_MODE (dest) != Pmode)
12169 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12170 dest = force_reg (Pmode, dest);
12172 if (GET_MODE (x) != Pmode)
12173 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12175 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12177 else
12179 rtx caddr = ix86_tls_get_addr ();
12181 dest = gen_reg_rtx (Pmode);
12182 if (TARGET_64BIT)
12184 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12185 rtx_insn *insns;
12187 start_sequence ();
12188 emit_call_insn
12189 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
12190 insns = get_insns ();
12191 end_sequence ();
12193 if (GET_MODE (x) != Pmode)
12194 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12196 RTL_CONST_CALL_P (insns) = 1;
12197 emit_libcall_block (insns, dest, rax, x);
12199 else
12200 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12202 break;
12204 case TLS_MODEL_LOCAL_DYNAMIC:
12205 if (!TARGET_64BIT)
12207 if (flag_pic)
12208 pic = pic_offset_table_rtx;
12209 else
12211 pic = gen_reg_rtx (Pmode);
12212 emit_insn (gen_set_got (pic));
12216 if (TARGET_GNU2_TLS)
12218 rtx tmp = ix86_tls_module_base ();
12220 base = gen_reg_rtx (ptr_mode);
12221 if (TARGET_64BIT)
12222 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
12223 else
12224 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12226 tp = get_thread_pointer (ptr_mode, true);
12227 if (GET_MODE (base) != Pmode)
12228 base = gen_rtx_ZERO_EXTEND (Pmode, base);
12229 base = force_reg (Pmode, base);
12231 else
12233 rtx caddr = ix86_tls_get_addr ();
12235 base = gen_reg_rtx (Pmode);
12236 if (TARGET_64BIT)
12238 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12239 rtx_insn *insns;
12240 rtx eqv;
12242 start_sequence ();
12243 emit_call_insn
12244 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
12245 insns = get_insns ();
12246 end_sequence ();
12248 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12249 share the LD_BASE result with other LD model accesses. */
12250 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12251 UNSPEC_TLS_LD_BASE);
12253 RTL_CONST_CALL_P (insns) = 1;
12254 emit_libcall_block (insns, base, rax, eqv);
12256 else
12257 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12260 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12261 off = gen_rtx_CONST (Pmode, off);
12263 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12265 if (TARGET_GNU2_TLS)
12267 if (GET_MODE (tp) != Pmode)
12269 dest = lowpart_subreg (ptr_mode, dest, Pmode);
12270 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
12271 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
12273 else
12274 dest = gen_rtx_PLUS (Pmode, tp, dest);
12275 dest = force_reg (Pmode, dest);
12277 if (GET_MODE (x) != Pmode)
12278 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12280 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12282 break;
12284 case TLS_MODEL_INITIAL_EXEC:
12285 if (TARGET_64BIT)
12287 if (TARGET_SUN_TLS && !TARGET_X32)
12289 /* The Sun linker took the AMD64 TLS spec literally
12290 and can only handle %rax as destination of the
12291 initial-exec code sequence. */
12293 dest = gen_reg_rtx (DImode);
12294 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12295 return dest;
12298 /* Generate DImode references to avoid %fs:(%reg32)
12299 problems and a linker IE->LE relaxation bug. */
12300 tp_mode = DImode;
12301 pic = NULL;
12302 type = UNSPEC_GOTNTPOFF;
12304 else if (flag_pic)
12306 pic = pic_offset_table_rtx;
12307 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12309 else if (!TARGET_ANY_GNU_TLS)
12311 pic = gen_reg_rtx (Pmode);
12312 emit_insn (gen_set_got (pic));
12313 type = UNSPEC_GOTTPOFF;
12315 else
12317 pic = NULL;
12318 type = UNSPEC_INDNTPOFF;
12321 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
12322 off = gen_rtx_CONST (tp_mode, off);
12323 if (pic)
12324 off = gen_rtx_PLUS (tp_mode, pic, off);
12325 off = gen_const_mem (tp_mode, off);
12326 set_mem_alias_set (off, ix86_GOT_alias_set ());
12328 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12330 base = get_thread_pointer (tp_mode,
12331 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12332 off = force_reg (tp_mode, off);
12333 dest = gen_rtx_PLUS (tp_mode, base, off);
12334 if (tp_mode != Pmode)
12335 dest = convert_to_mode (Pmode, dest, 1);
12337 else
12339 base = get_thread_pointer (Pmode, true);
12340 dest = gen_reg_rtx (Pmode);
12341 emit_insn (gen_sub3_insn (dest, base, off));
12343 break;
12345 case TLS_MODEL_LOCAL_EXEC:
12346 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12347 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12348 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12349 off = gen_rtx_CONST (Pmode, off);
12351 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12353 base = get_thread_pointer (Pmode,
12354 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12355 return gen_rtx_PLUS (Pmode, base, off);
12357 else
12359 base = get_thread_pointer (Pmode, true);
12360 dest = gen_reg_rtx (Pmode);
12361 emit_insn (gen_sub3_insn (dest, base, off));
12363 break;
12365 default:
12366 gcc_unreachable ();
12369 return dest;
12372 /* Return true if the TLS address requires an insn using integer registers.
12373 It's used to prevent KMOV/VMOV in TLS code sequences, which require integer
12374 MOV instructions; refer to PR103275. */
12375 bool
12376 ix86_gpr_tls_address_pattern_p (rtx mem)
12378 gcc_assert (MEM_P (mem));
12380 rtx addr = XEXP (mem, 0);
12381 subrtx_var_iterator::array_type array;
12382 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
12384 rtx op = *iter;
12385 if (GET_CODE (op) == UNSPEC)
12386 switch (XINT (op, 1))
12388 case UNSPEC_GOTNTPOFF:
12389 return true;
12390 case UNSPEC_TPOFF:
12391 if (!TARGET_64BIT)
12392 return true;
12393 break;
12394 default:
12395 break;
12399 return false;
12402 /* Return true if OP refers to a TLS address. */
12403 bool
12404 ix86_tls_address_pattern_p (rtx op)
12406 subrtx_var_iterator::array_type array;
12407 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
12409 rtx op = *iter;
12410 if (MEM_P (op))
12412 rtx *x = &XEXP (op, 0);
12413 while (GET_CODE (*x) == PLUS)
12415 int i;
12416 for (i = 0; i < 2; i++)
12418 rtx u = XEXP (*x, i);
12419 if (GET_CODE (u) == ZERO_EXTEND)
12420 u = XEXP (u, 0);
12421 if (GET_CODE (u) == UNSPEC
12422 && XINT (u, 1) == UNSPEC_TP)
12423 return true;
12425 x = &XEXP (*x, 0);
12428 iter.skip_subrtxes ();
12432 return false;
12435 /* Rewrite *LOC so that it refers to a default TLS address space. */
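/* For instance (a sketch), a MEM whose address is
   (plus (unspec [const0] UNSPEC_TP) (reg R)) is rewritten below to a
   MEM of (reg R) placed in the default TLS segment address space.  */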
12436 void
12437 ix86_rewrite_tls_address_1 (rtx *loc)
12439 subrtx_ptr_iterator::array_type array;
12440 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
12442 rtx *loc = *iter;
12443 if (MEM_P (*loc))
12445 rtx addr = XEXP (*loc, 0);
12446 rtx *x = &addr;
12447 while (GET_CODE (*x) == PLUS)
12449 int i;
12450 for (i = 0; i < 2; i++)
12452 rtx u = XEXP (*x, i);
12453 if (GET_CODE (u) == ZERO_EXTEND)
12454 u = XEXP (u, 0);
12455 if (GET_CODE (u) == UNSPEC
12456 && XINT (u, 1) == UNSPEC_TP)
12458 addr_space_t as = DEFAULT_TLS_SEG_REG;
12460 *x = XEXP (*x, 1 - i);
12462 *loc = replace_equiv_address_nv (*loc, addr, true);
12463 set_mem_addr_space (*loc, as);
12464 return;
12467 x = &XEXP (*x, 0);
12470 iter.skip_subrtxes ();
12475 /* Rewrite an instruction pattern involving a TLS address
12476 so that it refers to the default TLS address space. */
12478 ix86_rewrite_tls_address (rtx pattern)
12480 pattern = copy_insn (pattern);
12481 ix86_rewrite_tls_address_1 (&pattern);
12482 return pattern;
12485 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12486 to symbol DECL if BEIMPORT is true. Otherwise create or return the
12487 unique refptr-DECL symbol corresponding to symbol DECL. */
12489 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
12491 static inline hashval_t hash (tree_map *m) { return m->hash; }
12492 static inline bool
12493 equal (tree_map *a, tree_map *b)
12495 return a->base.from == b->base.from;
12498 static int
12499 keep_cache_entry (tree_map *&m)
12501 return ggc_marked_p (m->base.from);
12505 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
12507 static tree
12508 get_dllimport_decl (tree decl, bool beimport)
12510 struct tree_map *h, in;
12511 const char *name;
12512 const char *prefix;
12513 size_t namelen, prefixlen;
12514 char *imp_name;
12515 tree to;
12516 rtx rtl;
12518 if (!dllimport_map)
12519 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
12521 in.hash = htab_hash_pointer (decl);
12522 in.base.from = decl;
12523 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
12524 h = *loc;
12525 if (h)
12526 return h->to;
12528 *loc = h = ggc_alloc<tree_map> ();
12529 h->hash = in.hash;
12530 h->base.from = decl;
12531 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12532 VAR_DECL, NULL, ptr_type_node);
12533 DECL_ARTIFICIAL (to) = 1;
12534 DECL_IGNORED_P (to) = 1;
12535 DECL_EXTERNAL (to) = 1;
12536 TREE_READONLY (to) = 1;
12538 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12539 name = targetm.strip_name_encoding (name);
12540 if (beimport)
12541 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12542 ? "*__imp_" : "*__imp__";
12543 else
12544 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
12545 namelen = strlen (name);
12546 prefixlen = strlen (prefix);
12547 imp_name = (char *) alloca (namelen + prefixlen + 1);
12548 memcpy (imp_name, prefix, prefixlen);
12549 memcpy (imp_name + prefixlen, name, namelen + 1);
12551 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12552 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12553 SET_SYMBOL_REF_DECL (rtl, to);
12554 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
12555 if (!beimport)
12557 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
12558 #ifdef SUB_TARGET_RECORD_STUB
12559 SUB_TARGET_RECORD_STUB (name);
12560 #endif
12563 rtl = gen_const_mem (Pmode, rtl);
12564 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12566 SET_DECL_RTL (to, rtl);
12567 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12569 return to;
12572 /* Expand SYMBOL into its corresponding far-address symbol.
12573 WANT_REG is true if we require the result be a register. */
12575 static rtx
12576 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
12578 tree imp_decl;
12579 rtx x;
12581 gcc_assert (SYMBOL_REF_DECL (symbol));
12582 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
12584 x = DECL_RTL (imp_decl);
12585 if (want_reg)
12586 x = force_reg (Pmode, x);
12587 return x;
12590 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12591 true if we require the result be a register. */
12593 static rtx
12594 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12596 tree imp_decl;
12597 rtx x;
12599 gcc_assert (SYMBOL_REF_DECL (symbol));
12600 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
12602 x = DECL_RTL (imp_decl);
12603 if (want_reg)
12604 x = force_reg (Pmode, x);
12605 return x;
12608 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12609 is true if we require the result be a register. */
12612 legitimize_pe_coff_symbol (rtx addr, bool inreg)
12614 if (!TARGET_PECOFF)
12615 return NULL_RTX;
12617 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12619 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12620 return legitimize_dllimport_symbol (addr, inreg);
12621 if (GET_CODE (addr) == CONST
12622 && GET_CODE (XEXP (addr, 0)) == PLUS
12623 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12624 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12626 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
12627 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12631 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
12632 return NULL_RTX;
12633 if (GET_CODE (addr) == SYMBOL_REF
12634 && !is_imported_p (addr)
12635 && SYMBOL_REF_EXTERNAL_P (addr)
12636 && SYMBOL_REF_DECL (addr))
12637 return legitimize_pe_coff_extern_decl (addr, inreg);
12639 if (GET_CODE (addr) == CONST
12640 && GET_CODE (XEXP (addr, 0)) == PLUS
12641 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12642 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
12643 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
12644 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
12646 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
12647 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12649 return NULL_RTX;
12652 /* Try machine-dependent ways of modifying an illegitimate address
12653 to be legitimate. If we find one, return the new, valid address.
12654 This macro is used in only one place: `memory_address' in explow.cc.
12656 OLDX is the address as it was before break_out_memory_refs was called.
12657 In some cases it is useful to look at this to decide what needs to be done.
12659 It is always safe for this macro to do nothing. It exists to recognize
12660 opportunities to optimize the output.
12662 For the 80386, we handle X+REG by loading X into a register R and
12663 using R+REG. R will go in a general reg and indexing will be used.
12664 However, if REG is a broken-out memory address or multiplication,
12665 nothing needs to be done because REG can certainly go in a general reg.
12667 When -fpic is used, special handling is needed for symbolic references.
12668 See comments by legitimize_pic_address in i386.cc for details. */
12670 static rtx
12671 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12673 bool changed = false;
12674 unsigned log;
12676 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12677 if (log)
12678 return legitimize_tls_address (x, (enum tls_model) log, false);
12679 if (GET_CODE (x) == CONST
12680 && GET_CODE (XEXP (x, 0)) == PLUS
12681 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12682 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12684 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12685 (enum tls_model) log, false);
12686 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12689 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12691 rtx tmp = legitimize_pe_coff_symbol (x, true);
12692 if (tmp)
12693 return tmp;
12696 if (flag_pic && SYMBOLIC_CONST (x))
12697 return legitimize_pic_address (x, 0);
12699 #if TARGET_MACHO
12700 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12701 return machopic_indirect_data_reference (x, 0);
12702 #endif
12704 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12705 if (GET_CODE (x) == ASHIFT
12706 && CONST_INT_P (XEXP (x, 1))
12707 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12709 changed = true;
12710 log = INTVAL (XEXP (x, 1));
12711 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12712 GEN_INT (1 << log));
12715 if (GET_CODE (x) == PLUS)
12717 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12719 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12720 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12721 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12723 changed = true;
12724 log = INTVAL (XEXP (XEXP (x, 0), 1));
12725 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12726 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12727 GEN_INT (1 << log));
12730 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12731 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12732 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12734 changed = true;
12735 log = INTVAL (XEXP (XEXP (x, 1), 1));
12736 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12737 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12738 GEN_INT (1 << log));
12741 /* Put multiply first if it isn't already. */
12742 if (GET_CODE (XEXP (x, 1)) == MULT)
12744 std::swap (XEXP (x, 0), XEXP (x, 1));
12745 changed = true;
12748 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12749 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12750 created by virtual register instantiation, register elimination, and
12751 similar optimizations. */
12752 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12754 changed = true;
12755 x = gen_rtx_PLUS (Pmode,
12756 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12757 XEXP (XEXP (x, 1), 0)),
12758 XEXP (XEXP (x, 1), 1));
12761 /* Canonicalize
12762 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12763 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12764 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12765 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12766 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12767 && CONSTANT_P (XEXP (x, 1)))
12769 rtx constant;
12770 rtx other = NULL_RTX;
12772 if (CONST_INT_P (XEXP (x, 1)))
12774 constant = XEXP (x, 1);
12775 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12777 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12779 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12780 other = XEXP (x, 1);
12782 else
12783 constant = 0;
12785 if (constant)
12787 changed = true;
12788 x = gen_rtx_PLUS (Pmode,
12789 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12790 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12791 plus_constant (Pmode, other,
12792 INTVAL (constant)));
12796 if (changed && ix86_legitimate_address_p (mode, x, false))
12797 return x;
12799 if (GET_CODE (XEXP (x, 0)) == MULT)
12801 changed = true;
12802 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12805 if (GET_CODE (XEXP (x, 1)) == MULT)
12807 changed = true;
12808 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12811 if (changed
12812 && REG_P (XEXP (x, 1))
12813 && REG_P (XEXP (x, 0)))
12814 return x;
12816 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12818 changed = true;
12819 x = legitimize_pic_address (x, 0);
12822 if (changed && ix86_legitimate_address_p (mode, x, false))
12823 return x;
12825 if (REG_P (XEXP (x, 0)))
12827 rtx temp = gen_reg_rtx (Pmode);
12828 rtx val = force_operand (XEXP (x, 1), temp);
12829 if (val != temp)
12831 val = convert_to_mode (Pmode, val, 1);
12832 emit_move_insn (temp, val);
12835 XEXP (x, 1) = temp;
12836 return x;
12839 else if (REG_P (XEXP (x, 1)))
12841 rtx temp = gen_reg_rtx (Pmode);
12842 rtx val = force_operand (XEXP (x, 0), temp);
12843 if (val != temp)
12845 val = convert_to_mode (Pmode, val, 1);
12846 emit_move_insn (temp, val);
12849 XEXP (x, 0) = temp;
12850 return x;
12854 return x;
12857 /* Print an integer constant expression in assembler syntax. Addition
12858 and subtraction are the only arithmetic that may appear in these
12859 expressions. FILE is the stdio stream to write to, X is the rtx, and
12860 CODE is the operand print code from the output string. */
12862 static void
12863 output_pic_addr_const (FILE *file, rtx x, int code)
12865 char buf[256];
12867 switch (GET_CODE (x))
12869 case PC:
12870 gcc_assert (flag_pic);
12871 putc ('.', file);
12872 break;
12874 case SYMBOL_REF:
12875 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12876 output_addr_const (file, x);
12877 else
12879 const char *name = XSTR (x, 0);
12881 /* Mark the decl as referenced so that cgraph will
12882 output the function. */
12883 if (SYMBOL_REF_DECL (x))
12884 mark_decl_referenced (SYMBOL_REF_DECL (x));
12886 #if TARGET_MACHO
12887 if (MACHOPIC_INDIRECT
12888 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12889 name = machopic_indirection_name (x, /*stub_p=*/true);
12890 #endif
12891 assemble_name (file, name);
12893 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12894 && code == 'P' && ix86_call_use_plt_p (x))
12895 fputs ("@PLT", file);
12896 break;
12898 case LABEL_REF:
12899 x = XEXP (x, 0);
12900 /* FALLTHRU */
12901 case CODE_LABEL:
12902 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12903 assemble_name (asm_out_file, buf);
12904 break;
12906 CASE_CONST_SCALAR_INT:
12907 output_addr_const (file, x);
12908 break;
12910 case CONST:
12911 /* This used to output parentheses around the expression,
12912 but that does not work on the 386 (either ATT or BSD assembler). */
12913 output_pic_addr_const (file, XEXP (x, 0), code);
12914 break;
12916 case CONST_DOUBLE:
12917 /* We can't handle floating point constants;
12918 TARGET_PRINT_OPERAND must handle them. */
12919 output_operand_lossage ("floating constant misused");
12920 break;
12922 case PLUS:
12923 /* Some assemblers need integer constants to appear first. */
12924 if (CONST_INT_P (XEXP (x, 0)))
12926 output_pic_addr_const (file, XEXP (x, 0), code);
12927 putc ('+', file);
12928 output_pic_addr_const (file, XEXP (x, 1), code);
12930 else
12932 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12933 output_pic_addr_const (file, XEXP (x, 1), code);
12934 putc ('+', file);
12935 output_pic_addr_const (file, XEXP (x, 0), code);
12937 break;
12939 case MINUS:
12940 if (!TARGET_MACHO)
12941 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12942 output_pic_addr_const (file, XEXP (x, 0), code);
12943 putc ('-', file);
12944 output_pic_addr_const (file, XEXP (x, 1), code);
12945 if (!TARGET_MACHO)
12946 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12947 break;
12949 case UNSPEC:
12950 gcc_assert (XVECLEN (x, 0) == 1);
12951 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12952 switch (XINT (x, 1))
12954 case UNSPEC_GOT:
12955 fputs ("@GOT", file);
12956 break;
12957 case UNSPEC_GOTOFF:
12958 fputs ("@GOTOFF", file);
12959 break;
12960 case UNSPEC_PLTOFF:
12961 fputs ("@PLTOFF", file);
12962 break;
12963 case UNSPEC_PCREL:
12964 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12965 "(%rip)" : "[rip]", file);
12966 break;
12967 case UNSPEC_GOTPCREL:
12968 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12969 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12970 break;
12971 case UNSPEC_GOTTPOFF:
12972 /* FIXME: This might be @TPOFF in Sun ld too. */
12973 fputs ("@gottpoff", file);
12974 break;
12975 case UNSPEC_TPOFF:
12976 fputs ("@tpoff", file);
12977 break;
12978 case UNSPEC_NTPOFF:
12979 if (TARGET_64BIT)
12980 fputs ("@tpoff", file);
12981 else
12982 fputs ("@ntpoff", file);
12983 break;
12984 case UNSPEC_DTPOFF:
12985 fputs ("@dtpoff", file);
12986 break;
12987 case UNSPEC_GOTNTPOFF:
12988 if (TARGET_64BIT)
12989 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12990 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12991 else
12992 fputs ("@gotntpoff", file);
12993 break;
12994 case UNSPEC_INDNTPOFF:
12995 fputs ("@indntpoff", file);
12996 break;
12997 #if TARGET_MACHO
12998 case UNSPEC_MACHOPIC_OFFSET:
12999 putc ('-', file);
13000 machopic_output_function_base_name (file);
13001 break;
13002 #endif
13003 default:
13004 output_operand_lossage ("invalid UNSPEC as operand");
13005 break;
13007 break;
13009 default:
13010 output_operand_lossage ("invalid expression as operand");
13014 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13015 We need to emit DTP-relative relocations. */
13017 static void ATTRIBUTE_UNUSED
13018 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13020 fputs (ASM_LONG, file);
13021 output_addr_const (file, x);
13022 fputs ("@dtpoff", file);
13023 switch (size)
13025 case 4:
13026 break;
13027 case 8:
13028 fputs (", 0", file);
13029 break;
13030 default:
13031 gcc_unreachable ();
13035 /* Return true if X is a representation of the PIC register. This copes
13036 with calls from ix86_find_base_term, where the register might have
13037 been replaced by a cselib value. */
13039 static bool
13040 ix86_pic_register_p (rtx x)
13042 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13043 return (pic_offset_table_rtx
13044 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13045 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
13046 return true;
13047 else if (!REG_P (x))
13048 return false;
13049 else if (pic_offset_table_rtx)
13051 if (REGNO (x) == REGNO (pic_offset_table_rtx))
13052 return true;
13053 if (HARD_REGISTER_P (x)
13054 && !HARD_REGISTER_P (pic_offset_table_rtx)
13055 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
13056 return true;
13057 return false;
13059 else
13060 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13063 /* Helper function for ix86_delegitimize_address.
13064 Attempt to delegitimize TLS local-exec accesses. */
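/* Roughly (a sketch): an access of the form %seg:foo@ntpoff(base,index)
   is turned back into foo plus the base/index parts, with the TLS
   segment dropped.  */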
13066 static rtx
13067 ix86_delegitimize_tls_address (rtx orig_x)
13069 rtx x = orig_x, unspec;
13070 struct ix86_address addr;
13072 if (!TARGET_TLS_DIRECT_SEG_REFS)
13073 return orig_x;
13074 if (MEM_P (x))
13075 x = XEXP (x, 0);
13076 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13077 return orig_x;
13078 if (ix86_decompose_address (x, &addr) == 0
13079 || addr.seg != DEFAULT_TLS_SEG_REG
13080 || addr.disp == NULL_RTX
13081 || GET_CODE (addr.disp) != CONST)
13082 return orig_x;
13083 unspec = XEXP (addr.disp, 0);
13084 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13085 unspec = XEXP (unspec, 0);
13086 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13087 return orig_x;
13088 x = XVECEXP (unspec, 0, 0);
13089 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13090 if (unspec != XEXP (addr.disp, 0))
13091 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13092 if (addr.index)
13094 rtx idx = addr.index;
13095 if (addr.scale != 1)
13096 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13097 x = gen_rtx_PLUS (Pmode, idx, x);
13099 if (addr.base)
13100 x = gen_rtx_PLUS (Pmode, addr.base, x);
13101 if (MEM_P (orig_x))
13102 x = replace_equiv_address_nv (orig_x, x);
13103 return x;
13106 /* In the name of slightly smaller debug output, and to cater to
13107 general assembler lossage, recognize PIC+GOTOFF and turn it back
13108 into a direct symbol reference.
13110 On Darwin, this is necessary to avoid a crash, because Darwin
13111 has a different PIC label for each routine but the DWARF debugging
13112 information is not associated with any particular routine, so it's
13113 necessary to remove references to the PIC label from RTL stored by
13114 the DWARF output code.
13116 This helper is used in the normal ix86_delegitimize_address
13117 entrypoint (e.g. used in the target delegitimization hook) and
13118 in ix86_find_base_term. As compile time memory optimization, we
13119 avoid allocating rtxes that will not change anything on the outcome
13120 of the callers (find_base_value and find_base_term). */
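/* A typical case (sketch): (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into plain "foo".  */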
13122 static inline rtx
13123 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
13125 rtx orig_x = delegitimize_mem_from_attrs (x);
13126 /* addend is NULL or some rtx if x is something+GOTOFF where
13127 something doesn't include the PIC register. */
13128 rtx addend = NULL_RTX;
13129 /* reg_addend is NULL or a multiple of some register. */
13130 rtx reg_addend = NULL_RTX;
13131 /* const_addend is NULL or a const_int. */
13132 rtx const_addend = NULL_RTX;
13133 /* This is the result, or NULL. */
13134 rtx result = NULL_RTX;
13136 x = orig_x;
13138 if (MEM_P (x))
13139 x = XEXP (x, 0);
13141 if (TARGET_64BIT)
13143 if (GET_CODE (x) == CONST
13144 && GET_CODE (XEXP (x, 0)) == PLUS
13145 && GET_MODE (XEXP (x, 0)) == Pmode
13146 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13148 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13150 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
13151 base. A CONST can't be arg_pointer_rtx based. */
13152 if (base_term_p && MEM_P (orig_x))
13153 return orig_x;
13154 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13155 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13156 if (MEM_P (orig_x))
13157 x = replace_equiv_address_nv (orig_x, x);
13158 return x;
13161 if (GET_CODE (x) == CONST
13162 && GET_CODE (XEXP (x, 0)) == UNSPEC
13163 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
13164 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
13165 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
13167 x = XVECEXP (XEXP (x, 0), 0, 0);
13168 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13170 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
13171 if (x == NULL_RTX)
13172 return orig_x;
13174 return x;
13177 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
13178 return ix86_delegitimize_tls_address (orig_x);
13180 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13181 and -mcmodel=medium -fpic. */
13184 if (GET_CODE (x) != PLUS
13185 || GET_CODE (XEXP (x, 1)) != CONST)
13186 return ix86_delegitimize_tls_address (orig_x);
13188 if (ix86_pic_register_p (XEXP (x, 0)))
13189 /* %ebx + GOT/GOTOFF */
13191 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13193 /* %ebx + %reg * scale + GOT/GOTOFF */
13194 reg_addend = XEXP (x, 0);
13195 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13196 reg_addend = XEXP (reg_addend, 1);
13197 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13198 reg_addend = XEXP (reg_addend, 0);
13199 else
13201 reg_addend = NULL_RTX;
13202 addend = XEXP (x, 0);
13205 else
13206 addend = XEXP (x, 0);
13208 x = XEXP (XEXP (x, 1), 0);
13209 if (GET_CODE (x) == PLUS
13210 && CONST_INT_P (XEXP (x, 1)))
13212 const_addend = XEXP (x, 1);
13213 x = XEXP (x, 0);
13216 if (GET_CODE (x) == UNSPEC
13217 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13218 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
13219 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
13220 && !MEM_P (orig_x) && !addend)))
13221 result = XVECEXP (x, 0, 0);
13223 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
13224 && !MEM_P (orig_x))
13225 result = XVECEXP (x, 0, 0);
13227 if (! result)
13228 return ix86_delegitimize_tls_address (orig_x);
13230 /* For (PLUS something CONST_INT) both find_base_{value,term} just
13231 recurse on the first operand. */
13232 if (const_addend && !base_term_p)
13233 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13234 if (reg_addend)
13235 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13236 if (addend)
13238 /* If the rest of original X doesn't involve the PIC register, add
13239 addend and subtract pic_offset_table_rtx. This can happen e.g.
13240 for code like:
13241 leal (%ebx, %ecx, 4), %ecx
13243 movl foo@GOTOFF(%ecx), %edx
13244 in which case we return (%ecx - %ebx) + foo
13245 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
13246 and reload has completed. Don't do the latter for debug,
13247 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
13248 if (pic_offset_table_rtx
13249 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
13250 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13251 pic_offset_table_rtx),
13252 result);
13253 else if (base_term_p
13254 && pic_offset_table_rtx
13255 && !TARGET_MACHO
13256 && !TARGET_VXWORKS_RTP)
13258 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13259 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
13260 result = gen_rtx_PLUS (Pmode, tmp, result);
13262 else
13263 return orig_x;
13265 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13267 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
13268 if (result == NULL_RTX)
13269 return orig_x;
13271 return result;
13274 /* The normal entry point wrapping the helper above. */
13276 static rtx
13277 ix86_delegitimize_address (rtx x)
13279 return ix86_delegitimize_address_1 (x, false);
13282 /* If X is a machine specific address (i.e. a symbol or label being
13283 referenced as a displacement from the GOT implemented using an
13284 UNSPEC), then return the base term. Otherwise return X. */
13287 ix86_find_base_term (rtx x)
13289 rtx term;
13291 if (TARGET_64BIT)
13293 if (GET_CODE (x) != CONST)
13294 return x;
13295 term = XEXP (x, 0);
13296 if (GET_CODE (term) == PLUS
13297 && CONST_INT_P (XEXP (term, 1)))
13298 term = XEXP (term, 0);
13299 if (GET_CODE (term) != UNSPEC
13300 || (XINT (term, 1) != UNSPEC_GOTPCREL
13301 && XINT (term, 1) != UNSPEC_PCREL))
13302 return x;
13304 return XVECEXP (term, 0, 0);
13307 return ix86_delegitimize_address_1 (x, true);
13310 /* Return true if X shouldn't be emitted into the debug info.
13311 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
13312 symbol easily into the .debug_info section, so we do not delegitimize
13313 it, but instead assemble it as @gotoff.
13314 Disallow a _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
13315 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
13317 static bool
13318 ix86_const_not_ok_for_debug_p (rtx x)
13320 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
13321 return true;
13323 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
13324 return true;
13326 return false;
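/* Output to FILE the one- or two-letter condition suffix (e.g. "e", "ne",
   "b", "ge") that encodes comparison CODE in flags mode MODE.  REVERSE
   reverses the condition; FP selects the spellings used after
   floating-point compares.  */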
13329 static void
13330 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
13331 bool fp, FILE *file)
13333 const char *suffix;
13335 if (mode == CCFPmode)
13337 code = ix86_fp_compare_code_to_integer (code);
13338 mode = CCmode;
13340 if (reverse)
13341 code = reverse_condition (code);
13343 switch (code)
13345 case EQ:
13346 gcc_assert (mode != CCGZmode);
13347 switch (mode)
13349 case E_CCAmode:
13350 suffix = "a";
13351 break;
13352 case E_CCCmode:
13353 suffix = "c";
13354 break;
13355 case E_CCOmode:
13356 suffix = "o";
13357 break;
13358 case E_CCPmode:
13359 suffix = "p";
13360 break;
13361 case E_CCSmode:
13362 suffix = "s";
13363 break;
13364 default:
13365 suffix = "e";
13366 break;
13368 break;
13369 case NE:
13370 gcc_assert (mode != CCGZmode);
13371 switch (mode)
13373 case E_CCAmode:
13374 suffix = "na";
13375 break;
13376 case E_CCCmode:
13377 suffix = "nc";
13378 break;
13379 case E_CCOmode:
13380 suffix = "no";
13381 break;
13382 case E_CCPmode:
13383 suffix = "np";
13384 break;
13385 case E_CCSmode:
13386 suffix = "ns";
13387 break;
13388 default:
13389 suffix = "ne";
13390 break;
13392 break;
13393 case GT:
13394 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13395 suffix = "g";
13396 break;
13397 case GTU:
13398 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13399 Those same assemblers have the same but opposite lossage on cmov. */
13400 if (mode == CCmode)
13401 suffix = fp ? "nbe" : "a";
13402 else
13403 gcc_unreachable ();
13404 break;
13405 case LT:
13406 switch (mode)
13408 case E_CCNOmode:
13409 case E_CCGOCmode:
13410 suffix = "s";
13411 break;
13413 case E_CCmode:
13414 case E_CCGCmode:
13415 case E_CCGZmode:
13416 suffix = "l";
13417 break;
13419 default:
13420 gcc_unreachable ();
13422 break;
13423 case LTU:
13424 if (mode == CCmode || mode == CCGZmode)
13425 suffix = "b";
13426 else if (mode == CCCmode)
13427 suffix = fp ? "b" : "c";
13428 else
13429 gcc_unreachable ();
13430 break;
13431 case GE:
13432 switch (mode)
13434 case E_CCNOmode:
13435 case E_CCGOCmode:
13436 suffix = "ns";
13437 break;
13439 case E_CCmode:
13440 case E_CCGCmode:
13441 case E_CCGZmode:
13442 suffix = "ge";
13443 break;
13445 default:
13446 gcc_unreachable ();
13448 break;
13449 case GEU:
13450 if (mode == CCmode || mode == CCGZmode)
13451 suffix = "nb";
13452 else if (mode == CCCmode)
13453 suffix = fp ? "nb" : "nc";
13454 else
13455 gcc_unreachable ();
13456 break;
13457 case LE:
13458 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13459 suffix = "le";
13460 break;
13461 case LEU:
13462 if (mode == CCmode)
13463 suffix = "be";
13464 else
13465 gcc_unreachable ();
13466 break;
13467 case UNORDERED:
13468 suffix = fp ? "u" : "p";
13469 break;
13470 case ORDERED:
13471 suffix = fp ? "nu" : "np";
13472 break;
13473 default:
13474 gcc_unreachable ();
13476 fputs (suffix, file);
13479 /* Print the name of register X to FILE based on its machine mode and number.
13480 If CODE is 'w', pretend the mode is HImode.
13481 If CODE is 'b', pretend the mode is QImode.
13482 If CODE is 'k', pretend the mode is SImode.
13483 If CODE is 'q', pretend the mode is DImode.
13484 If CODE is 'x', pretend the mode is V4SFmode.
13485 If CODE is 't', pretend the mode is V8SFmode.
13486 If CODE is 'g', pretend the mode is V16SFmode.
13487 If CODE is 'h', pretend the reg is the 'high' byte register.
13488 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13489 If CODE is 'd', duplicate the operand for AVX instruction.
13490 If CODE is 'V', print naked full integer register name without %.
13493 void
13494 print_reg (rtx x, int code, FILE *file)
13496 const char *reg;
13497 int msize;
13498 unsigned int regno;
13499 bool duplicated;
13501 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
13502 putc ('%', file);
13504 if (x == pc_rtx)
13506 gcc_assert (TARGET_64BIT);
13507 fputs ("rip", file);
13508 return;
13511 if (code == 'y' && STACK_TOP_P (x))
13513 fputs ("st(0)", file);
13514 return;
13517 if (code == 'w')
13518 msize = 2;
13519 else if (code == 'b')
13520 msize = 1;
13521 else if (code == 'k')
13522 msize = 4;
13523 else if (code == 'q')
13524 msize = 8;
13525 else if (code == 'h')
13526 msize = 0;
13527 else if (code == 'x')
13528 msize = 16;
13529 else if (code == 't')
13530 msize = 32;
13531 else if (code == 'g')
13532 msize = 64;
13533 else
13534 msize = GET_MODE_SIZE (GET_MODE (x));
13536 regno = REGNO (x);
13538 if (regno == ARG_POINTER_REGNUM
13539 || regno == FRAME_POINTER_REGNUM
13540 || regno == FPSR_REG)
13542 output_operand_lossage
13543 ("invalid use of register '%s'", reg_names[regno]);
13544 return;
13546 else if (regno == FLAGS_REG)
13548 output_operand_lossage ("invalid use of asm flag output");
13549 return;
13552 if (code == 'V')
13554 if (GENERAL_REGNO_P (regno))
13555 msize = GET_MODE_SIZE (word_mode);
13556 else
13557 error ("%<V%> modifier on non-integer register");
13560 duplicated = code == 'd' && TARGET_AVX;
13562 switch (msize)
13564 case 16:
13565 case 12:
13566 case 8:
13567 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13568 warning (0, "unsupported size for integer register");
13569 /* FALLTHRU */
13570 case 4:
13571 if (LEGACY_INT_REGNO_P (regno))
13572 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
13573 /* FALLTHRU */
13574 case 2:
13575 normal:
13576 reg = hi_reg_name[regno];
13577 break;
13578 case 1:
13579 if (regno >= ARRAY_SIZE (qi_reg_name))
13580 goto normal;
13581 if (!ANY_QI_REGNO_P (regno))
13582 error ("unsupported size for integer register");
13583 reg = qi_reg_name[regno];
13584 break;
13585 case 0:
13586 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13587 goto normal;
13588 reg = qi_high_reg_name[regno];
13589 break;
13590 case 32:
13591 case 64:
13592 if (SSE_REGNO_P (regno))
13594 gcc_assert (!duplicated);
13595 putc (msize == 32 ? 'y' : 'z', file);
13596 reg = hi_reg_name[regno] + 1;
13597 break;
13599 goto normal;
13600 default:
13601 gcc_unreachable ();
13604 fputs (reg, file);
13606 /* Irritatingly, AMD extended registers use a
13607 different naming convention: "r%d[bwd]". */
13608 if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
13610 gcc_assert (TARGET_64BIT);
13611 switch (msize)
13613 case 0:
13614 error ("extended registers have no high halves");
13615 break;
13616 case 1:
13617 putc ('b', file);
13618 break;
13619 case 2:
13620 putc ('w', file);
13621 break;
13622 case 4:
13623 putc ('d', file);
13624 break;
13625 case 8:
13626 /* no suffix */
13627 break;
13628 default:
13629 error ("unsupported operand size for extended register");
13630 break;
13632 return;
13635 if (duplicated)
13637 if (ASSEMBLER_DIALECT == ASM_ATT)
13638 fprintf (file, ", %%%s", reg);
13639 else
13640 fprintf (file, ", %s", reg);
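/* Illustrative sketch, not part of this file: the size-override codes
   handled by print_reg above are also available as operand modifiers in
   extended asm, so user code can observe the same register-name selection.
   The function name below and the concrete registers mentioned in the
   comments are hypothetical examples.  */
unsigned long
example_print_reg_codes (unsigned long x)
{
  /* If operand 1 is allocated to rax, then %b1 prints %al, %w1 prints
     %ax, %k1 prints %eax and %q1 prints %rax (AT&T syntax).  */
  asm ("movzbq %b1, %q0\n\t"
       "addw   %w1, %w0\n\t"
       "addl   %k1, %k0\n\t"
       "addq   %q1, %q0"
       : "=&r" (x) : "r" (x));
  return x;
}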
13644 /* Meaning of CODE:
13645 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13646 C -- print opcode suffix for set/cmov insn.
13647 c -- like C, but print reversed condition
13648 F,f -- likewise, but for floating-point.
13649 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13650 otherwise nothing
13651 R -- print embedded rounding and sae.
13652 r -- print only sae.
13653 z -- print the opcode suffix for the size of the current operand.
13654 Z -- likewise, with special suffixes for x87 instructions.
13655 * -- print a star (in certain assembler syntax)
13656 A -- print an absolute memory reference.
13657 E -- print address with DImode register names if TARGET_64BIT.
13658 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13659 s -- print a shift double count, followed by the assembler's argument
13660 delimiter.
13661 b -- print the QImode name of the register for the indicated operand.
13662 %b0 would print %al if operands[0] is reg 0.
13663 w -- likewise, print the HImode name of the register.
13664 k -- likewise, print the SImode name of the register.
13665 q -- likewise, print the DImode name of the register.
13666 x -- likewise, print the V4SFmode name of the register.
13667 t -- likewise, print the V8SFmode name of the register.
13668 g -- likewise, print the V16SFmode name of the register.
13669 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13670 y -- print "st(0)" instead of "st" as a register.
13671 d -- print duplicated register operand for AVX instruction.
13672 D -- print condition for SSE cmp instruction.
13673 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13674 address from GOT.
13675 p -- print raw symbol name.
13676 X -- don't print any sort of PIC '@' suffix for a symbol.
13677 & -- print some in-use local-dynamic symbol name.
13678 H -- print a memory address offset by 8; used for sse high-parts
13679 Y -- print condition for XOP pcom* instruction.
13680 V -- print naked full integer register name without %.
13681 + -- print a branch hint as 'cs' or 'ds' prefix
13682 ; -- print a semicolon (after prefixes due to bug in older gas).
13683 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13684 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13685 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13686 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13687 N -- print maskz if it's constant 0 operand.
13690 void
13691 ix86_print_operand (FILE *file, rtx x, int code)
13693 if (code)
13695 switch (code)
13697 case 'A':
13698 switch (ASSEMBLER_DIALECT)
13700 case ASM_ATT:
13701 putc ('*', file);
13702 break;
13704 case ASM_INTEL:
13705 /* Intel syntax. For absolute addresses, registers should not
13706 be surrounded by square brackets. */
13707 if (!REG_P (x))
13709 putc ('[', file);
13710 ix86_print_operand (file, x, 0);
13711 putc (']', file);
13712 return;
13714 break;
13716 default:
13717 gcc_unreachable ();
13720 ix86_print_operand (file, x, 0);
13721 return;
13723 case 'E':
13724 /* Wrap address in an UNSPEC to declare special handling. */
13725 if (TARGET_64BIT)
13726 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13728 output_address (VOIDmode, x);
13729 return;
13731 case 'L':
13732 if (ASSEMBLER_DIALECT == ASM_ATT)
13733 putc ('l', file);
13734 return;
13736 case 'W':
13737 if (ASSEMBLER_DIALECT == ASM_ATT)
13738 putc ('w', file);
13739 return;
13741 case 'B':
13742 if (ASSEMBLER_DIALECT == ASM_ATT)
13743 putc ('b', file);
13744 return;
13746 case 'Q':
13747 if (ASSEMBLER_DIALECT == ASM_ATT)
13748 putc ('l', file);
13749 return;
13751 case 'S':
13752 if (ASSEMBLER_DIALECT == ASM_ATT)
13753 putc ('s', file);
13754 return;
13756 case 'T':
13757 if (ASSEMBLER_DIALECT == ASM_ATT)
13758 putc ('t', file);
13759 return;
13761 case 'O':
13762 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13763 if (ASSEMBLER_DIALECT != ASM_ATT)
13764 return;
13766 switch (GET_MODE_SIZE (GET_MODE (x)))
13768 case 2:
13769 putc ('w', file);
13770 break;
13772 case 4:
13773 putc ('l', file);
13774 break;
13776 case 8:
13777 putc ('q', file);
13778 break;
13780 default:
13781 output_operand_lossage ("invalid operand size for operand "
13782 "code 'O'");
13783 return;
13786 putc ('.', file);
13787 #endif
13788 return;
13790 case 'z':
13791 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13793 /* Opcodes don't get size suffixes if using Intel opcodes. */
13794 if (ASSEMBLER_DIALECT == ASM_INTEL)
13795 return;
13797 switch (GET_MODE_SIZE (GET_MODE (x)))
13799 case 1:
13800 putc ('b', file);
13801 return;
13803 case 2:
13804 putc ('w', file);
13805 return;
13807 case 4:
13808 putc ('l', file);
13809 return;
13811 case 8:
13812 putc ('q', file);
13813 return;
13815 default:
13816 output_operand_lossage ("invalid operand size for operand "
13817 "code 'z'");
13818 return;
13822 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13824 if (this_is_asm_operands)
13825 warning_for_asm (this_is_asm_operands,
13826 "non-integer operand used with operand code %<z%>");
13827 else
13828 warning (0, "non-integer operand used with operand code %<z%>");
13830 /* FALLTHRU */
13832 case 'Z':
13833 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13834 if (ASSEMBLER_DIALECT == ASM_INTEL)
13835 return;
13837 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13839 switch (GET_MODE_SIZE (GET_MODE (x)))
13841 case 2:
13842 #ifdef HAVE_AS_IX86_FILDS
13843 putc ('s', file);
13844 #endif
13845 return;
13847 case 4:
13848 putc ('l', file);
13849 return;
13851 case 8:
13852 #ifdef HAVE_AS_IX86_FILDQ
13853 putc ('q', file);
13854 #else
13855 fputs ("ll", file);
13856 #endif
13857 return;
13859 default:
13860 break;
13863 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13865 /* 387 opcodes don't get size suffixes
13866 if the operands are registers. */
13867 if (STACK_REG_P (x))
13868 return;
13870 switch (GET_MODE_SIZE (GET_MODE (x)))
13872 case 4:
13873 putc ('s', file);
13874 return;
13876 case 8:
13877 putc ('l', file);
13878 return;
13880 case 12:
13881 case 16:
13882 putc ('t', file);
13883 return;
13885 default:
13886 break;
13889 else
13891 output_operand_lossage ("invalid operand type used with "
13892 "operand code '%c'", code);
13893 return;
13896 output_operand_lossage ("invalid operand size for operand code '%c'",
13897 code);
13898 return;
13900 case 'd':
13901 case 'b':
13902 case 'w':
13903 case 'k':
13904 case 'q':
13905 case 'h':
13906 case 't':
13907 case 'g':
13908 case 'y':
13909 case 'x':
13910 case 'X':
13911 case 'P':
13912 case 'p':
13913 case 'V':
13914 break;
13916 case 's':
13917 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13919 ix86_print_operand (file, x, 0);
13920 fputs (", ", file);
13922 return;
13924 case 'Y':
13925 switch (GET_CODE (x))
13927 case NE:
13928 fputs ("neq", file);
13929 break;
13930 case EQ:
13931 fputs ("eq", file);
13932 break;
13933 case GE:
13934 case GEU:
13935 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13936 break;
13937 case GT:
13938 case GTU:
13939 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13940 break;
13941 case LE:
13942 case LEU:
13943 fputs ("le", file);
13944 break;
13945 case LT:
13946 case LTU:
13947 fputs ("lt", file);
13948 break;
13949 case UNORDERED:
13950 fputs ("unord", file);
13951 break;
13952 case ORDERED:
13953 fputs ("ord", file);
13954 break;
13955 case UNEQ:
13956 fputs ("ueq", file);
13957 break;
13958 case UNGE:
13959 fputs ("nlt", file);
13960 break;
13961 case UNGT:
13962 fputs ("nle", file);
13963 break;
13964 case UNLE:
13965 fputs ("ule", file);
13966 break;
13967 case UNLT:
13968 fputs ("ult", file);
13969 break;
13970 case LTGT:
13971 fputs ("une", file);
13972 break;
13973 default:
13974 output_operand_lossage ("operand is not a condition code, "
13975 "invalid operand code 'Y'");
13976 return;
13978 return;
13980 case 'D':
13981 /* Little bit of braindamage here. The SSE compare instructions
13982 use completely different names for the comparisons than the
13983 fp conditional moves do. */
13984 switch (GET_CODE (x))
13986 case UNEQ:
13987 if (TARGET_AVX)
13989 fputs ("eq_us", file);
13990 break;
13992 /* FALLTHRU */
13993 case EQ:
13994 fputs ("eq", file);
13995 break;
13996 case UNLT:
13997 if (TARGET_AVX)
13999 fputs ("nge", file);
14000 break;
14002 /* FALLTHRU */
14003 case LT:
14004 fputs ("lt", file);
14005 break;
14006 case UNLE:
14007 if (TARGET_AVX)
14009 fputs ("ngt", file);
14010 break;
14012 /* FALLTHRU */
14013 case LE:
14014 fputs ("le", file);
14015 break;
14016 case UNORDERED:
14017 fputs ("unord", file);
14018 break;
14019 case LTGT:
14020 if (TARGET_AVX)
14022 fputs ("neq_oq", file);
14023 break;
14025 /* FALLTHRU */
14026 case NE:
14027 fputs ("neq", file);
14028 break;
14029 case GE:
14030 if (TARGET_AVX)
14032 fputs ("ge", file);
14033 break;
14035 /* FALLTHRU */
14036 case UNGE:
14037 fputs ("nlt", file);
14038 break;
14039 case GT:
14040 if (TARGET_AVX)
14042 fputs ("gt", file);
14043 break;
14045 /* FALLTHRU */
14046 case UNGT:
14047 fputs ("nle", file);
14048 break;
14049 case ORDERED:
14050 fputs ("ord", file);
14051 break;
14052 default:
14053 output_operand_lossage ("operand is not a condition code, "
14054 "invalid operand code 'D'");
14055 return;
14057 return;
14059 case 'F':
14060 case 'f':
14061 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14062 if (ASSEMBLER_DIALECT == ASM_ATT)
14063 putc ('.', file);
14064 gcc_fallthrough ();
14065 #endif
14067 case 'C':
14068 case 'c':
14069 if (!COMPARISON_P (x))
14071 output_operand_lossage ("operand is not a condition code, "
14072 "invalid operand code '%c'", code);
14073 return;
14075 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14076 code == 'c' || code == 'f',
14077 code == 'F' || code == 'f',
14078 file);
14079 return;
14081 case 'H':
14082 if (!offsettable_memref_p (x))
14084 output_operand_lossage ("operand is not an offsettable memory "
14085 "reference, invalid operand code 'H'");
14086 return;
14088 /* It doesn't actually matter what mode we use here, as we're
14089 only going to use this for printing. */
14090 x = adjust_address_nv (x, DImode, 8);
14091 /* Output 'qword ptr' for intel assembler dialect. */
14092 if (ASSEMBLER_DIALECT == ASM_INTEL)
14093 code = 'q';
14094 break;
14096 case 'K':
14097 if (!CONST_INT_P (x))
14099 output_operand_lossage ("operand is not an integer, invalid "
14100 "operand code 'K'");
14101 return;
14104 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14105 #ifdef HAVE_AS_IX86_HLE
14106 fputs ("xacquire ", file);
14107 #else
14108 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14109 #endif
14110 else if (INTVAL (x) & IX86_HLE_RELEASE)
14111 #ifdef HAVE_AS_IX86_HLE
14112 fputs ("xrelease ", file);
14113 #else
14114 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14115 #endif
14116 /* We do not want to print value of the operand. */
14117 return;
14119 case 'N':
14120 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
14121 fputs ("{z}", file);
14122 return;
14124 case 'r':
14125 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
14127 output_operand_lossage ("operand is not a specific integer, "
14128 "invalid operand code 'r'");
14129 return;
14132 if (ASSEMBLER_DIALECT == ASM_INTEL)
14133 fputs (", ", file);
14135 fputs ("{sae}", file);
14137 if (ASSEMBLER_DIALECT == ASM_ATT)
14138 fputs (", ", file);
14140 return;
14142 case 'R':
14143 if (!CONST_INT_P (x))
14145 output_operand_lossage ("operand is not an integer, invalid "
14146 "operand code 'R'");
14147 return;
14150 if (ASSEMBLER_DIALECT == ASM_INTEL)
14151 fputs (", ", file);
14153 switch (INTVAL (x))
14155 case ROUND_NEAREST_INT | ROUND_SAE:
14156 fputs ("{rn-sae}", file);
14157 break;
14158 case ROUND_NEG_INF | ROUND_SAE:
14159 fputs ("{rd-sae}", file);
14160 break;
14161 case ROUND_POS_INF | ROUND_SAE:
14162 fputs ("{ru-sae}", file);
14163 break;
14164 case ROUND_ZERO | ROUND_SAE:
14165 fputs ("{rz-sae}", file);
14166 break;
14167 default:
14168 output_operand_lossage ("operand is not a specific integer, "
14169 "invalid operand code 'R'");
14172 if (ASSEMBLER_DIALECT == ASM_ATT)
14173 fputs (", ", file);
14175 return;
14177 case '*':
14178 if (ASSEMBLER_DIALECT == ASM_ATT)
14179 putc ('*', file);
14180 return;
14182 case '&':
14184 const char *name = get_some_local_dynamic_name ();
14185 if (name == NULL)
14186 output_operand_lossage ("'%%&' used without any "
14187 "local dynamic TLS references");
14188 else
14189 assemble_name (file, name);
14190 return;
14193 case '+':
14195 rtx x;
14197 if (!optimize
14198 || optimize_function_for_size_p (cfun)
14199 || !TARGET_BRANCH_PREDICTION_HINTS)
14200 return;
14202 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14203 if (x)
14205 int pred_val = profile_probability::from_reg_br_prob_note
14206 (XINT (x, 0)).to_reg_br_prob_base ();
14208 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14209 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14211 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14212 bool cputaken
14213 = final_forward_branch_p (current_output_insn) == 0;
14215 /* Emit hints only in the case default branch prediction
14216 heuristics would fail. */
14217 if (taken != cputaken)
14219 /* We use 3e (DS) prefix for taken branches and
14220 2e (CS) prefix for not taken branches. */
14221 if (taken)
14222 fputs ("ds ; ", file);
14223 else
14224 fputs ("cs ; ", file);
14228 return;
14231 case ';':
14232 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14233 putc (';', file);
14234 #endif
14235 return;
14237 case '~':
14238 putc (TARGET_AVX2 ? 'i' : 'f', file);
14239 return;
14241 case 'M':
14242 if (TARGET_X32)
14244 /* NB: 32-bit indices in a VSIB address are sign-extended
14245 to 64 bits. In x32, a 32-bit address such as 0xf7fa3010 is
14246 sign-extended to 0xfffffffff7fa3010, which is an invalid
14247 address. Add the addr32 prefix if there is neither a base
14248 register nor a symbol. */
14249 bool ok;
14250 struct ix86_address parts;
14251 ok = ix86_decompose_address (x, &parts);
14252 gcc_assert (ok && parts.index == NULL_RTX);
14253 if (parts.base == NULL_RTX
14254 && (parts.disp == NULL_RTX
14255 || !symbolic_operand (parts.disp,
14256 GET_MODE (parts.disp))))
14257 fputs ("addr32 ", file);
14259 return;
14261 case '^':
14262 if (TARGET_64BIT && Pmode != word_mode)
14263 fputs ("addr32 ", file);
14264 return;
14266 case '!':
14267 if (ix86_notrack_prefixed_insn_p (current_output_insn))
14268 fputs ("notrack ", file);
14269 return;
14271 default:
14272 output_operand_lossage ("invalid operand code '%c'", code);
14276 if (REG_P (x))
14277 print_reg (x, code, file);
14279 else if (MEM_P (x))
14281 rtx addr = XEXP (x, 0);
14283 /* No `byte ptr' prefix for call instructions ... */
14284 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
14286 machine_mode mode = GET_MODE (x);
14287 const char *size;
14289 /* Check for explicit size override codes. */
14290 if (code == 'b')
14291 size = "BYTE";
14292 else if (code == 'w')
14293 size = "WORD";
14294 else if (code == 'k')
14295 size = "DWORD";
14296 else if (code == 'q')
14297 size = "QWORD";
14298 else if (code == 'x')
14299 size = "XMMWORD";
14300 else if (code == 't')
14301 size = "YMMWORD";
14302 else if (code == 'g')
14303 size = "ZMMWORD";
14304 else if (mode == BLKmode)
14305 /* ... or BLKmode operands, when not overridden. */
14306 size = NULL;
14307 else
14308 switch (GET_MODE_SIZE (mode))
14310 case 1: size = "BYTE"; break;
14311 case 2: size = "WORD"; break;
14312 case 4: size = "DWORD"; break;
14313 case 8: size = "QWORD"; break;
14314 case 12: size = "TBYTE"; break;
14315 case 16:
14316 if (mode == XFmode)
14317 size = "TBYTE";
14318 else
14319 size = "XMMWORD";
14320 break;
14321 case 32: size = "YMMWORD"; break;
14322 case 64: size = "ZMMWORD"; break;
14323 default:
14324 gcc_unreachable ();
14326 if (size)
14328 fputs (size, file);
14329 fputs (" PTR ", file);
14333 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14334 output_operand_lossage ("invalid constraints for operand");
14335 else
14336 ix86_print_operand_address_as
14337 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
14340 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
14342 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
14343 REAL_MODE_FORMAT (HFmode));
14344 if (ASSEMBLER_DIALECT == ASM_ATT)
14345 putc ('$', file);
14346 fprintf (file, "0x%04x", (unsigned int) l);
14349 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
14351 long l;
14353 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14355 if (ASSEMBLER_DIALECT == ASM_ATT)
14356 putc ('$', file);
14357 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14358 if (code == 'q')
14359 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14360 (unsigned long long) (int) l);
14361 else
14362 fprintf (file, "0x%08x", (unsigned int) l);
14365 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
14367 long l[2];
14369 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
14371 if (ASSEMBLER_DIALECT == ASM_ATT)
14372 putc ('$', file);
14373 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14376 /* These float cases don't actually occur as immediate operands. */
14377 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
14379 char dstr[30];
14381 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14382 fputs (dstr, file);
14385 /* Print bcst_mem_operand. */
14386 else if (GET_CODE (x) == VEC_DUPLICATE)
14388 machine_mode vmode = GET_MODE (x);
14389 /* Must be bcst_memory_operand. */
14390 gcc_assert (bcst_mem_operand (x, vmode));
14392 rtx mem = XEXP (x,0);
14393 ix86_print_operand (file, mem, 0);
14395 switch (vmode)
14397 case E_V2DImode:
14398 case E_V2DFmode:
14399 fputs ("{1to2}", file);
14400 break;
14401 case E_V4SImode:
14402 case E_V4SFmode:
14403 case E_V4DImode:
14404 case E_V4DFmode:
14405 fputs ("{1to4}", file);
14406 break;
14407 case E_V8SImode:
14408 case E_V8SFmode:
14409 case E_V8DFmode:
14410 case E_V8DImode:
14411 case E_V8HFmode:
14412 fputs ("{1to8}", file);
14413 break;
14414 case E_V16SFmode:
14415 case E_V16SImode:
14416 case E_V16HFmode:
14417 fputs ("{1to16}", file);
14418 break;
14419 case E_V32HFmode:
14420 fputs ("{1to32}", file);
14421 break;
14422 default:
14423 gcc_unreachable ();
14427 else
14429 /* We have patterns that allow zero sets of memory, for instance.
14430 In 64-bit mode, we should probably support all 8-byte vectors,
14431 since we can in fact encode that into an immediate. */
14432 if (GET_CODE (x) == CONST_VECTOR)
14434 if (x != CONST0_RTX (GET_MODE (x)))
14435 output_operand_lossage ("invalid vector immediate");
14436 x = const0_rtx;
14439 if (code == 'P')
14441 if (ix86_force_load_from_GOT_p (x, true))
14443 /* For inline assembly statement, load function address
14444 from GOT with 'P' operand modifier to avoid PLT. */
14445 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14446 (TARGET_64BIT
14447 ? UNSPEC_GOTPCREL
14448 : UNSPEC_GOT));
14449 x = gen_rtx_CONST (Pmode, x);
14450 x = gen_const_mem (Pmode, x);
14451 ix86_print_operand (file, x, 'A');
14452 return;
14455 else if (code != 'p')
14457 if (CONST_INT_P (x))
14459 if (ASSEMBLER_DIALECT == ASM_ATT)
14460 putc ('$', file);
14462 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14463 || GET_CODE (x) == LABEL_REF)
14465 if (ASSEMBLER_DIALECT == ASM_ATT)
14466 putc ('$', file);
14467 else
14468 fputs ("OFFSET FLAT:", file);
14471 if (CONST_INT_P (x))
14472 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14473 else if (flag_pic || MACHOPIC_INDIRECT)
14474 output_pic_addr_const (file, x, code);
14475 else
14476 output_addr_const (file, x);
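/* Illustrative sketch, not part of this file: a couple of the operand
   modifiers dispatched above, as they can appear in an extended-asm
   template.  The function name is hypothetical; the emitted register
   names depend on allocation.  */
int
example_operand_print (int *p, int x)
{
  /* '%z0' prints the size suffix for the 4-byte memory operand under
     -masm=att ("addl ...") and nothing under -masm=intel, where the
     MEM_P branch above instead prints a "DWORD PTR [...]" size keyword;
     the {att|intel} dialect alternatives swap the operand order
     accordingly.  */
  asm ("add%z0\t{%1, %0|%0, %1}" : "+m" (*p) : "r" (x));
  return *p;
}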
14480 static bool
14481 ix86_print_operand_punct_valid_p (unsigned char code)
14483 return (code == '*' || code == '+' || code == '&' || code == ';'
14484 || code == '~' || code == '^' || code == '!');
14487 /* Print a memory operand whose address is ADDR. */
14489 static void
14490 ix86_print_operand_address_as (FILE *file, rtx addr,
14491 addr_space_t as, bool raw)
14493 struct ix86_address parts;
14494 rtx base, index, disp;
14495 int scale;
14496 int ok;
14497 bool vsib = false;
14498 int code = 0;
14500 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14502 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14503 gcc_assert (parts.index == NULL_RTX);
14504 parts.index = XVECEXP (addr, 0, 1);
14505 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14506 addr = XVECEXP (addr, 0, 0);
14507 vsib = true;
14509 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14511 gcc_assert (TARGET_64BIT);
14512 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14513 code = 'q';
14515 else
14516 ok = ix86_decompose_address (addr, &parts);
14518 gcc_assert (ok);
14520 base = parts.base;
14521 index = parts.index;
14522 disp = parts.disp;
14523 scale = parts.scale;
14525 if (ADDR_SPACE_GENERIC_P (as))
14526 as = parts.seg;
14527 else
14528 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
14530 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
14532 if (ASSEMBLER_DIALECT == ASM_ATT)
14533 putc ('%', file);
14535 switch (as)
14537 case ADDR_SPACE_SEG_FS:
14538 fputs ("fs:", file);
14539 break;
14540 case ADDR_SPACE_SEG_GS:
14541 fputs ("gs:", file);
14542 break;
14543 default:
14544 gcc_unreachable ();
14548 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14549 if (TARGET_64BIT && !base && !index && !raw)
14551 rtx symbol = disp;
14553 if (GET_CODE (disp) == CONST
14554 && GET_CODE (XEXP (disp, 0)) == PLUS
14555 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14556 symbol = XEXP (XEXP (disp, 0), 0);
14558 if (GET_CODE (symbol) == LABEL_REF
14559 || (GET_CODE (symbol) == SYMBOL_REF
14560 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14561 base = pc_rtx;
14564 if (!base && !index)
14566 /* Displacement only requires special attention. */
14567 if (CONST_INT_P (disp))
14569 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14570 fputs ("ds:", file);
14571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14573 /* Load the external function address via the GOT slot to avoid PLT. */
14574 else if (GET_CODE (disp) == CONST
14575 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14576 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14577 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14578 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14579 output_pic_addr_const (file, disp, 0);
14580 else if (flag_pic)
14581 output_pic_addr_const (file, disp, 0);
14582 else
14583 output_addr_const (file, disp);
14585 else
14587 /* Print SImode register names to force addr32 prefix. */
14588 if (SImode_address_operand (addr, VOIDmode))
14590 if (flag_checking)
14592 gcc_assert (TARGET_64BIT);
14593 switch (GET_CODE (addr))
14595 case SUBREG:
14596 gcc_assert (GET_MODE (addr) == SImode);
14597 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14598 break;
14599 case ZERO_EXTEND:
14600 case AND:
14601 gcc_assert (GET_MODE (addr) == DImode);
14602 break;
14603 default:
14604 gcc_unreachable ();
14607 gcc_assert (!code);
14608 code = 'k';
14610 else if (code == 0
14611 && TARGET_X32
14612 && disp
14613 && CONST_INT_P (disp)
14614 && INTVAL (disp) < -16*1024*1024)
14616 /* X32 runs in 64-bit mode, where displacement, DISP, in
14617 address DISP(%r64), is encoded as 32-bit immediate sign-
14618 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14619 address is %r64 + 0xffffffffbffffd00. When %r64 <
14620 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14621 which is invalid for x32. The correct address is %r64
14622 - 0x40000300 == 0xf7ffdd64. To properly encode
14623 -0x40000300(%r64) for x32, we zero-extend negative
14624 displacement by forcing addr32 prefix which truncates
14625 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14626 zero-extend all negative displacements, including -1(%rsp).
14627 However, for small negative displacements, sign-extension
14628 won't cause overflow. We only zero-extend negative
14629 displacements if they < -16*1024*1024, which is also used
14630 to check legitimate address displacements for PIC. */
14631 code = 'k';
14634 /* Since the upper 32 bits of RSP are always zero for x32,
14635 we can encode %esp as %rsp to avoid 0x67 prefix if
14636 there is no index register. */
14637 if (TARGET_X32 && Pmode == SImode
14638 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14639 code = 'q';
14641 if (ASSEMBLER_DIALECT == ASM_ATT)
14643 if (disp)
14645 if (flag_pic)
14646 output_pic_addr_const (file, disp, 0);
14647 else if (GET_CODE (disp) == LABEL_REF)
14648 output_asm_label (disp);
14649 else
14650 output_addr_const (file, disp);
14653 putc ('(', file);
14654 if (base)
14655 print_reg (base, code, file);
14656 if (index)
14658 putc (',', file);
14659 print_reg (index, vsib ? 0 : code, file);
14660 if (scale != 1 || vsib)
14661 fprintf (file, ",%d", scale);
14663 putc (')', file);
14665 else
14667 rtx offset = NULL_RTX;
14669 if (disp)
14671 /* Pull out the offset of a symbol; print any symbol itself. */
14672 if (GET_CODE (disp) == CONST
14673 && GET_CODE (XEXP (disp, 0)) == PLUS
14674 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14676 offset = XEXP (XEXP (disp, 0), 1);
14677 disp = gen_rtx_CONST (VOIDmode,
14678 XEXP (XEXP (disp, 0), 0));
14681 if (flag_pic)
14682 output_pic_addr_const (file, disp, 0);
14683 else if (GET_CODE (disp) == LABEL_REF)
14684 output_asm_label (disp);
14685 else if (CONST_INT_P (disp))
14686 offset = disp;
14687 else
14688 output_addr_const (file, disp);
14691 putc ('[', file);
14692 if (base)
14694 print_reg (base, code, file);
14695 if (offset)
14697 if (INTVAL (offset) >= 0)
14698 putc ('+', file);
14699 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14702 else if (offset)
14703 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14704 else
14705 putc ('0', file);
14707 if (index)
14709 putc ('+', file);
14710 print_reg (index, vsib ? 0 : code, file);
14711 if (scale != 1 || vsib)
14712 fprintf (file, "*%d", scale);
14714 putc (']', file);
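/* Illustrative sketch, not part of this file: the same decomposed
   address rendered by the routine above in both dialects.  For base rdi,
   index rsi, scale 4 and displacement 16, a load is typically printed as

	AT&T:   movl 16(%rdi,%rsi,4), %eax
	Intel:  mov  eax, DWORD PTR [rdi+rsi*4+16]

   Register choices are examples only; the hypothetical function below
   usually produces such an address.  */
int
example_address_print (int *base, long idx)
{
  return base[idx + 4];	/* address of the form disp(base,index,scale) */
}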
14719 static void
14720 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14722 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14723 output_operand_lossage ("invalid constraints for operand");
14724 else
14725 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14728 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14730 static bool
14731 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14733 rtx op;
14735 if (GET_CODE (x) != UNSPEC)
14736 return false;
14738 op = XVECEXP (x, 0, 0);
14739 switch (XINT (x, 1))
14741 case UNSPEC_GOTOFF:
14742 output_addr_const (file, op);
14743 fputs ("@gotoff", file);
14744 break;
14745 case UNSPEC_GOTTPOFF:
14746 output_addr_const (file, op);
14747 /* FIXME: This might be @TPOFF in Sun ld. */
14748 fputs ("@gottpoff", file);
14749 break;
14750 case UNSPEC_TPOFF:
14751 output_addr_const (file, op);
14752 fputs ("@tpoff", file);
14753 break;
14754 case UNSPEC_NTPOFF:
14755 output_addr_const (file, op);
14756 if (TARGET_64BIT)
14757 fputs ("@tpoff", file);
14758 else
14759 fputs ("@ntpoff", file);
14760 break;
14761 case UNSPEC_DTPOFF:
14762 output_addr_const (file, op);
14763 fputs ("@dtpoff", file);
14764 break;
14765 case UNSPEC_GOTNTPOFF:
14766 output_addr_const (file, op);
14767 if (TARGET_64BIT)
14768 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14769 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14770 else
14771 fputs ("@gotntpoff", file);
14772 break;
14773 case UNSPEC_INDNTPOFF:
14774 output_addr_const (file, op);
14775 fputs ("@indntpoff", file);
14776 break;
14777 #if TARGET_MACHO
14778 case UNSPEC_MACHOPIC_OFFSET:
14779 output_addr_const (file, op);
14780 putc ('-', file);
14781 machopic_output_function_base_name (file);
14782 break;
14783 #endif
14785 default:
14786 return false;
14789 return true;
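/* Illustrative sketch, not part of this file: the unspec wrappers
   handled above become TLS relocation suffixes in the output.  For a
   hypothetical thread-local variable "foo", the initial-exec model on
   x86-64 typically prints the GOT slot load as

	movq	foo@gottpoff(%rip), %rax

   while the local-exec model uses a foo@tpoff offset from %fs.  */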
14793 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14794 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14795 is the expression of the binary operation. The output may either be
14796 emitted here, or returned to the caller, like all output_* functions.
14798 There is no guarantee that the operands are the same mode, as they
14799 might be within FLOAT or FLOAT_EXTEND expressions. */
14801 #ifndef SYSV386_COMPAT
14802 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14803 wants to fix the assemblers because that causes incompatibility
14804 with gcc. No-one wants to fix gcc because that causes
14805 incompatibility with assemblers... You can use the option of
14806 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14807 #define SYSV386_COMPAT 1
14808 #endif
14810 const char *
14811 output_387_binary_op (rtx_insn *insn, rtx *operands)
14813 static char buf[40];
14814 const char *p;
14815 bool is_sse
14816 = (SSE_REG_P (operands[0])
14817 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14819 if (is_sse)
14820 p = "%v";
14821 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14823 p = "fi";
14824 else
14825 p = "f";
14827 strcpy (buf, p);
14829 switch (GET_CODE (operands[3]))
14831 case PLUS:
14832 p = "add"; break;
14833 case MINUS:
14834 p = "sub"; break;
14835 case MULT:
14836 p = "mul"; break;
14837 case DIV:
14838 p = "div"; break;
14839 default:
14840 gcc_unreachable ();
14843 strcat (buf, p);
14845 if (is_sse)
14847 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14848 strcat (buf, p);
14850 if (TARGET_AVX)
14851 p = "\t{%2, %1, %0|%0, %1, %2}";
14852 else
14853 p = "\t{%2, %0|%0, %2}";
14855 strcat (buf, p);
14856 return buf;
14859 /* Even if we do not want to check the inputs, this documents the input
14860 constraints, which helps in understanding the following code. */
14861 if (flag_checking)
14863 if (STACK_REG_P (operands[0])
14864 && ((REG_P (operands[1])
14865 && REGNO (operands[0]) == REGNO (operands[1])
14866 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14867 || (REG_P (operands[2])
14868 && REGNO (operands[0]) == REGNO (operands[2])
14869 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14870 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14871 ; /* ok */
14872 else
14873 gcc_unreachable ();
14876 switch (GET_CODE (operands[3]))
14878 case MULT:
14879 case PLUS:
14880 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14881 std::swap (operands[1], operands[2]);
14883 /* know operands[0] == operands[1]. */
14885 if (MEM_P (operands[2]))
14887 p = "%Z2\t%2";
14888 break;
14891 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14893 if (STACK_TOP_P (operands[0]))
14894 /* How is it that we are storing to a dead operand[2]?
14895 Well, presumably operands[1] is dead too. We can't
14896 store the result to st(0) as st(0) gets popped on this
14897 instruction. Instead store to operands[2] (which I
14898 think has to be st(1)). st(1) will be popped later.
14899 gcc <= 2.8.1 didn't have this check and generated
14900 assembly code that the Unixware assembler rejected. */
14901 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14902 else
14903 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14904 break;
14907 if (STACK_TOP_P (operands[0]))
14908 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14909 else
14910 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14911 break;
14913 case MINUS:
14914 case DIV:
14915 if (MEM_P (operands[1]))
14917 p = "r%Z1\t%1";
14918 break;
14921 if (MEM_P (operands[2]))
14923 p = "%Z2\t%2";
14924 break;
14927 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14929 #if SYSV386_COMPAT
14930 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14931 derived assemblers, confusingly reverse the direction of
14932 the operation for fsub{r} and fdiv{r} when the
14933 destination register is not st(0). The Intel assembler
14934 doesn't have this brain damage. Read !SYSV386_COMPAT to
14935 figure out what the hardware really does. */
14936 if (STACK_TOP_P (operands[0]))
14937 p = "{p\t%0, %2|rp\t%2, %0}";
14938 else
14939 p = "{rp\t%2, %0|p\t%0, %2}";
14940 #else
14941 if (STACK_TOP_P (operands[0]))
14942 /* As above for fmul/fadd, we can't store to st(0). */
14943 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14944 else
14945 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14946 #endif
14947 break;
14950 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14952 #if SYSV386_COMPAT
14953 if (STACK_TOP_P (operands[0]))
14954 p = "{rp\t%0, %1|p\t%1, %0}";
14955 else
14956 p = "{p\t%1, %0|rp\t%0, %1}";
14957 #else
14958 if (STACK_TOP_P (operands[0]))
14959 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14960 else
14961 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14962 #endif
14963 break;
14966 if (STACK_TOP_P (operands[0]))
14968 if (STACK_TOP_P (operands[1]))
14969 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14970 else
14971 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14972 break;
14974 else if (STACK_TOP_P (operands[1]))
14976 #if SYSV386_COMPAT
14977 p = "{\t%1, %0|r\t%0, %1}";
14978 #else
14979 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14980 #endif
14982 else
14984 #if SYSV386_COMPAT
14985 p = "{r\t%2, %0|\t%0, %2}";
14986 #else
14987 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14988 #endif
14990 break;
14992 default:
14993 gcc_unreachable ();
14996 strcat (buf, p);
14997 return buf;
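/* Illustrative sketch, not part of this file: representative templates
   the routine above assembles in BUF ('|' separates the AT&T and Intel
   variants, operand numbers are those of the insn):

     SSE SFmode PLUS, AVX:      "%vaddss\t{%2, %1, %0|%0, %1, %2}"
     SSE DFmode MULT, non-AVX:  "%vmulsd\t{%2, %0|%0, %2}"
     i387 PLUS, memory op 2:    "fadd%Z2\t%2"
*/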
15000 /* Return needed mode for entity in optimize_mode_switching pass. */
15002 static int
15003 ix86_dirflag_mode_needed (rtx_insn *insn)
15005 if (CALL_P (insn))
15007 if (cfun->machine->func_type == TYPE_NORMAL)
15008 return X86_DIRFLAG_ANY;
15009 else
15010 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
15011 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
15014 if (recog_memoized (insn) < 0)
15015 return X86_DIRFLAG_ANY;
15017 if (get_attr_type (insn) == TYPE_STR)
15019 /* Emit cld instruction if stringops are used in the function. */
15020 if (cfun->machine->func_type == TYPE_NORMAL)
15021 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
15022 else
15023 return X86_DIRFLAG_RESET;
15026 return X86_DIRFLAG_ANY;
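/* Illustrative sketch, not part of this file: the direction-flag
   tracking above is what forces a cld in an interrupt handler before any
   string instruction, because the interrupted context may have DF set.
   The function name is hypothetical and the sketch assumes it is built
   with -mgeneral-regs-only, as recommended for handlers.  */
struct interrupt_frame;
__attribute__ ((interrupt))
void example_isr (struct interrupt_frame *frame)
{
  static char buf[64];
  /* If this clear is expanded inline as rep stosb, the handler's entry
     mode is X86_DIRFLAG_ANY while the string insn needs
     X86_DIRFLAG_RESET, so a cld is emitted in front of it.  */
  __builtin_memset (buf, 0, sizeof buf);
}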
15029 /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
15031 static bool
15032 ix86_check_avx_upper_register (const_rtx exp)
15034 return (SSE_REG_P (exp)
15035 && !EXT_REX_SSE_REG_P (exp)
15036 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
15039 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
15041 static void
15042 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
15044 if (ix86_check_avx_upper_register (dest))
15046 bool *used = (bool *) data;
15047 *used = true;
15051 /* Return needed mode for entity in optimize_mode_switching pass. */
15053 static int
15054 ix86_avx_u128_mode_needed (rtx_insn *insn)
15056 if (DEBUG_INSN_P (insn))
15057 return AVX_U128_ANY;
15059 if (CALL_P (insn))
15061 rtx link;
15063 /* Needed mode is set to AVX_U128_CLEAN if there are
15064 no 256bit or 512bit modes used in function arguments. */
15065 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15066 link;
15067 link = XEXP (link, 1))
15069 if (GET_CODE (XEXP (link, 0)) == USE)
15071 rtx arg = XEXP (XEXP (link, 0), 0);
15073 if (ix86_check_avx_upper_register (arg))
15074 return AVX_U128_DIRTY;
15078 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
15079 or 512bit registers used in the function return register. */
15080 bool avx_upper_reg_found = false;
15081 note_stores (insn, ix86_check_avx_upper_stores,
15082 &avx_upper_reg_found);
15083 if (avx_upper_reg_found)
15084 return AVX_U128_DIRTY;
15086 /* If the function is known to preserve some SSE registers,
15087 RA and previous passes can legitimately rely on that for
15088 modes wider than 256 bits. It's only safe to issue a
15089 vzeroupper if all SSE registers are clobbered. */
15090 const function_abi &abi = insn_callee_abi (insn);
15091 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
15092 /* It should be safe to issue a vzeroupper before sibling_call_p.
15093 Also, there is no mode_exit for sibling_call, so there could be a
15094 missing vzeroupper for that. */
15095 || !(SIBLING_CALL_P (insn)
15096 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15097 abi.mode_clobbers (V4DImode))))
15098 return AVX_U128_ANY;
15100 return AVX_U128_CLEAN;
15103 subrtx_iterator::array_type array;
15105 rtx set = single_set (insn);
15106 if (set)
15108 rtx dest = SET_DEST (set);
15109 rtx src = SET_SRC (set);
15110 if (ix86_check_avx_upper_register (dest))
15112 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
15113 source isn't zero. */
15114 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
15115 return AVX_U128_DIRTY;
15116 else
15117 return AVX_U128_ANY;
15119 else
15121 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
15122 if (ix86_check_avx_upper_register (*iter))
15123 return AVX_U128_DIRTY;
15126 /* This isn't YMM/ZMM load/store. */
15127 return AVX_U128_ANY;
15130 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
15131 Hardware changes state only when a 256bit register is written to,
15132 but we need to prevent the compiler from moving the optimal insertion
15133 point above an eventual read from a 256bit or 512bit register. */
15134 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15135 if (ix86_check_avx_upper_register (*iter))
15136 return AVX_U128_DIRTY;
15138 return AVX_U128_ANY;
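/* Illustrative sketch, not part of this file: the effect of the
   tracking above on user code compiled with -mavx.  The function names
   are hypothetical.  */
#include <immintrin.h>

extern void scalar_callee (void);
extern __m256 g;

void
example_avx_u128 (__m256 a, __m256 b)
{
  g = _mm256_add_ps (a, b);	/* 256-bit write: upper state becomes DIRTY */
  scalar_callee ();		/* no 256-bit args or return value, so the
				   needed mode is AVX_U128_CLEAN and a
				   vzeroupper is typically emitted before
				   the call */
}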
15141 /* Return mode that i387 must be switched into
15142 prior to the execution of insn. */
15144 static int
15145 ix86_i387_mode_needed (int entity, rtx_insn *insn)
15147 enum attr_i387_cw mode;
15149 /* The mode UNINITIALIZED is used to store the control word after a
15150 function call or ASM pattern. The mode ANY specifies that the function
15151 has no requirements on the control word and makes no changes in the
15152 bits we are interested in. */
15154 if (CALL_P (insn)
15155 || (NONJUMP_INSN_P (insn)
15156 && (asm_noperands (PATTERN (insn)) >= 0
15157 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15158 return I387_CW_UNINITIALIZED;
15160 if (recog_memoized (insn) < 0)
15161 return I387_CW_ANY;
15163 mode = get_attr_i387_cw (insn);
15165 switch (entity)
15167 case I387_ROUNDEVEN:
15168 if (mode == I387_CW_ROUNDEVEN)
15169 return mode;
15170 break;
15172 case I387_TRUNC:
15173 if (mode == I387_CW_TRUNC)
15174 return mode;
15175 break;
15177 case I387_FLOOR:
15178 if (mode == I387_CW_FLOOR)
15179 return mode;
15180 break;
15182 case I387_CEIL:
15183 if (mode == I387_CW_CEIL)
15184 return mode;
15185 break;
15187 default:
15188 gcc_unreachable ();
15191 return I387_CW_ANY;
15194 /* Return mode that entity must be switched into
15195 prior to the execution of insn. */
15197 static int
15198 ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
15200 switch (entity)
15202 case X86_DIRFLAG:
15203 return ix86_dirflag_mode_needed (insn);
15204 case AVX_U128:
15205 return ix86_avx_u128_mode_needed (insn);
15206 case I387_ROUNDEVEN:
15207 case I387_TRUNC:
15208 case I387_FLOOR:
15209 case I387_CEIL:
15210 return ix86_i387_mode_needed (entity, insn);
15211 default:
15212 gcc_unreachable ();
15214 return 0;
15217 /* Calculate mode of upper 128bit AVX registers after the insn. */
15219 static int
15220 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
15222 rtx pat = PATTERN (insn);
15224 if (vzeroupper_pattern (pat, VOIDmode)
15225 || vzeroall_pattern (pat, VOIDmode))
15226 return AVX_U128_CLEAN;
15228 /* We know that state is clean after CALL insn if there are no
15229 256bit or 512bit registers used in the function return register. */
15230 if (CALL_P (insn))
15232 bool avx_upper_reg_found = false;
15233 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
15235 if (avx_upper_reg_found)
15236 return AVX_U128_DIRTY;
15238 /* If the function doesn't clobber any SSE registers, or only clobbers
15239 the 128-bit parts, then vzeroupper isn't issued before the function exit;
15240 the status is not CLEAN but ANY after the function. */
15241 const function_abi &abi = insn_callee_abi (insn);
15242 if (!(SIBLING_CALL_P (insn)
15243 || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
15244 abi.mode_clobbers (V4DImode))))
15245 return AVX_U128_ANY;
15247 return AVX_U128_CLEAN;
15250 /* Otherwise, return current mode. Remember that if insn
15251 references AVX 256bit or 512bit registers, the mode was already
15252 changed to DIRTY from MODE_NEEDED. */
15253 return mode;
15256 /* Return the mode that an insn results in. */
15258 static int
15259 ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
15261 switch (entity)
15263 case X86_DIRFLAG:
15264 return mode;
15265 case AVX_U128:
15266 return ix86_avx_u128_mode_after (mode, insn);
15267 case I387_ROUNDEVEN:
15268 case I387_TRUNC:
15269 case I387_FLOOR:
15270 case I387_CEIL:
15271 return mode;
15272 default:
15273 gcc_unreachable ();
15277 static int
15278 ix86_dirflag_mode_entry (void)
15280 /* For TARGET_CLD or in the interrupt handler we can't assume
15281 direction flag state at function entry. */
15282 if (TARGET_CLD
15283 || cfun->machine->func_type != TYPE_NORMAL)
15284 return X86_DIRFLAG_ANY;
15286 return X86_DIRFLAG_RESET;
15289 static int
15290 ix86_avx_u128_mode_entry (void)
15292 tree arg;
15294 /* Entry mode is set to AVX_U128_DIRTY if there are
15295 256bit or 512bit modes used in function arguments. */
15296 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15297 arg = TREE_CHAIN (arg))
15299 rtx incoming = DECL_INCOMING_RTL (arg);
15301 if (incoming && ix86_check_avx_upper_register (incoming))
15302 return AVX_U128_DIRTY;
15305 return AVX_U128_CLEAN;
15308 /* Return a mode that ENTITY is assumed to be
15309 switched to at function entry. */
15311 static int
15312 ix86_mode_entry (int entity)
15314 switch (entity)
15316 case X86_DIRFLAG:
15317 return ix86_dirflag_mode_entry ();
15318 case AVX_U128:
15319 return ix86_avx_u128_mode_entry ();
15320 case I387_ROUNDEVEN:
15321 case I387_TRUNC:
15322 case I387_FLOOR:
15323 case I387_CEIL:
15324 return I387_CW_ANY;
15325 default:
15326 gcc_unreachable ();
15330 static int
15331 ix86_avx_u128_mode_exit (void)
15333 rtx reg = crtl->return_rtx;
15335 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
15336 or 512 bit modes used in the function return register. */
15337 if (reg && ix86_check_avx_upper_register (reg))
15338 return AVX_U128_DIRTY;
15340 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
15341 modes used in function arguments, otherwise return AVX_U128_CLEAN. */
15343 return ix86_avx_u128_mode_entry ();
15346 /* Return a mode that ENTITY is assumed to be
15347 switched to at function exit. */
15349 static int
15350 ix86_mode_exit (int entity)
15352 switch (entity)
15354 case X86_DIRFLAG:
15355 return X86_DIRFLAG_ANY;
15356 case AVX_U128:
15357 return ix86_avx_u128_mode_exit ();
15358 case I387_ROUNDEVEN:
15359 case I387_TRUNC:
15360 case I387_FLOOR:
15361 case I387_CEIL:
15362 return I387_CW_ANY;
15363 default:
15364 gcc_unreachable ();
15368 static int
15369 ix86_mode_priority (int, int n)
15371 return n;
15374 /* Output code to initialize control word copies used by trunc?f?i and
15375 rounding patterns. CURRENT_MODE is set to current control word,
15376 while NEW_MODE is set to new control word. */
15378 static void
15379 emit_i387_cw_initialization (int mode)
15381 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15382 rtx new_mode;
15384 enum ix86_stack_slot slot;
15386 rtx reg = gen_reg_rtx (HImode);
15388 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15389 emit_move_insn (reg, copy_rtx (stored_mode));
15391 switch (mode)
15393 case I387_CW_ROUNDEVEN:
15394 /* round to nearest */
15395 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15396 slot = SLOT_CW_ROUNDEVEN;
15397 break;
15399 case I387_CW_TRUNC:
15400 /* round toward zero (truncate) */
15401 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15402 slot = SLOT_CW_TRUNC;
15403 break;
15405 case I387_CW_FLOOR:
15406 /* round down toward -oo */
15407 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15408 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15409 slot = SLOT_CW_FLOOR;
15410 break;
15412 case I387_CW_CEIL:
15413 /* round up toward +oo */
15414 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15415 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15416 slot = SLOT_CW_CEIL;
15417 break;
15419 default:
15420 gcc_unreachable ();
15423 gcc_assert (slot < MAX_386_STACK_LOCALS);
15425 new_mode = assign_386_stack_local (HImode, slot);
15426 emit_move_insn (new_mode, reg);
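/* Illustrative sketch, not part of this file: the x87 control word
   rounding-control field manipulated above is bits 10-11 (mask 0x0c00):

     0x0000  round to nearest (even)      SLOT_CW_ROUNDEVEN
     0x0400  round down toward -infinity  SLOT_CW_FLOOR
     0x0800  round up toward +infinity    SLOT_CW_CEIL
     0x0c00  round toward zero (truncate) SLOT_CW_TRUNC

   so the FLOOR case, for instance, computes (cw & ~0x0c00) | 0x0400
   into a scratch register and stores it to its dedicated stack slot.  */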
15429 /* Generate one or more insns to set ENTITY to MODE. */
15431 static void
15432 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
15433 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
15435 switch (entity)
15437 case X86_DIRFLAG:
15438 if (mode == X86_DIRFLAG_RESET)
15439 emit_insn (gen_cld ());
15440 break;
15441 case AVX_U128:
15442 if (mode == AVX_U128_CLEAN)
15443 ix86_expand_avx_vzeroupper ();
15444 break;
15445 case I387_ROUNDEVEN:
15446 case I387_TRUNC:
15447 case I387_FLOOR:
15448 case I387_CEIL:
15449 if (mode != I387_CW_ANY
15450 && mode != I387_CW_UNINITIALIZED)
15451 emit_i387_cw_initialization (mode);
15452 break;
15453 default:
15454 gcc_unreachable ();
15458 /* Output code for INSN to convert a float to a signed int. OPERANDS
15459 are the insn operands. The output may be [HSD]Imode and the input
15460 operand may be [SDX]Fmode. */
15462 const char *
15463 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
15465 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15466 bool dimode_p = GET_MODE (operands[0]) == DImode;
15467 int round_mode = get_attr_i387_cw (insn);
15469 static char buf[40];
15470 const char *p;
15472 /* Jump through a hoop or two for DImode, since the hardware has no
15473 non-popping instruction. We used to do this a different way, but
15474 that was somewhat fragile and broke with post-reload splitters. */
15475 if ((dimode_p || fisttp) && !stack_top_dies)
15476 output_asm_insn ("fld\t%y1", operands);
15478 gcc_assert (STACK_TOP_P (operands[1]));
15479 gcc_assert (MEM_P (operands[0]));
15480 gcc_assert (GET_MODE (operands[1]) != TFmode);
15482 if (fisttp)
15483 return "fisttp%Z0\t%0";
15485 strcpy (buf, "fist");
15487 if (round_mode != I387_CW_ANY)
15488 output_asm_insn ("fldcw\t%3", operands);
15490 p = "p%Z0\t%0";
15491 strcat (buf, p + !(stack_top_dies || dimode_p));
15493 output_asm_insn (buf, operands);
15495 if (round_mode != I387_CW_ANY)
15496 output_asm_insn ("fldcw\t%2", operands);
15498 return "";
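/* Illustrative sketch, not part of this file: a typical sequence the
   routine above prints for a DImode truncation without fisttp when the
   value at the top of the stack does not die (operand numbers are the
   insn's; the 'q' suffix assumes HAVE_AS_IX86_FILDQ):

	fld	%y1		# keep a copy, since fistp pops st(0)
	fldcw	%3		# switch to the truncating control word
	fistpq	%0		# convert, store and pop
	fldcw	%2		# restore the original control word
*/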
15501 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15502 have the values zero or one, indicates the ffreep insn's operand
15503 from the OPERANDS array. */
15505 static const char *
15506 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15508 if (TARGET_USE_FFREEP)
15509 #ifdef HAVE_AS_IX86_FFREEP
15510 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15511 #else
15513 static char retval[32];
15514 int regno = REGNO (operands[opno]);
15516 gcc_assert (STACK_REGNO_P (regno));
15518 regno -= FIRST_STACK_REG;
15520 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15521 return retval;
15523 #endif
15525 return opno ? "fstp\t%y1" : "fstp\t%y0";
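/* Illustrative sketch, not part of this file: when the assembler lacks
   the ffreep mnemonic, the raw encoding of "ffreep %st(i)" is the two
   bytes DF C0+i, so for %st(1) the routine above prints

	.short	0xc1df

   which little-endian storage turns into the byte sequence DF C1.  */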
15529 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15530 should be used. UNORDERED_P is true when fucom should be used. */
15532 const char *
15533 output_fp_compare (rtx_insn *insn, rtx *operands,
15534 bool eflags_p, bool unordered_p)
15536 rtx *xops = eflags_p ? &operands[0] : &operands[1];
15537 bool stack_top_dies;
15539 static char buf[40];
15540 const char *p;
15542 gcc_assert (STACK_TOP_P (xops[0]));
15544 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
15546 if (eflags_p)
15548 p = unordered_p ? "fucomi" : "fcomi";
15549 strcpy (buf, p);
15551 p = "p\t{%y1, %0|%0, %y1}";
15552 strcat (buf, p + !stack_top_dies);
15554 return buf;
15557 if (STACK_REG_P (xops[1])
15558 && stack_top_dies
15559 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
15561 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
15563 /* If the top of the 387 stack dies and the other operand
15564 is also a stack register that dies, then this must be an
15565 `fcompp' float compare. */
15566 p = unordered_p ? "fucompp" : "fcompp";
15567 strcpy (buf, p);
15569 else if (const0_operand (xops[1], VOIDmode))
15571 gcc_assert (!unordered_p);
15572 strcpy (buf, "ftst");
15574 else
15576 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15578 gcc_assert (!unordered_p);
15579 p = "ficom";
15581 else
15582 p = unordered_p ? "fucom" : "fcom";
15584 strcpy (buf, p);
15586 p = "p%Z2\t%y2";
15587 strcat (buf, p + !stack_top_dies);
15590 output_asm_insn (buf, operands);
15591 return "fnstsw\t%0";
15594 void
15595 ix86_output_addr_vec_elt (FILE *file, int value)
15597 const char *directive = ASM_LONG;
15599 #ifdef ASM_QUAD
15600 if (TARGET_LP64)
15601 directive = ASM_QUAD;
15602 #else
15603 gcc_assert (!TARGET_64BIT);
15604 #endif
15606 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15609 void
15610 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15612 const char *directive = ASM_LONG;
15614 #ifdef ASM_QUAD
15615 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15616 directive = ASM_QUAD;
15617 #else
15618 gcc_assert (!TARGET_64BIT);
15619 #endif
15620 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15621 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15622 fprintf (file, "%s%s%d-%s%d\n",
15623 directive, LPREFIX, value, LPREFIX, rel);
15624 #if TARGET_MACHO
15625 else if (TARGET_MACHO)
15627 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15628 machopic_output_function_base_name (file);
15629 putc ('\n', file);
15631 #endif
15632 else if (HAVE_AS_GOTOFF_IN_DATA)
15633 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15634 else
15635 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15636 GOT_SYMBOL_NAME, LPREFIX, value);
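/* Illustrative sketch, not part of this file: typical switch-table
   entries produced by the two routines above (label numbers are
   examples).

	.quad	.L4		# ix86_output_addr_vec_elt, -m64 absolute
	.long	.L4-.L2		# ix86_output_addr_diff_elt, label difference
	.long	.L4@GOTOFF	# ix86_output_addr_diff_elt, 32-bit PIC with
				#   GOTOFF-in-data assembler support
*/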
15639 #define LEA_MAX_STALL (3)
15640 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15642 /* Increase given DISTANCE in half-cycles according to
15643 dependencies between PREV and NEXT instructions.
15644 Add 1 half-cycle if there is no dependency and
15645 go to the next cycle if there is some dependency. */
15647 static unsigned int
15648 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15650 df_ref def, use;
15652 if (!prev || !next)
15653 return distance + (distance & 1) + 2;
15655 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15656 return distance + 1;
15658 FOR_EACH_INSN_USE (use, next)
15659 FOR_EACH_INSN_DEF (def, prev)
15660 if (!DF_REF_IS_ARTIFICIAL (def)
15661 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15662 return distance + (distance & 1) + 2;
15664 return distance + 1;
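/* Illustrative sketch, not part of this file: DISTANCE is counted in
   half-cycles.  An independent NEXT simply adds one half-cycle; a
   dependency (or a missing PREV/NEXT) first rounds the distance up to a
   whole cycle and then adds a full cycle, i.e.
   distance + (distance & 1) + 2.  Starting from 3, for example, an
   independent insn gives 4 while a dependent one gives 6.  */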
15667 /* Function checks if instruction INSN defines register number
15668 REGNO1 or REGNO2. */
15670 bool
15671 insn_defines_reg (unsigned int regno1, unsigned int regno2,
15672 rtx_insn *insn)
15674 df_ref def;
15676 FOR_EACH_INSN_DEF (def, insn)
15677 if (DF_REF_REG_DEF_P (def)
15678 && !DF_REF_IS_ARTIFICIAL (def)
15679 && (regno1 == DF_REF_REGNO (def)
15680 || regno2 == DF_REF_REGNO (def)))
15681 return true;
15683 return false;
15686 /* Function checks if instruction INSN uses register number
15687 REGNO as part of an address expression. */
15689 static bool
15690 insn_uses_reg_mem (unsigned int regno, rtx insn)
15692 df_ref use;
15694 FOR_EACH_INSN_USE (use, insn)
15695 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15696 return true;
15698 return false;
15701 /* Search backward for non-agu definition of register number REGNO1
15702 or register number REGNO2 in basic block starting from instruction
15703 START up to head of basic block or instruction INSN.
15705 Function puts true value into *FOUND var if definition was found
15706 and false otherwise.
15708 Distance in half-cycles between START and found instruction or head
15709 of BB is added to DISTANCE and returned. */
15711 static int
15712 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15713 rtx_insn *insn, int distance,
15714 rtx_insn *start, bool *found)
15716 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15717 rtx_insn *prev = start;
15718 rtx_insn *next = NULL;
15720 *found = false;
15722 while (prev
15723 && prev != insn
15724 && distance < LEA_SEARCH_THRESHOLD)
15726 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15728 distance = increase_distance (prev, next, distance);
15729 if (insn_defines_reg (regno1, regno2, prev))
15731 if (recog_memoized (prev) < 0
15732 || get_attr_type (prev) != TYPE_LEA)
15734 *found = true;
15735 return distance;
15739 next = prev;
15741 if (prev == BB_HEAD (bb))
15742 break;
15744 prev = PREV_INSN (prev);
15747 return distance;
15750 /* Search backward for non-agu definition of register number REGNO1
15751 or register number REGNO2 in INSN's basic block until
15752 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15753 2. Reach neighbor BBs boundary, or
15754 3. Reach agu definition.
15755 Returns the distance between the non-agu definition point and INSN.
15756 If no definition point, returns -1. */
15758 static int
15759 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15760 rtx_insn *insn)
15762 basic_block bb = BLOCK_FOR_INSN (insn);
15763 int distance = 0;
15764 bool found = false;
15766 if (insn != BB_HEAD (bb))
15767 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15768 distance, PREV_INSN (insn),
15769 &found);
15771 if (!found && distance < LEA_SEARCH_THRESHOLD)
15773 edge e;
15774 edge_iterator ei;
15775 bool simple_loop = false;
15777 FOR_EACH_EDGE (e, ei, bb->preds)
15778 if (e->src == bb)
15780 simple_loop = true;
15781 break;
15784 if (simple_loop)
15785 distance = distance_non_agu_define_in_bb (regno1, regno2,
15786 insn, distance,
15787 BB_END (bb), &found);
15788 else
15790 int shortest_dist = -1;
15791 bool found_in_bb = false;
15793 FOR_EACH_EDGE (e, ei, bb->preds)
15795 int bb_dist
15796 = distance_non_agu_define_in_bb (regno1, regno2,
15797 insn, distance,
15798 BB_END (e->src),
15799 &found_in_bb);
15800 if (found_in_bb)
15802 if (shortest_dist < 0)
15803 shortest_dist = bb_dist;
15804 else if (bb_dist > 0)
15805 shortest_dist = MIN (bb_dist, shortest_dist);
15807 found = true;
15811 distance = shortest_dist;
15815 if (!found)
15816 return -1;
15818 return distance >> 1;
15821 /* Return the distance in half-cycles between INSN and the next
15822 insn that uses register number REGNO in a memory address, added
15823 to DISTANCE. Return -1 if REGNO is set.
15825 Put true value into *FOUND if register usage was found and
15826 false otherwise.
15827 Put true value into *REDEFINED if register redefinition was
15828 found and false otherwise. */
15830 static int
15831 distance_agu_use_in_bb (unsigned int regno,
15832 rtx_insn *insn, int distance, rtx_insn *start,
15833 bool *found, bool *redefined)
15835 basic_block bb = NULL;
15836 rtx_insn *next = start;
15837 rtx_insn *prev = NULL;
15839 *found = false;
15840 *redefined = false;
15842 if (start != NULL_RTX)
15844 bb = BLOCK_FOR_INSN (start);
15845 if (start != BB_HEAD (bb))
15846 /* If insn and start belong to the same bb, set prev to insn,
15847 so the call to increase_distance will increase the distance
15848 between insns by 1. */
15849 prev = insn;
15852 while (next
15853 && next != insn
15854 && distance < LEA_SEARCH_THRESHOLD)
15856 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15858 distance = increase_distance (prev, next, distance);
15859 if (insn_uses_reg_mem (regno, next))
15861 /* Return DISTANCE if OP0 is used in memory
15862 address in NEXT. */
15863 *found = true;
15864 return distance;
15867 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15869 /* Return -1 if OP0 is set in NEXT. */
15870 *redefined = true;
15871 return -1;
15874 prev = next;
15877 if (next == BB_END (bb))
15878 break;
15880 next = NEXT_INSN (next);
15883 return distance;
15886 /* Return the distance between INSN and the next insn that uses
15887 register number REGNO0 in a memory address. Return -1 if no such
15888 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15890 static int
15891 distance_agu_use (unsigned int regno0, rtx_insn *insn)
15893 basic_block bb = BLOCK_FOR_INSN (insn);
15894 int distance = 0;
15895 bool found = false;
15896 bool redefined = false;
15898 if (insn != BB_END (bb))
15899 distance = distance_agu_use_in_bb (regno0, insn, distance,
15900 NEXT_INSN (insn),
15901 &found, &redefined);
15903 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15905 edge e;
15906 edge_iterator ei;
15907 bool simple_loop = false;
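/* If BB is its own successor it forms a single-block loop; keep
searching forward within the same block. Otherwise scan every
successor block and keep the shortest distance found. */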
15909 FOR_EACH_EDGE (e, ei, bb->succs)
15910 if (e->dest == bb)
15912 simple_loop = true;
15913 break;
15916 if (simple_loop)
15917 distance = distance_agu_use_in_bb (regno0, insn,
15918 distance, BB_HEAD (bb),
15919 &found, &redefined);
15920 else
15922 int shortest_dist = -1;
15923 bool found_in_bb = false;
15924 bool redefined_in_bb = false;
15926 FOR_EACH_EDGE (e, ei, bb->succs)
15928 int bb_dist
15929 = distance_agu_use_in_bb (regno0, insn,
15930 distance, BB_HEAD (e->dest),
15931 &found_in_bb, &redefined_in_bb);
15932 if (found_in_bb)
15934 if (shortest_dist < 0)
15935 shortest_dist = bb_dist;
15936 else if (bb_dist > 0)
15937 shortest_dist = MIN (bb_dist, shortest_dist);
15939 found = true;
15943 distance = shortest_dist;
15947 if (!found || redefined)
15948 return -1;
15950 return distance >> 1;
15953 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15954 there is a dilemma of choosing LEA or ADD.
15955 Negative value: ADD is preferred over LEA
15956 Zero: Neutral
15957 Positive value: LEA is preferred over ADD. */
15958 #define IX86_LEA_PRIORITY 0
15960 /* Return true if using lea INSN has a performance advantage over a
15961 sequence of instructions. The instruction sequence has SPLIT_COST
15962 cycles higher latency than the lea. */
15964 static bool
15965 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15966 unsigned int regno2, int split_cost, bool has_scale)
15968 int dist_define, dist_use;
15970 /* For Atom processors newer than Bonnell, the use of LEA is justified
15971 when a 2-source or 3-source LEA is wanted for its non-destructive
15972 destination, or when a scaled index is needed. */
15973 if (!TARGET_CPU_P (BONNELL))
15975 if (has_scale)
15976 return true;
15977 if (split_cost < 1)
15978 return false;
15979 if (regno0 == regno1 || regno0 == regno2)
15980 return false;
15981 return true;
15984 /* Remember recog_data content. */
15985 struct recog_data_d recog_data_save = recog_data;
15987 dist_define = distance_non_agu_define (regno1, regno2, insn);
15988 dist_use = distance_agu_use (regno0, insn);
15990 /* distance_non_agu_define can call get_attr_type which can call
15991 recog_memoized, restore recog_data back to previous content. */
15992 recog_data = recog_data_save;
15994 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15996 /* If there is no non-AGU operand definition, no AGU
15997 operand use, and the split cost is 0, then the lea
15998 and non-lea variants have the same priority. Currently
15999 we prefer lea for 64-bit code and non-lea for 32-bit
16000 code. */
16001 if (dist_use < 0 && split_cost == 0)
16002 return TARGET_64BIT || IX86_LEA_PRIORITY;
16003 else
16004 return true;
16007 /* With a longer definition distance, lea is preferable.
16008 Adjust the distance to take the splitting cost and the
16009 lea priority into account. */
16010 dist_define += split_cost + IX86_LEA_PRIORITY;
16012 /* If there is no use in a memory address then we just check
16013 that the split cost exceeds the AGU stall. */
16014 if (dist_use < 0)
16015 return dist_define > LEA_MAX_STALL;
16017 /* If this insn has both backward non-agu dependence and forward
16018 agu dependence, the one with short distance takes effect. */
16019 return dist_define >= dist_use;
16022 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16023 move and add to avoid AGU stalls. */
16025 bool
16026 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
16028 unsigned int regno0, regno1, regno2;
16030 /* Check if we need to optimize. */
16031 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16032 return false;
16034 regno0 = true_regnum (operands[0]);
16035 regno1 = true_regnum (operands[1]);
16036 regno2 = true_regnum (operands[2]);
16038 /* We need to split only adds with a non-destructive
16039 destination operand. */
16040 if (regno0 == regno1 || regno0 == regno2)
16041 return false;
16042 else
16043 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
16046 /* Return true if we should emit lea instruction instead of mov
16047 instruction. */
16049 bool
16050 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
16052 unsigned int regno0, regno1;
16054 /* Check if we need to optimize. */
16055 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16056 return false;
16058 /* Use lea for reg to reg moves only. */
16059 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16060 return false;
16062 regno0 = true_regnum (operands[0]);
16063 regno1 = true_regnum (operands[1]);
16065 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
16068 /* Return true if we need to split lea into a sequence of
16069 instructions to avoid AGU stalls during peephole2. */
16071 bool
16072 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
16074 unsigned int regno0, regno1, regno2;
16075 int split_cost;
16076 struct ix86_address parts;
16077 int ok;
16079 /* The "at least two components" test below might not catch simple
16080 move or zero extension insns if parts.base is non-NULL and parts.disp
16081 is const0_rtx as the only components in the address, e.g. if the
16082 register is %rbp or %r13. As this test is much cheaper and moves or
16083 zero extensions are the common case, do this check first. */
16084 if (REG_P (operands[1])
16085 || (SImode_address_operand (operands[1], VOIDmode)
16086 && REG_P (XEXP (operands[1], 0))))
16087 return false;
16089 ok = ix86_decompose_address (operands[1], &parts);
16090 gcc_assert (ok);
16092 /* There should be at least two components in the address. */
16093 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16094 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
16095 return false;
16097 /* We should not split into add if a non-legitimate PIC
16098 operand is used as the displacement. */
16099 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16100 return false;
16102 regno0 = true_regnum (operands[0]);
16103 regno1 = INVALID_REGNUM;
16104 regno2 = INVALID_REGNUM;
16106 if (parts.base)
16107 regno1 = true_regnum (parts.base);
16108 if (parts.index)
16109 regno2 = true_regnum (parts.index);
16111 /* Use add for a = a + b and a = b + a since it is faster and shorter
16112 than lea for most processors. For the processors like BONNELL, if
16113 the destination register of LEA holds an actual address which will
16114 be used soon, LEA is better and otherwise ADD is better. */
16115 if (!TARGET_CPU_P (BONNELL)
16116 && parts.scale == 1
16117 && (!parts.disp || parts.disp == const0_rtx)
16118 && (regno0 == regno1 || regno0 == regno2))
16119 return true;
16121 /* Split with -Oz if the encoding requires fewer bytes. */
16122 if (optimize_size > 1
16123 && parts.scale > 1
16124 && !parts.base
16125 && (!parts.disp || parts.disp == const0_rtx))
16126 return true;
16128 /* Check we need to optimize. */
16129 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
16130 return false;
16132 split_cost = 0;
16134 /* Compute how many cycles we will add to the execution time
16135 if we split the lea into a sequence of instructions. */
16136 if (parts.base || parts.index)
16138 /* Have to use a mov instruction if the non-destructive
16139 destination form is used. */
16140 if (regno1 != regno0 && regno2 != regno0)
16141 split_cost += 1;
16143 /* Have to add index to base if both exist. */
16144 if (parts.base && parts.index)
16145 split_cost += 1;
16147 /* Have to use shift and adds if scale is 2 or greater. */
16148 if (parts.scale > 1)
16150 if (regno0 != regno1)
16151 split_cost += 1;
16152 else if (regno2 == regno0)
16153 split_cost += 4;
16154 else
16155 split_cost += parts.scale;
16158 /* Have to use an add instruction with an immediate if
16159 disp is nonzero. */
16160 if (parts.disp && parts.disp != const0_rtx)
16161 split_cost += 1;
16163 /* Subtract the price of lea. */
16164 split_cost -= 1;
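/* For example, by the rules above "lea 4(%rbx,%rcx,2), %rax" gets a
split cost of 1 (mov) + 1 (add of the index) + 1 (scale) + 1
(displacement) - 1 (the lea itself) = 3. */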
16167 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
16168 parts.scale > 1);
16171 /* Return true if it is ok to optimize an ADD operation to LEA
16172 operation to avoid flag register consumption. For most processors,
16173 ADD is faster than LEA. For processors like BONNELL, if the
16174 destination register of the LEA holds an actual address which will
16175 be used soon, LEA is better and otherwise ADD is better. */
16177 bool
16178 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
16180 unsigned int regno0 = true_regnum (operands[0]);
16181 unsigned int regno1 = true_regnum (operands[1]);
16182 unsigned int regno2 = true_regnum (operands[2]);
16184 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16185 if (regno0 != regno1 && regno0 != regno2)
16186 return true;
16188 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16189 return false;
16191 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
16194 /* Return true if destination reg of SET_BODY is shift count of
16195 USE_BODY. */
16197 static bool
16198 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16200 rtx set_dest;
16201 rtx shift_rtx;
16202 int i;
16204 /* Retrieve destination of SET_BODY. */
16205 switch (GET_CODE (set_body))
16207 case SET:
16208 set_dest = SET_DEST (set_body);
16209 if (!set_dest || !REG_P (set_dest))
16210 return false;
16211 break;
16212 case PARALLEL:
16213 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16214 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16215 use_body))
16216 return true;
16217 /* FALLTHROUGH */
16218 default:
16219 return false;
16222 /* Retrieve shift count of USE_BODY. */
16223 switch (GET_CODE (use_body))
16225 case SET:
16226 shift_rtx = XEXP (use_body, 1);
16227 break;
16228 case PARALLEL:
16229 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16230 if (ix86_dep_by_shift_count_body (set_body,
16231 XVECEXP (use_body, 0, i)))
16232 return true;
16233 /* FALLTHROUGH */
16234 default:
16235 return false;
16238 if (shift_rtx
16239 && (GET_CODE (shift_rtx) == ASHIFT
16240 || GET_CODE (shift_rtx) == LSHIFTRT
16241 || GET_CODE (shift_rtx) == ASHIFTRT
16242 || GET_CODE (shift_rtx) == ROTATE
16243 || GET_CODE (shift_rtx) == ROTATERT))
16245 rtx shift_count = XEXP (shift_rtx, 1);
16247 /* Return true if shift count is dest of SET_BODY. */
16248 if (REG_P (shift_count))
16250 /* Add this check since the function can be invoked before register
16251 allocation by the pre-reload scheduler. */
16252 if (reload_completed
16253 && true_regnum (set_dest) == true_regnum (shift_count))
16254 return true;
16255 else if (REGNO (set_dest) == REGNO (shift_count))
16256 return true;
16260 return false;
16263 /* Return true if destination reg of SET_INSN is shift count of
16264 USE_INSN. */
16266 bool
16267 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16269 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16270 PATTERN (use_insn));
16273 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16274 are ok, keeping in mind the possible movddup alternative. */
16276 bool
16277 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16279 if (MEM_P (operands[0]))
16280 return rtx_equal_p (operands[0], operands[1 + high]);
16281 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16282 return false;
16283 return true;
16286 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16287 then replicate the value for all elements of the vector
16288 register. */
16290 rtx
16291 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
16293 int i, n_elt;
16294 rtvec v;
16295 machine_mode scalar_mode;
16297 switch (mode)
16299 case E_V64QImode:
16300 case E_V32QImode:
16301 case E_V16QImode:
16302 case E_V32HImode:
16303 case E_V16HImode:
16304 case E_V8HImode:
16305 case E_V16SImode:
16306 case E_V8SImode:
16307 case E_V4SImode:
16308 case E_V2SImode:
16309 case E_V8DImode:
16310 case E_V4DImode:
16311 case E_V2DImode:
16312 gcc_assert (vect);
16313 /* FALLTHRU */
16314 case E_V2HFmode:
16315 case E_V4HFmode:
16316 case E_V8HFmode:
16317 case E_V16HFmode:
16318 case E_V32HFmode:
16319 case E_V16SFmode:
16320 case E_V8SFmode:
16321 case E_V4SFmode:
16322 case E_V2SFmode:
16323 case E_V8DFmode:
16324 case E_V4DFmode:
16325 case E_V2DFmode:
16326 n_elt = GET_MODE_NUNITS (mode);
16327 v = rtvec_alloc (n_elt);
16328 scalar_mode = GET_MODE_INNER (mode);
16330 RTVEC_ELT (v, 0) = value;
16332 for (i = 1; i < n_elt; ++i)
16333 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16335 return gen_rtx_CONST_VECTOR (mode, v);
16337 default:
16338 gcc_unreachable ();
16342 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16343 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16344 for an SSE register. If VECT is true, then replicate the mask for
16345 all elements of the vector register. If INVERT is true, then create
16346 a mask excluding the sign bit. */
16348 rtx
16349 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
16351 machine_mode vec_mode, imode;
16352 wide_int w;
16353 rtx mask, v;
16355 switch (mode)
16357 case E_V2HFmode:
16358 case E_V4HFmode:
16359 case E_V8HFmode:
16360 case E_V16HFmode:
16361 case E_V32HFmode:
16362 vec_mode = mode;
16363 imode = HImode;
16364 break;
16366 case E_V16SImode:
16367 case E_V16SFmode:
16368 case E_V8SImode:
16369 case E_V4SImode:
16370 case E_V8SFmode:
16371 case E_V4SFmode:
16372 case E_V2SFmode:
16373 case E_V2SImode:
16374 vec_mode = mode;
16375 imode = SImode;
16376 break;
16378 case E_V8DImode:
16379 case E_V4DImode:
16380 case E_V2DImode:
16381 case E_V8DFmode:
16382 case E_V4DFmode:
16383 case E_V2DFmode:
16384 vec_mode = mode;
16385 imode = DImode;
16386 break;
16388 case E_TImode:
16389 case E_TFmode:
16390 vec_mode = VOIDmode;
16391 imode = TImode;
16392 break;
16394 default:
16395 gcc_unreachable ();
16398 machine_mode inner_mode = GET_MODE_INNER (mode);
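/* Form the scalar-mode constant with only the sign bit set; INVERT
flips it below into a mask of every bit but the sign bit. */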
16399 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
16400 GET_MODE_BITSIZE (inner_mode));
16401 if (invert)
16402 w = wi::bit_not (w);
16404 /* Force this value into the low part of a fp vector constant. */
16405 mask = immed_wide_int_const (w, imode);
16406 mask = gen_lowpart (inner_mode, mask);
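/* For SFmode elements, for instance, this is the value with bit
pattern 0x80000000, or 0x7fffffff when INVERT is true. */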
16408 if (vec_mode == VOIDmode)
16409 return force_reg (inner_mode, mask);
16411 v = ix86_build_const_vector (vec_mode, vect, mask);
16412 return force_reg (vec_mode, v);
16415 /* Return HOST_WIDE_INT for const vector OP in MODE. */
16417 HOST_WIDE_INT
16418 ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
16420 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16421 gcc_unreachable ();
16423 int nunits = GET_MODE_NUNITS (mode);
16424 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
16425 machine_mode innermode = GET_MODE_INNER (mode);
16426 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
16428 switch (mode)
16430 case E_V2QImode:
16431 case E_V4QImode:
16432 case E_V2HImode:
16433 case E_V8QImode:
16434 case E_V4HImode:
16435 case E_V2SImode:
16436 for (int i = 0; i < nunits; ++i)
16438 int v = INTVAL (XVECEXP (op, 0, i));
16439 wide_int wv = wi::shwi (v, innermode_bits);
16440 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16442 break;
16443 case E_V2HFmode:
16444 case E_V2BFmode:
16445 case E_V4HFmode:
16446 case E_V4BFmode:
16447 case E_V2SFmode:
16448 for (int i = 0; i < nunits; ++i)
16450 rtx x = XVECEXP (op, 0, i);
16451 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
16452 REAL_MODE_FORMAT (innermode));
16453 wide_int wv = wi::shwi (v, innermode_bits);
16454 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
16456 break;
16457 default:
16458 gcc_unreachable ();
16461 return val.to_shwi ();
16464 /* Return TRUE or FALSE depending on whether the first SET in INSN
16465 has source and destination with matching CC modes, and that the
16466 CC mode is at least as constrained as REQ_MODE. */
16468 bool
16469 ix86_match_ccmode (rtx insn, machine_mode req_mode)
16471 rtx set;
16472 machine_mode set_mode;
16474 set = PATTERN (insn);
16475 if (GET_CODE (set) == PARALLEL)
16476 set = XVECEXP (set, 0, 0);
16477 gcc_assert (GET_CODE (set) == SET);
16478 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16480 set_mode = GET_MODE (SET_DEST (set));
16481 switch (set_mode)
16483 case E_CCNOmode:
16484 if (req_mode != CCNOmode
16485 && (req_mode != CCmode
16486 || XEXP (SET_SRC (set), 1) != const0_rtx))
16487 return false;
16488 break;
16489 case E_CCmode:
16490 if (req_mode == CCGCmode)
16491 return false;
16492 /* FALLTHRU */
16493 case E_CCGCmode:
16494 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16495 return false;
16496 /* FALLTHRU */
16497 case E_CCGOCmode:
16498 if (req_mode == CCZmode)
16499 return false;
16500 /* FALLTHRU */
16501 case E_CCZmode:
16502 break;
16504 case E_CCGZmode:
16506 case E_CCAmode:
16507 case E_CCCmode:
16508 case E_CCOmode:
16509 case E_CCPmode:
16510 case E_CCSmode:
16511 if (set_mode != req_mode)
16512 return false;
16513 break;
16515 default:
16516 gcc_unreachable ();
16519 return GET_MODE (SET_SRC (set)) == set_mode;
16522 machine_mode
16523 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16525 machine_mode mode = GET_MODE (op0);
16527 if (SCALAR_FLOAT_MODE_P (mode))
16529 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16530 return CCFPmode;
16533 switch (code)
16535 /* Only zero flag is needed. */
16536 case EQ: /* ZF=0 */
16537 case NE: /* ZF!=0 */
16538 return CCZmode;
16539 /* Codes needing carry flag. */
16540 case GEU: /* CF=0 */
16541 case LTU: /* CF=1 */
16542 rtx geu;
16543 /* Detect overflow checks. They need just the carry flag. */
16544 if (GET_CODE (op0) == PLUS
16545 && (rtx_equal_p (op1, XEXP (op0, 0))
16546 || rtx_equal_p (op1, XEXP (op0, 1))))
16547 return CCCmode;
16548 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
16549 Match LTU of op0
16550 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
16551 and op1
16552 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
16553 where CC_CCC is either CC or CCC. */
16554 else if (code == LTU
16555 && GET_CODE (op0) == NEG
16556 && GET_CODE (geu = XEXP (op0, 0)) == GEU
16557 && REG_P (XEXP (geu, 0))
16558 && (GET_MODE (XEXP (geu, 0)) == CCCmode
16559 || GET_MODE (XEXP (geu, 0)) == CCmode)
16560 && REGNO (XEXP (geu, 0)) == FLAGS_REG
16561 && XEXP (geu, 1) == const0_rtx
16562 && GET_CODE (op1) == LTU
16563 && REG_P (XEXP (op1, 0))
16564 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
16565 && REGNO (XEXP (op1, 0)) == FLAGS_REG
16566 && XEXP (op1, 1) == const0_rtx)
16567 return CCCmode;
16568 /* Similarly for *x86_cmc pattern.
16569 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16570 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16571 It is sufficient to test that the operand modes are CCCmode. */
16572 else if (code == LTU
16573 && GET_CODE (op0) == NEG
16574 && GET_CODE (XEXP (op0, 0)) == LTU
16575 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16576 && GET_CODE (op1) == GEU
16577 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16578 return CCCmode;
16579 else
16580 return CCmode;
16581 case GTU: /* CF=0 & ZF=0 */
16582 case LEU: /* CF=1 | ZF=1 */
16583 return CCmode;
16584 /* Codes possibly doable only with sign flag when
16585 comparing against zero. */
16586 case GE: /* SF=OF or SF=0 */
16587 case LT: /* SF<>OF or SF=1 */
16588 if (op1 == const0_rtx)
16589 return CCGOCmode;
16590 else
16591 /* For other cases Carry flag is not required. */
16592 return CCGCmode;
16593 /* Codes doable only with the sign flag when comparing
16594 against zero, but we lack a jump instruction for it,
16595 so we need to use relational tests against overflow
16596 that thus needs to be zero. */
16597 case GT: /* ZF=0 & SF=OF */
16598 case LE: /* ZF=1 | SF<>OF */
16599 if (op1 == const0_rtx)
16600 return CCNOmode;
16601 else
16602 return CCGCmode;
16603 default:
16604 /* CCmode should be used in all other cases. */
16605 return CCmode;
16609 /* Return TRUE or FALSE depending on whether the ptest instruction
16610 INSN has source and destination with suitable matching CC modes. */
16612 bool
16613 ix86_match_ptest_ccmode (rtx insn)
16615 rtx set, src;
16616 machine_mode set_mode;
16618 set = PATTERN (insn);
16619 gcc_assert (GET_CODE (set) == SET);
16620 src = SET_SRC (set);
16621 gcc_assert (GET_CODE (src) == UNSPEC
16622 && XINT (src, 1) == UNSPEC_PTEST);
16624 set_mode = GET_MODE (src);
16625 if (set_mode != CCZmode
16626 && set_mode != CCCmode
16627 && set_mode != CCmode)
16628 return false;
16629 return GET_MODE (SET_DEST (set)) == set_mode;
16632 /* Return the fixed registers used for condition codes. */
16634 static bool
16635 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16637 *p1 = FLAGS_REG;
16638 *p2 = INVALID_REGNUM;
16639 return true;
16642 /* If two condition code modes are compatible, return a condition code
16643 mode which is compatible with both. Otherwise, return
16644 VOIDmode. */
16646 static machine_mode
16647 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16649 if (m1 == m2)
16650 return m1;
16652 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16653 return VOIDmode;
16655 if ((m1 == CCGCmode && m2 == CCGOCmode)
16656 || (m1 == CCGOCmode && m2 == CCGCmode))
16657 return CCGCmode;
16659 if ((m1 == CCNOmode && m2 == CCGOCmode)
16660 || (m1 == CCGOCmode && m2 == CCNOmode))
16661 return CCNOmode;
16663 if (m1 == CCZmode
16664 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16665 return m2;
16666 else if (m2 == CCZmode
16667 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16668 return m1;
16670 switch (m1)
16672 default:
16673 gcc_unreachable ();
16675 case E_CCmode:
16676 case E_CCGCmode:
16677 case E_CCGOCmode:
16678 case E_CCNOmode:
16679 case E_CCAmode:
16680 case E_CCCmode:
16681 case E_CCOmode:
16682 case E_CCPmode:
16683 case E_CCSmode:
16684 case E_CCZmode:
16685 switch (m2)
16687 default:
16688 return VOIDmode;
16690 case E_CCmode:
16691 case E_CCGCmode:
16692 case E_CCGOCmode:
16693 case E_CCNOmode:
16694 case E_CCAmode:
16695 case E_CCCmode:
16696 case E_CCOmode:
16697 case E_CCPmode:
16698 case E_CCSmode:
16699 case E_CCZmode:
16700 return CCmode;
16703 case E_CCFPmode:
16704 /* These are only compatible with themselves, which we already
16705 checked above. */
16706 return VOIDmode;
16710 /* Return strategy to use for floating-point. We assume that fcomi is always
16711 preferable where available, since that is also true when looking at size
16712 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16714 enum ix86_fpcmp_strategy
16715 ix86_fp_comparison_strategy (enum rtx_code)
16717 /* Do fcomi/sahf based test when profitable. */
16719 if (TARGET_CMOVE)
16720 return IX86_FPCMP_COMI;
16722 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16723 return IX86_FPCMP_SAHF;
16725 return IX86_FPCMP_ARITH;
16728 /* Convert comparison codes we use to represent FP comparison to integer
16729 code that will result in proper branch. Return UNKNOWN if no such code
16730 is available. */
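/* comi/fcomi set CF, ZF and PF the way an unsigned integer comparison
sets them (an unordered result sets all three), so the ordered FP
codes map onto their unsigned counterparts below. */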
16732 enum rtx_code
16733 ix86_fp_compare_code_to_integer (enum rtx_code code)
16735 switch (code)
16737 case GT:
16738 return GTU;
16739 case GE:
16740 return GEU;
16741 case ORDERED:
16742 case UNORDERED:
16743 return code;
16744 case UNEQ:
16745 return EQ;
16746 case UNLT:
16747 return LTU;
16748 case UNLE:
16749 return LEU;
16750 case LTGT:
16751 return NE;
16752 default:
16753 return UNKNOWN;
16757 /* Zero extend possibly SImode EXP to Pmode register. */
16758 rtx
16759 ix86_zero_extend_to_Pmode (rtx exp)
16761 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16764 /* Return true if the function is called via PLT. */
16766 bool
16767 ix86_call_use_plt_p (rtx call_op)
16769 if (SYMBOL_REF_LOCAL_P (call_op))
16771 if (SYMBOL_REF_DECL (call_op)
16772 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16774 /* NB: All ifunc functions must be called via PLT. */
16775 cgraph_node *node
16776 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16777 if (node && node->ifunc_resolver)
16778 return true;
16780 return false;
16782 return true;
16785 /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16786 the PLT entry will be used as the function address for local IFUNC
16787 functions. When the PIC register is needed for PLT call, indirect
16788 call via the PLT entry will fail since the PIC register may not be
16789 set up properly for indirect call. In this case, we should return
16790 false. */
16792 static bool
16793 ix86_ifunc_ref_local_ok (void)
16795 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16798 /* Return true if the function being called was marked with attribute
16799 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16800 to handle the non-PIC case in the backend because there is no easy
16801 interface for the front-end to force non-PLT calls to use the GOT.
16802 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16803 to call the function marked "noplt" indirectly. */
16805 static bool
16806 ix86_nopic_noplt_attribute_p (rtx call_op)
16808 if (flag_pic || ix86_cmodel == CM_LARGE
16809 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16810 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16811 || SYMBOL_REF_LOCAL_P (call_op))
16812 return false;
16814 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16816 if (!flag_plt
16817 || (symbol_decl != NULL_TREE
16818 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16819 return true;
16821 return false;
16824 /* Helper to output the jmp/call. */
16825 static void
16826 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16828 if (thunk_name != NULL)
16830 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16831 && ix86_indirect_branch_cs_prefix)
16832 fprintf (asm_out_file, "\tcs\n");
16833 fprintf (asm_out_file, "\tjmp\t");
16834 assemble_name (asm_out_file, thunk_name);
16835 putc ('\n', asm_out_file);
16836 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16837 fputs ("\tint3\n", asm_out_file);
16839 else
16840 output_indirect_thunk (regno);
16843 /* Output indirect branch via a call and return thunk. CALL_OP is a
16844 register which contains the branch target. XASM is the assembly
16845 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16846 A normal call is converted to:
16848 call __x86_indirect_thunk_reg
16850 and a tail call is converted to:
16852 jmp __x86_indirect_thunk_reg
16853 */
16855 static void
16856 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16858 char thunk_name_buf[32];
16859 char *thunk_name;
16860 enum indirect_thunk_prefix need_prefix
16861 = indirect_thunk_need_prefix (current_output_insn);
16862 int regno = REGNO (call_op);
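/* For the thunk and thunk-extern variants branch through a named
thunk, recording for plain thunk that its body must be emitted later;
for thunk-inline THUNK_NAME stays NULL and the thunk body is emitted
in place. */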
16864 if (cfun->machine->indirect_branch_type
16865 != indirect_branch_thunk_inline)
16867 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16868 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16870 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16871 thunk_name = thunk_name_buf;
16873 else
16874 thunk_name = NULL;
16876 if (sibcall_p)
16877 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16878 else
16880 if (thunk_name != NULL)
16882 if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
16883 && ix86_indirect_branch_cs_prefix)
16884 fprintf (asm_out_file, "\tcs\n");
16885 fprintf (asm_out_file, "\tcall\t");
16886 assemble_name (asm_out_file, thunk_name);
16887 putc ('\n', asm_out_file);
16888 return;
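/* Inline thunk: a direct call to a named thunk is not possible, so
emit "jmp L2; L1: <inline thunk body>; L2: call L1", which pushes
the address after the call as the return address. */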
16891 char indirectlabel1[32];
16892 char indirectlabel2[32];
16894 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16895 INDIRECT_LABEL,
16896 indirectlabelno++);
16897 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16898 INDIRECT_LABEL,
16899 indirectlabelno++);
16901 /* Jump. */
16902 fputs ("\tjmp\t", asm_out_file);
16903 assemble_name_raw (asm_out_file, indirectlabel2);
16904 fputc ('\n', asm_out_file);
16906 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16908 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16910 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16912 /* Call. */
16913 fputs ("\tcall\t", asm_out_file);
16914 assemble_name_raw (asm_out_file, indirectlabel1);
16915 fputc ('\n', asm_out_file);
16919 /* Output indirect branch via a call and return thunk. CALL_OP is
16920 the branch target. XASM is the assembly template for CALL_OP.
16921 Branch is a tail call if SIBCALL_P is true. A normal call is
16922 converted to:
16924 jmp L2
16925 L1:
16926 push CALL_OP
16927 jmp __x86_indirect_thunk
16928 L2:
16929 call L1
16931 and a tail call is converted to:
16933 push CALL_OP
16934 jmp __x86_indirect_thunk
16935 */
16937 static void
16938 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16939 bool sibcall_p)
16941 char thunk_name_buf[32];
16942 char *thunk_name;
16943 char push_buf[64];
16944 enum indirect_thunk_prefix need_prefix
16945 = indirect_thunk_need_prefix (current_output_insn);
16946 int regno = -1;
16948 if (cfun->machine->indirect_branch_type
16949 != indirect_branch_thunk_inline)
16951 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16952 indirect_thunk_needed = true;
16953 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16954 thunk_name = thunk_name_buf;
16956 else
16957 thunk_name = NULL;
16959 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16960 TARGET_64BIT ? 'q' : 'l', xasm);
16962 if (sibcall_p)
16964 output_asm_insn (push_buf, &call_op);
16965 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16967 else
16969 char indirectlabel1[32];
16970 char indirectlabel2[32];
16972 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16973 INDIRECT_LABEL,
16974 indirectlabelno++);
16975 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16976 INDIRECT_LABEL,
16977 indirectlabelno++);
16979 /* Jump. */
16980 fputs ("\tjmp\t", asm_out_file);
16981 assemble_name_raw (asm_out_file, indirectlabel2);
16982 fputc ('\n', asm_out_file);
16984 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16986 /* An external function may be called via GOT, instead of PLT. */
16987 if (MEM_P (call_op))
16989 struct ix86_address parts;
16990 rtx addr = XEXP (call_op, 0);
16991 if (ix86_decompose_address (addr, &parts)
16992 && parts.base == stack_pointer_rtx)
16994 /* Since call will adjust stack by -UNITS_PER_WORD,
16995 we must convert "disp(stack, index, scale)" to
16996 "disp+UNITS_PER_WORD(stack, index, scale)". */
16997 if (parts.index)
16999 addr = gen_rtx_MULT (Pmode, parts.index,
17000 GEN_INT (parts.scale));
17001 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17002 addr);
17004 else
17005 addr = stack_pointer_rtx;
17007 rtx disp;
17008 if (parts.disp != NULL_RTX)
17009 disp = plus_constant (Pmode, parts.disp,
17010 UNITS_PER_WORD);
17011 else
17012 disp = GEN_INT (UNITS_PER_WORD);
17014 addr = gen_rtx_PLUS (Pmode, addr, disp);
17015 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
17019 output_asm_insn (push_buf, &call_op);
17021 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
17023 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
17025 /* Call. */
17026 fputs ("\tcall\t", asm_out_file);
17027 assemble_name_raw (asm_out_file, indirectlabel1);
17028 fputc ('\n', asm_out_file);
17032 /* Output indirect branch via a call and return thunk. CALL_OP is
17033 the branch target. XASM is the assembly template for CALL_OP.
17034 Branch is a tail call if SIBCALL_P is true. */
17036 static void
17037 ix86_output_indirect_branch (rtx call_op, const char *xasm,
17038 bool sibcall_p)
17040 if (REG_P (call_op))
17041 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
17042 else
17043 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
17046 /* Output indirect jump. CALL_OP is the jump target. */
17048 const char *
17049 ix86_output_indirect_jmp (rtx call_op)
17051 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
17053 /* We can't have a red zone, since "call" in the indirect thunk
17054 pushes the return address onto the stack, destroying the red zone. */
17055 if (ix86_red_zone_used)
17056 gcc_unreachable ();
17058 ix86_output_indirect_branch (call_op, "%0", true);
17060 else
17061 output_asm_insn ("%!jmp\t%A0", &call_op);
17062 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
17065 /* Output return instrumentation for current function if needed. */
17067 static void
17068 output_return_instrumentation (void)
17070 if (ix86_instrument_return != instrument_return_none
17071 && flag_fentry
17072 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
17074 if (ix86_flag_record_return)
17075 fprintf (asm_out_file, "1:\n");
17076 switch (ix86_instrument_return)
17078 case instrument_return_call:
17079 fprintf (asm_out_file, "\tcall\t__return__\n");
17080 break;
17081 case instrument_return_nop5:
17082 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
17083 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
17084 break;
17085 case instrument_return_none:
17086 break;
17089 if (ix86_flag_record_return)
17091 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
17092 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
17093 fprintf (asm_out_file, "\t.previous\n");
17098 /* Output function return. CALL_OP is the jump target. Add a REP
17099 prefix to RET if LONG_P is true and function return is kept. */
17101 const char *
17102 ix86_output_function_return (bool long_p)
17104 output_return_instrumentation ();
17106 if (cfun->machine->function_return_type != indirect_branch_keep)
17108 char thunk_name[32];
17109 enum indirect_thunk_prefix need_prefix
17110 = indirect_thunk_need_prefix (current_output_insn);
17112 if (cfun->machine->function_return_type
17113 != indirect_branch_thunk_inline)
17115 bool need_thunk = (cfun->machine->function_return_type
17116 == indirect_branch_thunk);
17117 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
17118 true);
17119 indirect_return_needed |= need_thunk;
17120 fprintf (asm_out_file, "\tjmp\t");
17121 assemble_name (asm_out_file, thunk_name);
17122 putc ('\n', asm_out_file);
17124 else
17125 output_indirect_thunk (INVALID_REGNUM);
17127 return "";
17130 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
17131 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
17134 /* Output indirect function return. RET_OP is the function return
17135 target. */
17137 const char *
17138 ix86_output_indirect_function_return (rtx ret_op)
17140 if (cfun->machine->function_return_type != indirect_branch_keep)
17142 char thunk_name[32];
17143 enum indirect_thunk_prefix need_prefix
17144 = indirect_thunk_need_prefix (current_output_insn);
17145 unsigned int regno = REGNO (ret_op);
17146 gcc_assert (regno == CX_REG);
17148 if (cfun->machine->function_return_type
17149 != indirect_branch_thunk_inline)
17151 bool need_thunk = (cfun->machine->function_return_type
17152 == indirect_branch_thunk);
17153 indirect_thunk_name (thunk_name, regno, need_prefix, true);
17155 if (need_thunk)
17157 indirect_return_via_cx = true;
17158 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
17160 fprintf (asm_out_file, "\tjmp\t");
17161 assemble_name (asm_out_file, thunk_name);
17162 putc ('\n', asm_out_file);
17164 else
17165 output_indirect_thunk (regno);
17167 else
17169 output_asm_insn ("%!jmp\t%A0", &ret_op);
17170 if (ix86_harden_sls & harden_sls_indirect_jmp)
17171 fputs ("\tint3\n", asm_out_file);
17173 return "";
17176 /* Output the assembly for a call instruction. */
17178 const char *
17179 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
17181 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
17182 bool output_indirect_p
17183 = (!TARGET_SEH
17184 && cfun->machine->indirect_branch_type != indirect_branch_keep);
17185 bool seh_nop_p = false;
17186 const char *xasm;
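/* In the operand templates below the {att|intel} construct selects
the spelling that matches the assembler dialect chosen with -masm=. */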
17188 if (SIBLING_CALL_P (insn))
17190 output_return_instrumentation ();
17191 if (direct_p)
17193 if (ix86_nopic_noplt_attribute_p (call_op))
17195 direct_p = false;
17196 if (TARGET_64BIT)
17198 if (output_indirect_p)
17199 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17200 else
17201 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17203 else
17205 if (output_indirect_p)
17206 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17207 else
17208 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17211 else
17212 xasm = "%!jmp\t%P0";
17214 /* SEH epilogue detection requires the indirect branch case
17215 to include REX.W. */
17216 else if (TARGET_SEH)
17217 xasm = "%!rex.W jmp\t%A0";
17218 else
17220 if (output_indirect_p)
17221 xasm = "%0";
17222 else
17223 xasm = "%!jmp\t%A0";
17226 if (output_indirect_p && !direct_p)
17227 ix86_output_indirect_branch (call_op, xasm, true);
17228 else
17230 output_asm_insn (xasm, &call_op);
17231 if (!direct_p
17232 && (ix86_harden_sls & harden_sls_indirect_jmp))
17233 return "int3";
17235 return "";
17238 /* SEH unwinding can require an extra nop to be emitted in several
17239 circumstances. Determine if we have one of those. */
17240 if (TARGET_SEH)
17242 rtx_insn *i;
17244 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
17246 /* Prevent a catch region from being adjacent to a jump that would
17247 be interpreted as an epilogue sequence by the unwinder. */
17248 if (JUMP_P (i) && CROSSING_JUMP_P (i))
17250 seh_nop_p = true;
17251 break;
17254 /* If we get to another real insn, we don't need the nop. */
17255 if (INSN_P (i))
17256 break;
17258 /* If we get to the epilogue note, prevent a catch region from
17259 being adjacent to the standard epilogue sequence. Note that,
17260 if non-call exceptions are enabled, we already did it during
17261 epilogue expansion, or else, if the insn can throw internally,
17262 we already did it during the reorg pass. */
17263 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
17264 && !flag_non_call_exceptions
17265 && !can_throw_internal (insn))
17267 seh_nop_p = true;
17268 break;
17272 /* If we didn't find a real insn following the call, prevent the
17273 unwinder from looking into the next function. */
17274 if (i == NULL)
17275 seh_nop_p = true;
17278 if (direct_p)
17280 if (ix86_nopic_noplt_attribute_p (call_op))
17282 direct_p = false;
17283 if (TARGET_64BIT)
17285 if (output_indirect_p)
17286 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17287 else
17288 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
17290 else
17292 if (output_indirect_p)
17293 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
17294 else
17295 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
17298 else
17299 xasm = "%!call\t%P0";
17301 else
17303 if (output_indirect_p)
17304 xasm = "%0";
17305 else
17306 xasm = "%!call\t%A0";
17309 if (output_indirect_p && !direct_p)
17310 ix86_output_indirect_branch (call_op, xasm, false);
17311 else
17312 output_asm_insn (xasm, &call_op);
17314 if (seh_nop_p)
17315 return "nop";
17317 return "";
17320 /* Return a MEM corresponding to a stack slot with mode MODE.
17321 Allocate a new slot if necessary.
17323 The RTL for a function can have several slots available: N is
17324 which slot to use. */
17326 rtx
17327 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
17329 struct stack_local_entry *s;
17331 gcc_assert (n < MAX_386_STACK_LOCALS);
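/* Reuse an existing slot with the same mode and slot number, if any. */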
17333 for (s = ix86_stack_locals; s; s = s->next)
17334 if (s->mode == mode && s->n == n)
17335 return validize_mem (copy_rtx (s->rtl));
17337 int align = 0;
17338 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
17339 alignment with -m32 -mpreferred-stack-boundary=2. */
17340 if (mode == DImode
17341 && !TARGET_64BIT
17342 && n == SLOT_FLOATxFDI_387
17343 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
17344 align = 32;
17345 s = ggc_alloc<stack_local_entry> ();
17346 s->n = n;
17347 s->mode = mode;
17348 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
17350 s->next = ix86_stack_locals;
17351 ix86_stack_locals = s;
17352 return validize_mem (copy_rtx (s->rtl));
17355 static void
17356 ix86_instantiate_decls (void)
17358 struct stack_local_entry *s;
17360 for (s = ix86_stack_locals; s; s = s->next)
17361 if (s->rtl != NULL_RTX)
17362 instantiate_decl_rtl (s->rtl);
17365 /* Check whether x86 address PARTS is a pc-relative address. */
17367 bool
17368 ix86_rip_relative_addr_p (struct ix86_address *parts)
17370 rtx base, index, disp;
17372 base = parts->base;
17373 index = parts->index;
17374 disp = parts->disp;
17376 if (disp && !base && !index)
17378 if (TARGET_64BIT)
17380 rtx symbol = disp;
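/* Strip a CONST wrapper and a constant addend to reach the
underlying symbol, label or unspec. */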
17382 if (GET_CODE (disp) == CONST)
17383 symbol = XEXP (disp, 0);
17384 if (GET_CODE (symbol) == PLUS
17385 && CONST_INT_P (XEXP (symbol, 1)))
17386 symbol = XEXP (symbol, 0);
17388 if (GET_CODE (symbol) == LABEL_REF
17389 || (GET_CODE (symbol) == SYMBOL_REF
17390 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
17391 || (GET_CODE (symbol) == UNSPEC
17392 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
17393 || XINT (symbol, 1) == UNSPEC_PCREL
17394 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
17395 return true;
17398 return false;
17401 /* Calculate the length of the memory address in the instruction encoding.
17402 Includes addr32 prefix, does not include the one-byte modrm, opcode,
17403 or other prefixes. We never generate addr32 prefix for LEA insn. */
17405 int
17406 memory_address_length (rtx addr, bool lea)
17408 struct ix86_address parts;
17409 rtx base, index, disp;
17410 int len;
17411 int ok;
17413 if (GET_CODE (addr) == PRE_DEC
17414 || GET_CODE (addr) == POST_INC
17415 || GET_CODE (addr) == PRE_MODIFY
17416 || GET_CODE (addr) == POST_MODIFY)
17417 return 0;
17419 ok = ix86_decompose_address (addr, &parts);
17420 gcc_assert (ok);
17422 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
17424 /* If this is not LEA instruction, add the length of addr32 prefix. */
17425 if (TARGET_64BIT && !lea
17426 && (SImode_address_operand (addr, VOIDmode)
17427 || (parts.base && GET_MODE (parts.base) == SImode)
17428 || (parts.index && GET_MODE (parts.index) == SImode)))
17429 len++;
17431 base = parts.base;
17432 index = parts.index;
17433 disp = parts.disp;
17435 if (base && SUBREG_P (base))
17436 base = SUBREG_REG (base);
17437 if (index && SUBREG_P (index))
17438 index = SUBREG_REG (index);
17440 gcc_assert (base == NULL_RTX || REG_P (base));
17441 gcc_assert (index == NULL_RTX || REG_P (index));
17443 /* Rule of thumb:
17444 - esp as the base always wants an index,
17445 - ebp as the base always wants a displacement,
17446 - r12 as the base always wants an index,
17447 - r13 as the base always wants a displacement. */
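/* For example, plain (%esp) still needs a SIB byte and plain (%ebp)
still needs a zero 8-bit displacement, so each is encoded one byte
longer than (%eax). */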
17449 /* Register Indirect. */
17450 if (base && !index && !disp)
17452 /* esp (for its index) and ebp (for its displacement) need
17453 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
17454 code. */
17455 if (base == arg_pointer_rtx
17456 || base == frame_pointer_rtx
17457 || REGNO (base) == SP_REG
17458 || REGNO (base) == BP_REG
17459 || REGNO (base) == R12_REG
17460 || REGNO (base) == R13_REG)
17461 len++;
17464 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
17465 is not disp32, but disp32(%rip), so for disp32
17466 SIB byte is needed, unless print_operand_address
17467 optimizes it into disp32(%rip) or (%rip) is implied
17468 by UNSPEC. */
17469 else if (disp && !base && !index)
17471 len += 4;
17472 if (!ix86_rip_relative_addr_p (&parts))
17473 len++;
17475 else
17477 /* Find the length of the displacement constant. */
17478 if (disp)
17480 if (base && satisfies_constraint_K (disp))
17481 len += 1;
17482 else
17483 len += 4;
17485 /* ebp always wants a displacement. Similarly r13. */
17486 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
17487 len++;
17489 /* An index requires the two-byte modrm form.... */
17490 if (index
17491 /* ...like esp (or r12), which always wants an index. */
17492 || base == arg_pointer_rtx
17493 || base == frame_pointer_rtx
17494 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
17495 len++;
17498 return len;
17501 /* Compute default value for "length_immediate" attribute. When SHORTFORM
17502 is set, expect that the insn has an 8-bit immediate alternative. */
17503 int
17504 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
17506 int len = 0;
17507 int i;
17508 extract_insn_cached (insn);
17509 for (i = recog_data.n_operands - 1; i >= 0; --i)
17510 if (CONSTANT_P (recog_data.operand[i]))
17512 enum attr_mode mode = get_attr_mode (insn);
17514 gcc_assert (!len);
17515 if (shortform && CONST_INT_P (recog_data.operand[i]))
17517 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
17518 switch (mode)
17520 case MODE_QI:
17521 len = 1;
17522 continue;
17523 case MODE_HI:
17524 ival = trunc_int_for_mode (ival, HImode);
17525 break;
17526 case MODE_SI:
17527 ival = trunc_int_for_mode (ival, SImode);
17528 break;
17529 default:
17530 break;
17532 if (IN_RANGE (ival, -128, 127))
17534 len = 1;
17535 continue;
17538 switch (mode)
17540 case MODE_QI:
17541 len = 1;
17542 break;
17543 case MODE_HI:
17544 len = 2;
17545 break;
17546 case MODE_SI:
17547 len = 4;
17548 break;
17549 /* Immediates for DImode instructions are encoded
17550 as 32bit sign extended values. */
17551 case MODE_DI:
17552 len = 4;
17553 break;
17554 default:
17555 fatal_insn ("unknown insn mode", insn);
17558 return len;
17561 /* Compute default value for "length_address" attribute. */
17562 int
17563 ix86_attr_length_address_default (rtx_insn *insn)
17565 int i;
17567 if (get_attr_type (insn) == TYPE_LEA)
17569 rtx set = PATTERN (insn), addr;
17571 if (GET_CODE (set) == PARALLEL)
17572 set = XVECEXP (set, 0, 0);
17574 gcc_assert (GET_CODE (set) == SET);
17576 addr = SET_SRC (set);
17578 return memory_address_length (addr, true);
17581 extract_insn_cached (insn);
17582 for (i = recog_data.n_operands - 1; i >= 0; --i)
17584 rtx op = recog_data.operand[i];
17585 if (MEM_P (op))
17587 constrain_operands_cached (insn, reload_completed);
17588 if (which_alternative != -1)
17590 const char *constraints = recog_data.constraints[i];
17591 int alt = which_alternative;
17593 while (*constraints == '=' || *constraints == '+')
17594 constraints++;
17595 while (alt-- > 0)
17596 while (*constraints++ != ',')
17598 /* Skip ignored operands. */
17599 if (*constraints == 'X')
17600 continue;
17603 int len = memory_address_length (XEXP (op, 0), false);
17605 /* Account for segment prefix for non-default addr spaces. */
17606 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17607 len++;
17609 return len;
17612 return 0;
17615 /* Compute default value for "length_vex" attribute. It includes
17616 2 or 3 byte VEX prefix and 1 opcode byte. */
17618 int
17619 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17620 bool has_vex_w)
17622 int i, reg_only = 2 + 1;
17623 bool has_mem = false;
17625 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
17626 requires the 3-byte VEX prefix. */
17627 if (!has_0f_opcode || has_vex_w)
17628 return 3 + 1;
17630 /* We can always use 2 byte VEX prefix in 32bit. */
17631 if (!TARGET_64BIT)
17632 return 2 + 1;
17634 extract_insn_cached (insn);
17636 for (i = recog_data.n_operands - 1; i >= 0; --i)
17637 if (REG_P (recog_data.operand[i]))
17639 /* REX.W bit uses 3 byte VEX prefix.
17640 REX2 with VEX uses the extended EVEX prefix, whose length is 4 bytes. */
17641 if (GET_MODE (recog_data.operand[i]) == DImode
17642 && GENERAL_REG_P (recog_data.operand[i]))
17643 return 3 + 1;
17645 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17646 operand will be encoded using VEX.B, so be conservative.
17647 REX2 with VEX uses the extended EVEX prefix, whose length is 4 bytes. */
17648 if (REX_INT_REGNO_P (recog_data.operand[i])
17649 || REX2_INT_REGNO_P (recog_data.operand[i])
17650 || REX_SSE_REGNO_P (recog_data.operand[i]))
17651 reg_only = 3 + 1;
17653 else if (MEM_P (recog_data.operand[i]))
17655 /* REX2.X or REX2.B bits use 3 byte VEX prefix. */
17656 if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
17657 return 4;
17659 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17660 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17661 return 3 + 1;
17663 has_mem = true;
17666 return has_mem ? 2 + 1 : reg_only;
17670 static bool
17671 ix86_class_likely_spilled_p (reg_class_t);
17673 /* Return true if the lhs of INSN is a HW function argument register, and
17674 set *IS_SPILLED to true if it is a likely-spilled HW register. */
17675 static bool
17676 insn_is_function_arg (rtx insn, bool* is_spilled)
17678 rtx dst;
17680 if (!NONDEBUG_INSN_P (insn))
17681 return false;
17682 /* Call instructions are not movable, ignore them. */
17683 if (CALL_P (insn))
17684 return false;
17685 insn = PATTERN (insn);
17686 if (GET_CODE (insn) == PARALLEL)
17687 insn = XVECEXP (insn, 0, 0);
17688 if (GET_CODE (insn) != SET)
17689 return false;
17690 dst = SET_DEST (insn);
17691 if (REG_P (dst) && HARD_REGISTER_P (dst)
17692 && ix86_function_arg_regno_p (REGNO (dst)))
17694 /* Is it a likely-spilled HW register? */
17695 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17696 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17697 *is_spilled = true;
17698 return true;
17700 return false;
17703 /* Add output dependencies for the chain of adjacent function arguments,
17704 but only if there is a move to a likely-spilled HW register. Return the
17705 first argument if at least one dependence was added, or NULL otherwise. */
17706 static rtx_insn *
17707 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17709 rtx_insn *insn;
17710 rtx_insn *last = call;
17711 rtx_insn *first_arg = NULL;
17712 bool is_spilled = false;
17714 head = PREV_INSN (head);
17716 /* Find the argument-passing instruction nearest to the call. */
17717 while (true)
17719 last = PREV_INSN (last);
17720 if (last == head)
17721 return NULL;
17722 if (!NONDEBUG_INSN_P (last))
17723 continue;
17724 if (insn_is_function_arg (last, &is_spilled))
17725 break;
17726 return NULL;
17729 first_arg = last;
17730 while (true)
17732 insn = PREV_INSN (last);
17733 if (!INSN_P (insn))
17734 break;
17735 if (insn == head)
17736 break;
17737 if (!NONDEBUG_INSN_P (insn))
17739 last = insn;
17740 continue;
17742 if (insn_is_function_arg (insn, &is_spilled))
17744 /* Add an output dependence between two function arguments if the chain
17745 of output arguments contains likely-spilled HW registers. */
17746 if (is_spilled)
17747 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17748 first_arg = last = insn;
17750 else
17751 break;
17753 if (!is_spilled)
17754 return NULL;
17755 return first_arg;
17758 /* Add output or anti dependency from insn to first_arg to restrict its code
17759 motion. */
17760 static void
17761 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17763 rtx set;
17764 rtx tmp;
17766 set = single_set (insn);
17767 if (!set)
17768 return;
17769 tmp = SET_DEST (set);
17770 if (REG_P (tmp))
17772 /* Add output dependency to the first function argument. */
17773 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17774 return;
17776 /* Add anti dependency. */
17777 add_dependence (first_arg, insn, REG_DEP_ANTI);
17780 /* Avoid cross-block motion of a function argument by adding a dependency
17781 from the first non-jump instruction in BB. */
17782 static void
17783 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17785 rtx_insn *insn = BB_END (bb);
17787 while (insn)
17789 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17791 rtx set = single_set (insn);
17792 if (set)
17794 avoid_func_arg_motion (arg, insn);
17795 return;
17798 if (insn == BB_HEAD (bb))
17799 return;
17800 insn = PREV_INSN (insn);
17804 /* Hook for pre-reload schedule - avoid motion of function arguments
17805 passed in likely spilled HW registers. */
17806 static void
17807 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17809 rtx_insn *insn;
17810 rtx_insn *first_arg = NULL;
17811 if (reload_completed)
17812 return;
17813 while (head != tail && DEBUG_INSN_P (head))
17814 head = NEXT_INSN (head);
17815 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17816 if (INSN_P (insn) && CALL_P (insn))
17818 first_arg = add_parameter_dependencies (insn, head);
17819 if (first_arg)
17821 /* Add a dependee for the first argument to predecessors, but only
17822 if the region contains more than one block. */
17823 basic_block bb = BLOCK_FOR_INSN (insn);
17824 int rgn = CONTAINING_RGN (bb->index);
17825 int nr_blks = RGN_NR_BLOCKS (rgn);
17826 /* Skip trivial regions and region head blocks that can have
17827 predecessors outside of region. */
17828 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17830 edge e;
17831 edge_iterator ei;
17833 /* Regions are SCCs with the exception of selective
17834 scheduling with pipelining of outer blocks enabled.
17835 So also check that immediate predecessors of a non-head
17836 block are in the same region. */
17837 FOR_EACH_EDGE (e, ei, bb->preds)
17839 /* Avoid creating loop-carried dependencies by using
17840 the topological ordering in the region. */
17841 if (rgn == CONTAINING_RGN (e->src->index)
17842 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17843 add_dependee_for_func_arg (first_arg, e->src);
17846 insn = first_arg;
17847 if (insn == head)
17848 break;
17851 else if (first_arg)
17852 avoid_func_arg_motion (first_arg, insn);
17855 /* Hook for pre-reload schedule - set priority of moves from likely-spilled
17856 HW registers to maximum, to schedule them as soon as possible. These are
17857 moves from function argument registers at the top of the function entry
17858 and moves from function return value registers after call. */
17859 static int
17860 ix86_adjust_priority (rtx_insn *insn, int priority)
17862 rtx set;
17864 if (reload_completed)
17865 return priority;
17867 if (!NONDEBUG_INSN_P (insn))
17868 return priority;
17870 set = single_set (insn);
17871 if (set)
17873 rtx tmp = SET_SRC (set);
17874 if (REG_P (tmp)
17875 && HARD_REGISTER_P (tmp)
17876 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17877 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17878 return current_sched_info->sched_max_insns_priority;
17881 return priority;
17884 /* Prepare for scheduling pass. */
17885 static void
17886 ix86_sched_init_global (FILE *, int, int)
17888 /* Install scheduling hooks for current CPU. Some of these hooks are used
17889 in time-critical parts of the scheduler, so we only set them up when
17890 they are actually used. */
17891 switch (ix86_tune)
17893 case PROCESSOR_CORE2:
17894 case PROCESSOR_NEHALEM:
17895 case PROCESSOR_SANDYBRIDGE:
17896 case PROCESSOR_HASWELL:
17897 case PROCESSOR_TREMONT:
17898 case PROCESSOR_ALDERLAKE:
17899 case PROCESSOR_GENERIC:
17900 /* Do not perform multipass scheduling for pre-reload schedule
17901 to save compile time. */
17902 if (reload_completed)
17904 ix86_core2i7_init_hooks ();
17905 break;
17907 /* Fall through. */
17908 default:
17909 targetm.sched.dfa_post_advance_cycle = NULL;
17910 targetm.sched.first_cycle_multipass_init = NULL;
17911 targetm.sched.first_cycle_multipass_begin = NULL;
17912 targetm.sched.first_cycle_multipass_issue = NULL;
17913 targetm.sched.first_cycle_multipass_backtrack = NULL;
17914 targetm.sched.first_cycle_multipass_end = NULL;
17915 targetm.sched.first_cycle_multipass_fini = NULL;
17916 break;
17921 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17923 static HOST_WIDE_INT
17924 ix86_static_rtx_alignment (machine_mode mode)
17926 if (mode == DFmode)
17927 return 64;
17928 if (ALIGN_MODE_128 (mode))
17929 return MAX (128, GET_MODE_ALIGNMENT (mode));
17930 return GET_MODE_ALIGNMENT (mode);
17933 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17935 static HOST_WIDE_INT
17936 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17938 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17939 || TREE_CODE (exp) == INTEGER_CST)
17941 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17942 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17943 return MAX (mode_align, align);
17945 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17946 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17947 return BITS_PER_WORD;
17949 return align;
17952 /* Implement TARGET_EMPTY_RECORD_P. */
17954 static bool
17955 ix86_is_empty_record (const_tree type)
17957 if (!TARGET_64BIT)
17958 return false;
17959 return default_is_empty_record (type);
17962 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17964 static void
17965 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17967 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17969 if (!cum->warn_empty)
17970 return;
17972 if (!TYPE_EMPTY_P (type))
17973 return;
17975 /* Don't warn if the function isn't visible outside of the TU. */
17976 if (cum->decl && !TREE_PUBLIC (cum->decl))
17977 return;
17979 const_tree ctx = get_ultimate_context (cum->decl);
17980 if (ctx != NULL_TREE
17981 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17982 return;
17984 /* If the actual size of the type is zero, then there is no change
17985 in how objects of this size are passed. */
17986 if (int_size_in_bytes (type) == 0)
17987 return;
17989 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17990 "changes in %<-fabi-version=12%> (GCC 8)", type);
17992 /* Only warn once. */
17993 cum->warn_empty = false;
17996 /* This hook returns name of multilib ABI. */
17998 static const char *
17999 ix86_get_multilib_abi_name (void)
18001 if (!(TARGET_64BIT_P (ix86_isa_flags)))
18002 return "i386";
18003 else if (TARGET_X32_P (ix86_isa_flags))
18004 return "x32";
18005 else
18006 return "x86_64";
18009 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
18010 the data type, and ALIGN is the alignment that the object would
18011 ordinarily have. */
18013 static int
18014 iamcu_alignment (tree type, int align)
18016 machine_mode mode;
18018 if (align < 32 || TYPE_USER_ALIGN (type))
18019 return align;
18021 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes
18022 are aligned to 4 bytes. */
18023 type = strip_array_types (type);
18024 if (TYPE_ATOMIC (type))
18025 return align;
18027 mode = TYPE_MODE (type);
18028 switch (GET_MODE_CLASS (mode))
18030 case MODE_INT:
18031 case MODE_COMPLEX_INT:
18032 case MODE_COMPLEX_FLOAT:
18033 case MODE_FLOAT:
18034 case MODE_DECIMAL_FLOAT:
18035 return 32;
18036 default:
18037 return align;
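/* A short worked example of the rule above (illustration only, assuming the
   default type layout): under the Intel MCU psABI a plain "double" or
   "long long", which would ordinarily be 64-bit aligned, is lowered to
   32-bit alignment here, while a type carrying __attribute__ ((aligned (8)))
   keeps its alignment because TYPE_USER_ALIGN short-circuits the
   adjustment, as does _Atomic via the TYPE_ATOMIC check.  */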
18041 /* Compute the alignment for a static variable.
18042 TYPE is the data type, and ALIGN is the alignment that
18043 the object would ordinarily have. The value of this function is used
18044 instead of that alignment to align the object. */
18047 ix86_data_alignment (tree type, unsigned int align, bool opt)
18049 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
18050 for symbols from other compilation units or symbols that don't need
18051 to bind locally. In order to preserve some ABI compatibility with
18052 those compilers, ensure we don't decrease alignment from what we
18053 used to assume. */
18055 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
18057 /* A data structure equal to or greater than the size of a cache line
18058 (64 bytes on the Pentium 4 and other recent Intel processors, including
18059 processors based on the Intel Core microarchitecture) should be aligned
18060 so that its base address is a multiple of the cache line size. */
18062 unsigned int max_align
18063 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
18065 if (max_align < BITS_PER_WORD)
18066 max_align = BITS_PER_WORD;
18068 switch (ix86_align_data_type)
18070 case ix86_align_data_type_abi: opt = false; break;
18071 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
18072 case ix86_align_data_type_cacheline: break;
18075 if (TARGET_IAMCU)
18076 align = iamcu_alignment (type, align);
18078 if (opt
18079 && AGGREGATE_TYPE_P (type)
18080 && TYPE_SIZE (type)
18081 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
18083 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
18084 && align < max_align_compat)
18085 align = max_align_compat;
18086 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
18087 && align < max_align)
18088 align = max_align;
18091 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18092 to a 16-byte boundary. */
18093 if (TARGET_64BIT)
18095 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
18096 && TYPE_SIZE (type)
18097 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18098 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18099 && align < 128)
18100 return 128;
18103 if (!opt)
18104 return align;
18106 if (TREE_CODE (type) == ARRAY_TYPE)
18108 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18109 return 64;
18110 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18111 return 128;
18113 else if (TREE_CODE (type) == COMPLEX_TYPE)
18116 if (TYPE_MODE (type) == DCmode && align < 64)
18117 return 64;
18118 if ((TYPE_MODE (type) == XCmode
18119 || TYPE_MODE (type) == TCmode) && align < 128)
18120 return 128;
18122 else if (RECORD_OR_UNION_TYPE_P (type)
18123 && TYPE_FIELDS (type))
18125 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18126 return 64;
18127 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18128 return 128;
18130 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18131 || TREE_CODE (type) == INTEGER_TYPE)
18133 if (TYPE_MODE (type) == DFmode && align < 64)
18134 return 64;
18135 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18136 return 128;
18139 return align;
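/* As an illustration of the rules above (assuming a tuning whose
   prefetch_block is 64 bytes, i.e. max_align == 512 bits): on x86-64 with
   optimization a 40-byte struct is raised to 256-bit alignment (the GCC 4.8
   compatibility bound), a 100-byte struct is raised to full cache-line
   (512-bit) alignment, and a 16-byte array such as double[2] gets the
   ABI-mandated 128-bit alignment whether or not OPT is set.  */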
18142 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
18143 static void
18144 ix86_lower_local_decl_alignment (tree decl)
18146 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
18147 DECL_ALIGN (decl), true);
18148 if (new_align < DECL_ALIGN (decl))
18149 SET_DECL_ALIGN (decl, new_align);
18152 /* Compute the alignment for a local variable or a stack slot. EXP is
18153 the data type or decl itself, MODE is the widest mode available and
18154 ALIGN is the alignment that the object would ordinarily have. The
18155 value of this macro is used instead of that alignment to align the
18156 object. */
18158 unsigned int
18159 ix86_local_alignment (tree exp, machine_mode mode,
18160 unsigned int align, bool may_lower)
18162 tree type, decl;
18164 if (exp && DECL_P (exp))
18166 type = TREE_TYPE (exp);
18167 decl = exp;
18169 else
18171 type = exp;
18172 decl = NULL;
18175 /* Don't do dynamic stack realignment for long long objects with
18176 -mpreferred-stack-boundary=2. */
18177 if (may_lower
18178 && !TARGET_64BIT
18179 && align == 64
18180 && ix86_preferred_stack_boundary < 64
18181 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
18182 && (!type || (!TYPE_USER_ALIGN (type)
18183 && !TYPE_ATOMIC (strip_array_types (type))))
18184 && (!decl || !DECL_USER_ALIGN (decl)))
18185 align = 32;
18187 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18188 register in MODE. We will return the largest alignment of XF
18189 and DF. */
18190 if (!type)
18192 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18193 align = GET_MODE_ALIGNMENT (DFmode);
18194 return align;
18197 /* Don't increase alignment for Intel MCU psABI. */
18198 if (TARGET_IAMCU)
18199 return align;
18201 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
18202 to a 16-byte boundary. The exact wording is:
18204 An array uses the same alignment as its elements, except that a local or
18205 global array variable of length at least 16 bytes or
18206 a C99 variable-length array variable always has alignment of at least 16 bytes.
18208 This was added to allow use of aligned SSE instructions on arrays. This
18209 rule is meant for static storage (where the compiler cannot do the analysis
18210 by itself). We follow it for automatic variables only when convenient.
18211 We fully control everything in the function being compiled, and functions
18212 from other units cannot rely on the alignment.
18214 Exclude the va_list type. It is the common case of a local array where
18215 we cannot benefit from the alignment.
18217 TODO: Probably one should optimize for size only when the variable does not escape. */
18218 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
18219 && TARGET_SSE)
18221 if (AGGREGATE_TYPE_P (type)
18222 && (va_list_type_node == NULL_TREE
18223 || (TYPE_MAIN_VARIANT (type)
18224 != TYPE_MAIN_VARIANT (va_list_type_node)))
18225 && TYPE_SIZE (type)
18226 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18227 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
18228 && align < 128)
18229 return 128;
18231 if (TREE_CODE (type) == ARRAY_TYPE)
18233 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18234 return 64;
18235 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18236 return 128;
18238 else if (TREE_CODE (type) == COMPLEX_TYPE)
18240 if (TYPE_MODE (type) == DCmode && align < 64)
18241 return 64;
18242 if ((TYPE_MODE (type) == XCmode
18243 || TYPE_MODE (type) == TCmode) && align < 128)
18244 return 128;
18246 else if (RECORD_OR_UNION_TYPE_P (type)
18247 && TYPE_FIELDS (type))
18249 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18250 return 64;
18251 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18252 return 128;
18254 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
18255 || TREE_CODE (type) == INTEGER_TYPE)
18258 if (TYPE_MODE (type) == DFmode && align < 64)
18259 return 64;
18260 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18261 return 128;
18263 return align;
18266 /* Compute the minimum required alignment for dynamic stack realignment
18267 purposes for a local variable, parameter or a stack slot. EXP is
18268 the data type or decl itself, MODE is its mode and ALIGN is the
18269 alignment that the object would ordinarily have. */
18271 unsigned int
18272 ix86_minimum_alignment (tree exp, machine_mode mode,
18273 unsigned int align)
18275 tree type, decl;
18277 if (exp && DECL_P (exp))
18279 type = TREE_TYPE (exp);
18280 decl = exp;
18282 else
18284 type = exp;
18285 decl = NULL;
18288 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
18289 return align;
18291 /* Don't do dynamic stack realignment for long long objects with
18292 -mpreferred-stack-boundary=2. */
18293 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
18294 && (!type || (!TYPE_USER_ALIGN (type)
18295 && !TYPE_ATOMIC (strip_array_types (type))))
18296 && (!decl || !DECL_USER_ALIGN (decl)))
18298 gcc_checking_assert (!TARGET_STV);
18299 return 32;
18302 return align;
18305 /* Find a location for the static chain incoming to a nested function.
18306 This is a register, unless all free registers are used by arguments. */
18308 static rtx
18309 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
18311 unsigned regno;
18313 if (TARGET_64BIT)
18315 /* We always use R10 in 64-bit mode. */
18316 regno = R10_REG;
18318 else
18320 const_tree fntype, fndecl;
18321 unsigned int ccvt;
18323 /* By default in 32-bit mode we use ECX to pass the static chain. */
18324 regno = CX_REG;
18326 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
18328 fntype = TREE_TYPE (fndecl_or_type);
18329 fndecl = fndecl_or_type;
18331 else
18333 fntype = fndecl_or_type;
18334 fndecl = NULL;
18337 ccvt = ix86_get_callcvt (fntype);
18338 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
18340 /* Fastcall functions use ecx/edx for arguments, which leaves
18341 us with EAX for the static chain.
18342 Thiscall functions use ecx for arguments, which also
18343 leaves us with EAX for the static chain. */
18344 regno = AX_REG;
18346 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
18348 /* Thiscall functions use ecx for arguments, which leaves
18349 us with EAX and EDX for the static chain.
18350 For ABI compatibility we use EAX. */
18351 regno = AX_REG;
18353 else if (ix86_function_regparm (fntype, fndecl) == 3)
18355 /* For regparm 3, we have no free call-clobbered registers in
18356 which to store the static chain. In order to implement this,
18357 we have the trampoline push the static chain to the stack.
18358 However, we can't push a value below the return address when
18359 we call the nested function directly, so we have to use an
18360 alternate entry point. For this we use ESI, and have the
18361 alternate entry point push ESI, so that things appear the
18362 same once we're executing the nested function. */
18363 if (incoming_p)
18365 if (fndecl == current_function_decl
18366 && !ix86_static_chain_on_stack)
18368 gcc_assert (!reload_completed);
18369 ix86_static_chain_on_stack = true;
18371 return gen_frame_mem (SImode,
18372 plus_constant (Pmode,
18373 arg_pointer_rtx, -8));
18375 regno = SI_REG;
18379 return gen_rtx_REG (Pmode, regno);
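/* A quick summary of the choices above, for illustration: in 64-bit code a
   nested function receives its static chain in r10; in 32-bit code it is
   ecx by default, eax for fastcall and thiscall functions (their ecx/edx
   are taken by arguments), and for regparm(3) functions the incoming chain
   lives on the stack, with esi used via the alternate entry point for
   direct calls.  */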
18382 /* Emit RTL insns to initialize the variable parts of a trampoline.
18383 FNDECL is the decl of the target address; M_TRAMP is a MEM for
18384 the trampoline, and CHAIN_VALUE is an RTX for the static chain
18385 to be passed to the target function. */
18387 static void
18388 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
18390 rtx mem, fnaddr;
18391 int opcode;
18392 int offset = 0;
18393 bool need_endbr = (flag_cf_protection & CF_BRANCH);
18395 fnaddr = XEXP (DECL_RTL (fndecl), 0);
18397 if (TARGET_64BIT)
18399 int size;
18401 if (need_endbr)
18403 /* Insert ENDBR64. */
18404 mem = adjust_address (m_tramp, SImode, offset);
18405 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
18406 offset += 4;
18409 /* Load the function address to r11. Try to load address using
18410 the shorter movl instead of movabs. We may want to support
18411 movq for kernel mode, but kernel does not use trampolines at
18412 the moment. FNADDR is a 32-bit address and may not be in
18413 DImode when ptr_mode == SImode. Always use movl in this
18414 case. */
18415 if (ptr_mode == SImode
18416 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18418 fnaddr = copy_addr_to_reg (fnaddr);
18420 mem = adjust_address (m_tramp, HImode, offset);
18421 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
18423 mem = adjust_address (m_tramp, SImode, offset + 2);
18424 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
18425 offset += 6;
18427 else
18429 mem = adjust_address (m_tramp, HImode, offset);
18430 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
18432 mem = adjust_address (m_tramp, DImode, offset + 2);
18433 emit_move_insn (mem, fnaddr);
18434 offset += 10;
18437 /* Load static chain using movabs to r10. Use the shorter movl
18438 instead of movabs when ptr_mode == SImode. */
18439 if (ptr_mode == SImode)
18441 opcode = 0xba41;
18442 size = 6;
18444 else
18446 opcode = 0xba49;
18447 size = 10;
18450 mem = adjust_address (m_tramp, HImode, offset);
18451 emit_move_insn (mem, gen_int_mode (opcode, HImode));
18453 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
18454 emit_move_insn (mem, chain_value);
18455 offset += size;
18457 /* Jump to r11; the last (unused) byte is a nop, only there to
18458 pad the write out to a single 32-bit store. */
18459 mem = adjust_address (m_tramp, SImode, offset);
18460 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
18461 offset += 4;
18463 else
18465 rtx disp, chain;
18467 /* Depending on the static chain location, either load a register
18468 with a constant, or push the constant to the stack. All of the
18469 instructions are the same size. */
18470 chain = ix86_static_chain (fndecl, true);
18471 if (REG_P (chain))
18473 switch (REGNO (chain))
18475 case AX_REG:
18476 opcode = 0xb8; break;
18477 case CX_REG:
18478 opcode = 0xb9; break;
18479 default:
18480 gcc_unreachable ();
18483 else
18484 opcode = 0x68;
18486 if (need_endbr)
18488 /* Insert ENDBR32. */
18489 mem = adjust_address (m_tramp, SImode, offset);
18490 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
18491 offset += 4;
18494 mem = adjust_address (m_tramp, QImode, offset);
18495 emit_move_insn (mem, gen_int_mode (opcode, QImode));
18497 mem = adjust_address (m_tramp, SImode, offset + 1);
18498 emit_move_insn (mem, chain_value);
18499 offset += 5;
18501 mem = adjust_address (m_tramp, QImode, offset);
18502 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
18504 mem = adjust_address (m_tramp, SImode, offset + 1);
18506 /* Compute offset from the end of the jmp to the target function.
18507 In the case in which the trampoline stores the static chain on
18508 the stack, we need to skip the first insn which pushes the
18509 (call-saved) register static chain; this push is 1 byte. */
18510 offset += 5;
18511 int skip = MEM_P (chain) ? 1 : 0;
18512 /* Skip ENDBR32 at the entry of the target function. */
18513 if (need_endbr
18514 && !cgraph_node::get (fndecl)->only_called_directly_p ())
18515 skip += 4;
18516 disp = expand_binop (SImode, sub_optab, fnaddr,
18517 plus_constant (Pmode, XEXP (m_tramp, 0),
18518 offset - skip),
18519 NULL_RTX, 1, OPTAB_DIRECT);
18520 emit_move_insn (mem, disp);
18523 gcc_assert (offset <= TRAMPOLINE_SIZE);
18525 #ifdef HAVE_ENABLE_EXECUTE_STACK
18526 #ifdef CHECK_EXECUTE_STACK_ENABLED
18527 if (CHECK_EXECUTE_STACK_ENABLED)
18528 #endif
18529 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18530 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
18531 #endif
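/* For illustration, the 64-bit trampoline emitted above (the movabs
   variant, without ENDBR64) decodes as:

     49 bb <fnaddr,8>   movabs $fnaddr, %r11
     49 ba <chain,8>    movabs $chain,  %r10
     49 ff e3           jmp    *%r11
     90                 nop  (pads the final write to a 32-bit store)

   and the 32-bit variant with a register static chain decodes as
   "b8/b9 <chain,4>; e9 <rel32>" (movl into eax/ecx, then jmp), with
   68 (pushl $chain) replacing the movl when the chain is passed on the
   stack.  */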
18534 static bool
18535 ix86_allocate_stack_slots_for_args (void)
18537 /* Naked functions should not allocate stack slots for arguments. */
18538 return !ix86_function_naked (current_function_decl);
18541 static bool
18542 ix86_warn_func_return (tree decl)
18544 /* Naked functions are implemented entirely in assembly, including the
18545 return sequence, so suppress warnings about this. */
18546 return !ix86_function_naked (decl);
18549 /* Return the shift count of a vector-by-scalar shift builtin given its
18550 second argument ARG1. */
18551 static tree
18552 ix86_vector_shift_count (tree arg1)
18554 if (tree_fits_uhwi_p (arg1))
18555 return arg1;
18556 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
18558 /* The count argument is odd: it is passed in as various 128-bit
18559 (or 64-bit) vectors, and only its low 64 bits are the count. */
18560 unsigned char buf[16];
18561 int len = native_encode_expr (arg1, buf, 16);
18562 if (len == 0)
18563 return NULL_TREE;
18564 tree t = native_interpret_expr (uint64_type_node, buf, len);
18565 if (t && tree_fits_uhwi_p (t))
18566 return t;
18568 return NULL_TREE;
18571 /* Return true if ARG_MASK is all ones; ELEMS is the number of elements
18572 of the corresponding vector. */
18573 static bool
18574 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18576 if (TREE_CODE (arg_mask) != INTEGER_CST)
18577 return false;
18579 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18580 if (elems == HOST_BITS_PER_WIDE_INT)
18581 return mask == HOST_WIDE_INT_M1U;
18582 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18583 return false;
18585 return true;
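/* For example (illustration only): for a V8SI operation ELEMS is 8, so a
   mask whose low 8 bits are all set (e.g. 0xff) counts as all ones, while
   0x7f does not; when ELEMS equals HOST_BITS_PER_WIDE_INT the mask must be
   exactly HOST_WIDE_INT_M1U.  */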
18588 static tree
18589 ix86_fold_builtin (tree fndecl, int n_args,
18590 tree *args, bool ignore ATTRIBUTE_UNUSED)
18592 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18594 enum ix86_builtins fn_code
18595 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18596 enum rtx_code rcode;
18597 bool is_vshift;
18598 unsigned HOST_WIDE_INT mask;
18600 switch (fn_code)
18602 case IX86_BUILTIN_CPU_IS:
18603 case IX86_BUILTIN_CPU_SUPPORTS:
18604 gcc_assert (n_args == 1);
18605 return fold_builtin_cpu (fndecl, args);
18607 case IX86_BUILTIN_NANQ:
18608 case IX86_BUILTIN_NANSQ:
18610 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18611 const char *str = c_getstr (*args);
18612 int quiet = fn_code == IX86_BUILTIN_NANQ;
18613 REAL_VALUE_TYPE real;
18615 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18616 return build_real (type, real);
18617 return NULL_TREE;
18620 case IX86_BUILTIN_INFQ:
18621 case IX86_BUILTIN_HUGE_VALQ:
18623 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18624 REAL_VALUE_TYPE inf;
18625 real_inf (&inf);
18626 return build_real (type, inf);
18629 case IX86_BUILTIN_TZCNT16:
18630 case IX86_BUILTIN_CTZS:
18631 case IX86_BUILTIN_TZCNT32:
18632 case IX86_BUILTIN_TZCNT64:
18633 gcc_assert (n_args == 1);
18634 if (TREE_CODE (args[0]) == INTEGER_CST)
18636 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18637 tree arg = args[0];
18638 if (fn_code == IX86_BUILTIN_TZCNT16
18639 || fn_code == IX86_BUILTIN_CTZS)
18640 arg = fold_convert (short_unsigned_type_node, arg);
18641 if (integer_zerop (arg))
18642 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18643 else
18644 return fold_const_call (CFN_CTZ, type, arg);
18646 break;
18648 case IX86_BUILTIN_LZCNT16:
18649 case IX86_BUILTIN_CLZS:
18650 case IX86_BUILTIN_LZCNT32:
18651 case IX86_BUILTIN_LZCNT64:
18652 gcc_assert (n_args == 1);
18653 if (TREE_CODE (args[0]) == INTEGER_CST)
18655 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18656 tree arg = args[0];
18657 if (fn_code == IX86_BUILTIN_LZCNT16
18658 || fn_code == IX86_BUILTIN_CLZS)
18659 arg = fold_convert (short_unsigned_type_node, arg);
18660 if (integer_zerop (arg))
18661 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18662 else
18663 return fold_const_call (CFN_CLZ, type, arg);
18665 break;
18667 case IX86_BUILTIN_BEXTR32:
18668 case IX86_BUILTIN_BEXTR64:
18669 case IX86_BUILTIN_BEXTRI32:
18670 case IX86_BUILTIN_BEXTRI64:
18671 gcc_assert (n_args == 2);
18672 if (tree_fits_uhwi_p (args[1]))
18674 unsigned HOST_WIDE_INT res = 0;
18675 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
18676 unsigned int start = tree_to_uhwi (args[1]);
18677 unsigned int len = (start & 0xff00) >> 8;
18678 start &= 0xff;
18679 if (start >= prec || len == 0)
18680 res = 0;
18681 else if (!tree_fits_uhwi_p (args[0]))
18682 break;
18683 else
18684 res = tree_to_uhwi (args[0]) >> start;
18685 if (len > prec)
18686 len = prec;
18687 if (len < HOST_BITS_PER_WIDE_INT)
18688 res &= (HOST_WIDE_INT_1U << len) - 1;
18689 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18691 break;
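/* A worked example of the BEXTR folding above (illustration only; the
   user-level builtin name is __builtin_ia32_bextr_u32):
   __builtin_ia32_bextr_u32 (0x12345678, 0x0804) has start = 4 and
   len = 8, so it folds to (0x12345678 >> 4) & 0xff == 0x67; a start at or
   beyond the operand's precision, or a zero length, folds to 0.  */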
18693 case IX86_BUILTIN_BZHI32:
18694 case IX86_BUILTIN_BZHI64:
18695 gcc_assert (n_args == 2);
18696 if (tree_fits_uhwi_p (args[1]))
18698 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18699 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18700 return args[0];
18701 if (idx == 0)
18702 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
18703 if (!tree_fits_uhwi_p (args[0]))
18704 break;
18705 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18706 res &= ~(HOST_WIDE_INT_M1U << idx);
18707 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18709 break;
18711 case IX86_BUILTIN_PDEP32:
18712 case IX86_BUILTIN_PDEP64:
18713 gcc_assert (n_args == 2);
18714 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18716 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18717 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18718 unsigned HOST_WIDE_INT res = 0;
18719 unsigned HOST_WIDE_INT m, k = 1;
18720 for (m = 1; m; m <<= 1)
18721 if ((mask & m) != 0)
18723 if ((src & k) != 0)
18724 res |= m;
18725 k <<= 1;
18727 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18729 break;
18731 case IX86_BUILTIN_PEXT32:
18732 case IX86_BUILTIN_PEXT64:
18733 gcc_assert (n_args == 2);
18734 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18736 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18737 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18738 unsigned HOST_WIDE_INT res = 0;
18739 unsigned HOST_WIDE_INT m, k = 1;
18740 for (m = 1; m; m <<= 1)
18741 if ((mask & m) != 0)
18743 if ((src & m) != 0)
18744 res |= k;
18745 k <<= 1;
18747 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18749 break;
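/* Worked examples of the two foldings above (illustration only, using the
   BMI2 intrinsic names that map to these builtins): _pdep_u32 (0x5, 0x1a)
   deposits the low source bits into the set mask positions (bits 1, 3
   and 4), giving 0x12, while _pext_u32 (0x12, 0x1a) extracts those same
   positions and packs them back down to 0x5.  */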
18751 case IX86_BUILTIN_MOVMSKPS:
18752 case IX86_BUILTIN_PMOVMSKB:
18753 case IX86_BUILTIN_MOVMSKPD:
18754 case IX86_BUILTIN_PMOVMSKB128:
18755 case IX86_BUILTIN_MOVMSKPD256:
18756 case IX86_BUILTIN_MOVMSKPS256:
18757 case IX86_BUILTIN_PMOVMSKB256:
18758 gcc_assert (n_args == 1);
18759 if (TREE_CODE (args[0]) == VECTOR_CST)
18761 HOST_WIDE_INT res = 0;
18762 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18764 tree e = VECTOR_CST_ELT (args[0], i);
18765 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18767 if (wi::neg_p (wi::to_wide (e)))
18768 res |= HOST_WIDE_INT_1 << i;
18770 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18772 if (TREE_REAL_CST (e).sign)
18773 res |= HOST_WIDE_INT_1 << i;
18775 else
18776 return NULL_TREE;
18778 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18780 break;
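/* For illustration, this folding turns the equivalent of
   _mm_movemask_ps on the constant vector { -1.0f, 2.0f, -3.0f, 4.0f }
   into the integer 0b0101 == 5: one result bit per element, set exactly
   when that element's sign bit is set.  */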
18782 case IX86_BUILTIN_PSLLD:
18783 case IX86_BUILTIN_PSLLD128:
18784 case IX86_BUILTIN_PSLLD128_MASK:
18785 case IX86_BUILTIN_PSLLD256:
18786 case IX86_BUILTIN_PSLLD256_MASK:
18787 case IX86_BUILTIN_PSLLD512:
18788 case IX86_BUILTIN_PSLLDI:
18789 case IX86_BUILTIN_PSLLDI128:
18790 case IX86_BUILTIN_PSLLDI128_MASK:
18791 case IX86_BUILTIN_PSLLDI256:
18792 case IX86_BUILTIN_PSLLDI256_MASK:
18793 case IX86_BUILTIN_PSLLDI512:
18794 case IX86_BUILTIN_PSLLQ:
18795 case IX86_BUILTIN_PSLLQ128:
18796 case IX86_BUILTIN_PSLLQ128_MASK:
18797 case IX86_BUILTIN_PSLLQ256:
18798 case IX86_BUILTIN_PSLLQ256_MASK:
18799 case IX86_BUILTIN_PSLLQ512:
18800 case IX86_BUILTIN_PSLLQI:
18801 case IX86_BUILTIN_PSLLQI128:
18802 case IX86_BUILTIN_PSLLQI128_MASK:
18803 case IX86_BUILTIN_PSLLQI256:
18804 case IX86_BUILTIN_PSLLQI256_MASK:
18805 case IX86_BUILTIN_PSLLQI512:
18806 case IX86_BUILTIN_PSLLW:
18807 case IX86_BUILTIN_PSLLW128:
18808 case IX86_BUILTIN_PSLLW128_MASK:
18809 case IX86_BUILTIN_PSLLW256:
18810 case IX86_BUILTIN_PSLLW256_MASK:
18811 case IX86_BUILTIN_PSLLW512_MASK:
18812 case IX86_BUILTIN_PSLLWI:
18813 case IX86_BUILTIN_PSLLWI128:
18814 case IX86_BUILTIN_PSLLWI128_MASK:
18815 case IX86_BUILTIN_PSLLWI256:
18816 case IX86_BUILTIN_PSLLWI256_MASK:
18817 case IX86_BUILTIN_PSLLWI512_MASK:
18818 rcode = ASHIFT;
18819 is_vshift = false;
18820 goto do_shift;
18821 case IX86_BUILTIN_PSRAD:
18822 case IX86_BUILTIN_PSRAD128:
18823 case IX86_BUILTIN_PSRAD128_MASK:
18824 case IX86_BUILTIN_PSRAD256:
18825 case IX86_BUILTIN_PSRAD256_MASK:
18826 case IX86_BUILTIN_PSRAD512:
18827 case IX86_BUILTIN_PSRADI:
18828 case IX86_BUILTIN_PSRADI128:
18829 case IX86_BUILTIN_PSRADI128_MASK:
18830 case IX86_BUILTIN_PSRADI256:
18831 case IX86_BUILTIN_PSRADI256_MASK:
18832 case IX86_BUILTIN_PSRADI512:
18833 case IX86_BUILTIN_PSRAQ128_MASK:
18834 case IX86_BUILTIN_PSRAQ256_MASK:
18835 case IX86_BUILTIN_PSRAQ512:
18836 case IX86_BUILTIN_PSRAQI128_MASK:
18837 case IX86_BUILTIN_PSRAQI256_MASK:
18838 case IX86_BUILTIN_PSRAQI512:
18839 case IX86_BUILTIN_PSRAW:
18840 case IX86_BUILTIN_PSRAW128:
18841 case IX86_BUILTIN_PSRAW128_MASK:
18842 case IX86_BUILTIN_PSRAW256:
18843 case IX86_BUILTIN_PSRAW256_MASK:
18844 case IX86_BUILTIN_PSRAW512:
18845 case IX86_BUILTIN_PSRAWI:
18846 case IX86_BUILTIN_PSRAWI128:
18847 case IX86_BUILTIN_PSRAWI128_MASK:
18848 case IX86_BUILTIN_PSRAWI256:
18849 case IX86_BUILTIN_PSRAWI256_MASK:
18850 case IX86_BUILTIN_PSRAWI512:
18851 rcode = ASHIFTRT;
18852 is_vshift = false;
18853 goto do_shift;
18854 case IX86_BUILTIN_PSRLD:
18855 case IX86_BUILTIN_PSRLD128:
18856 case IX86_BUILTIN_PSRLD128_MASK:
18857 case IX86_BUILTIN_PSRLD256:
18858 case IX86_BUILTIN_PSRLD256_MASK:
18859 case IX86_BUILTIN_PSRLD512:
18860 case IX86_BUILTIN_PSRLDI:
18861 case IX86_BUILTIN_PSRLDI128:
18862 case IX86_BUILTIN_PSRLDI128_MASK:
18863 case IX86_BUILTIN_PSRLDI256:
18864 case IX86_BUILTIN_PSRLDI256_MASK:
18865 case IX86_BUILTIN_PSRLDI512:
18866 case IX86_BUILTIN_PSRLQ:
18867 case IX86_BUILTIN_PSRLQ128:
18868 case IX86_BUILTIN_PSRLQ128_MASK:
18869 case IX86_BUILTIN_PSRLQ256:
18870 case IX86_BUILTIN_PSRLQ256_MASK:
18871 case IX86_BUILTIN_PSRLQ512:
18872 case IX86_BUILTIN_PSRLQI:
18873 case IX86_BUILTIN_PSRLQI128:
18874 case IX86_BUILTIN_PSRLQI128_MASK:
18875 case IX86_BUILTIN_PSRLQI256:
18876 case IX86_BUILTIN_PSRLQI256_MASK:
18877 case IX86_BUILTIN_PSRLQI512:
18878 case IX86_BUILTIN_PSRLW:
18879 case IX86_BUILTIN_PSRLW128:
18880 case IX86_BUILTIN_PSRLW128_MASK:
18881 case IX86_BUILTIN_PSRLW256:
18882 case IX86_BUILTIN_PSRLW256_MASK:
18883 case IX86_BUILTIN_PSRLW512:
18884 case IX86_BUILTIN_PSRLWI:
18885 case IX86_BUILTIN_PSRLWI128:
18886 case IX86_BUILTIN_PSRLWI128_MASK:
18887 case IX86_BUILTIN_PSRLWI256:
18888 case IX86_BUILTIN_PSRLWI256_MASK:
18889 case IX86_BUILTIN_PSRLWI512:
18890 rcode = LSHIFTRT;
18891 is_vshift = false;
18892 goto do_shift;
18893 case IX86_BUILTIN_PSLLVV16HI:
18894 case IX86_BUILTIN_PSLLVV16SI:
18895 case IX86_BUILTIN_PSLLVV2DI:
18896 case IX86_BUILTIN_PSLLVV2DI_MASK:
18897 case IX86_BUILTIN_PSLLVV32HI:
18898 case IX86_BUILTIN_PSLLVV4DI:
18899 case IX86_BUILTIN_PSLLVV4DI_MASK:
18900 case IX86_BUILTIN_PSLLVV4SI:
18901 case IX86_BUILTIN_PSLLVV4SI_MASK:
18902 case IX86_BUILTIN_PSLLVV8DI:
18903 case IX86_BUILTIN_PSLLVV8HI:
18904 case IX86_BUILTIN_PSLLVV8SI:
18905 case IX86_BUILTIN_PSLLVV8SI_MASK:
18906 rcode = ASHIFT;
18907 is_vshift = true;
18908 goto do_shift;
18909 case IX86_BUILTIN_PSRAVQ128:
18910 case IX86_BUILTIN_PSRAVQ256:
18911 case IX86_BUILTIN_PSRAVV16HI:
18912 case IX86_BUILTIN_PSRAVV16SI:
18913 case IX86_BUILTIN_PSRAVV32HI:
18914 case IX86_BUILTIN_PSRAVV4SI:
18915 case IX86_BUILTIN_PSRAVV4SI_MASK:
18916 case IX86_BUILTIN_PSRAVV8DI:
18917 case IX86_BUILTIN_PSRAVV8HI:
18918 case IX86_BUILTIN_PSRAVV8SI:
18919 case IX86_BUILTIN_PSRAVV8SI_MASK:
18920 rcode = ASHIFTRT;
18921 is_vshift = true;
18922 goto do_shift;
18923 case IX86_BUILTIN_PSRLVV16HI:
18924 case IX86_BUILTIN_PSRLVV16SI:
18925 case IX86_BUILTIN_PSRLVV2DI:
18926 case IX86_BUILTIN_PSRLVV2DI_MASK:
18927 case IX86_BUILTIN_PSRLVV32HI:
18928 case IX86_BUILTIN_PSRLVV4DI:
18929 case IX86_BUILTIN_PSRLVV4DI_MASK:
18930 case IX86_BUILTIN_PSRLVV4SI:
18931 case IX86_BUILTIN_PSRLVV4SI_MASK:
18932 case IX86_BUILTIN_PSRLVV8DI:
18933 case IX86_BUILTIN_PSRLVV8HI:
18934 case IX86_BUILTIN_PSRLVV8SI:
18935 case IX86_BUILTIN_PSRLVV8SI_MASK:
18936 rcode = LSHIFTRT;
18937 is_vshift = true;
18938 goto do_shift;
18940 do_shift:
18941 gcc_assert (n_args >= 2);
18942 if (TREE_CODE (args[0]) != VECTOR_CST)
18943 break;
18944 mask = HOST_WIDE_INT_M1U;
18945 if (n_args > 2)
18947 /* This is a masked shift. */
18948 if (!tree_fits_uhwi_p (args[n_args - 1])
18949 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18950 break;
18951 mask = tree_to_uhwi (args[n_args - 1]);
18952 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18953 mask |= HOST_WIDE_INT_M1U << elems;
18954 if (mask != HOST_WIDE_INT_M1U
18955 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18956 break;
18957 if (mask == (HOST_WIDE_INT_M1U << elems))
18958 return args[n_args - 2];
18960 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18961 break;
18962 if (tree tem = (is_vshift ? integer_one_node
18963 : ix86_vector_shift_count (args[1])))
18965 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18966 unsigned HOST_WIDE_INT prec
18967 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18968 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18969 return args[0];
18970 if (count >= prec)
18972 if (rcode == ASHIFTRT)
18973 count = prec - 1;
18974 else if (mask == HOST_WIDE_INT_M1U)
18975 return build_zero_cst (TREE_TYPE (args[0]));
18977 tree countt = NULL_TREE;
18978 if (!is_vshift)
18980 if (count >= prec)
18981 countt = integer_zero_node;
18982 else
18983 countt = build_int_cst (integer_type_node, count);
18985 tree_vector_builder builder;
18986 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18987 builder.new_vector (TREE_TYPE (args[0]),
18988 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18990 else
18991 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18992 false);
18993 unsigned int cnt = builder.encoded_nelts ();
18994 for (unsigned int i = 0; i < cnt; ++i)
18996 tree elt = VECTOR_CST_ELT (args[0], i);
18997 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18998 return NULL_TREE;
18999 tree type = TREE_TYPE (elt);
19000 if (rcode == LSHIFTRT)
19001 elt = fold_convert (unsigned_type_for (type), elt);
19002 if (is_vshift)
19004 countt = VECTOR_CST_ELT (args[1], i);
19005 if (TREE_CODE (countt) != INTEGER_CST
19006 || TREE_OVERFLOW (countt))
19007 return NULL_TREE;
19008 if (wi::neg_p (wi::to_wide (countt))
19009 || wi::to_widest (countt) >= prec)
19011 if (rcode == ASHIFTRT)
19012 countt = build_int_cst (TREE_TYPE (countt),
19013 prec - 1);
19014 else
19016 elt = build_zero_cst (TREE_TYPE (elt));
19017 countt = build_zero_cst (TREE_TYPE (countt));
19021 else if (count >= prec)
19022 elt = build_zero_cst (TREE_TYPE (elt));
19023 elt = const_binop (rcode == ASHIFT
19024 ? LSHIFT_EXPR : RSHIFT_EXPR,
19025 TREE_TYPE (elt), elt, countt);
19026 if (!elt || TREE_CODE (elt) != INTEGER_CST)
19027 return NULL_TREE;
19028 if (rcode == LSHIFTRT)
19029 elt = fold_convert (type, elt);
19030 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
19032 elt = VECTOR_CST_ELT (args[n_args - 2], i);
19033 if (TREE_CODE (elt) != INTEGER_CST
19034 || TREE_OVERFLOW (elt))
19035 return NULL_TREE;
19037 builder.quick_push (elt);
19039 return builder.build ();
19041 break;
19043 default:
19044 break;
19048 #ifdef SUBTARGET_FOLD_BUILTIN
19049 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
19050 #endif
19052 return NULL_TREE;
19055 /* Fold a MD builtin (use ix86_fold_builtin for folding into
19056 constant) in GIMPLE. */
19058 bool
19059 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
19061 gimple *stmt = gsi_stmt (*gsi), *g;
19062 gimple_seq stmts = NULL;
19063 tree fndecl = gimple_call_fndecl (stmt);
19064 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
19065 int n_args = gimple_call_num_args (stmt);
19066 enum ix86_builtins fn_code
19067 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
19068 tree decl = NULL_TREE;
19069 tree arg0, arg1, arg2;
19070 enum rtx_code rcode;
19071 enum tree_code tcode;
19072 unsigned HOST_WIDE_INT count;
19073 bool is_vshift;
19074 unsigned HOST_WIDE_INT elems;
19075 location_t loc;
19077 /* Don't fold when there's an ISA mismatch. */
19078 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
19079 return false;
19081 switch (fn_code)
19083 case IX86_BUILTIN_TZCNT32:
19084 decl = builtin_decl_implicit (BUILT_IN_CTZ);
19085 goto fold_tzcnt_lzcnt;
19087 case IX86_BUILTIN_TZCNT64:
19088 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
19089 goto fold_tzcnt_lzcnt;
19091 case IX86_BUILTIN_LZCNT32:
19092 decl = builtin_decl_implicit (BUILT_IN_CLZ);
19093 goto fold_tzcnt_lzcnt;
19095 case IX86_BUILTIN_LZCNT64:
19096 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
19097 goto fold_tzcnt_lzcnt;
19099 fold_tzcnt_lzcnt:
19100 gcc_assert (n_args == 1);
19101 arg0 = gimple_call_arg (stmt, 0);
19102 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
19104 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
19105 /* If arg0 is provably non-zero, optimize into the generic
19106 __builtin_c[tl]z{,ll} function, which the middle-end handles
19107 better. */
19108 if (!expr_not_equal_to (arg0, wi::zero (prec)))
19109 return false;
19111 loc = gimple_location (stmt);
19112 g = gimple_build_call (decl, 1, arg0);
19113 gimple_set_location (g, loc);
19114 tree lhs = make_ssa_name (integer_type_node);
19115 gimple_call_set_lhs (g, lhs);
19116 gsi_insert_before (gsi, g, GSI_SAME_STMT);
19117 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
19118 gimple_set_location (g, loc);
19119 gsi_replace (gsi, g, false);
19120 return true;
19122 break;
19124 case IX86_BUILTIN_BZHI32:
19125 case IX86_BUILTIN_BZHI64:
19126 gcc_assert (n_args == 2);
19127 arg1 = gimple_call_arg (stmt, 1);
19128 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
19130 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
19131 arg0 = gimple_call_arg (stmt, 0);
19132 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
19133 break;
19134 loc = gimple_location (stmt);
19135 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19136 gimple_set_location (g, loc);
19137 gsi_replace (gsi, g, false);
19138 return true;
19140 break;
19142 case IX86_BUILTIN_PDEP32:
19143 case IX86_BUILTIN_PDEP64:
19144 case IX86_BUILTIN_PEXT32:
19145 case IX86_BUILTIN_PEXT64:
19146 gcc_assert (n_args == 2);
19147 arg1 = gimple_call_arg (stmt, 1);
19148 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
19150 loc = gimple_location (stmt);
19151 arg0 = gimple_call_arg (stmt, 0);
19152 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19153 gimple_set_location (g, loc);
19154 gsi_replace (gsi, g, false);
19155 return true;
19157 break;
19159 case IX86_BUILTIN_PBLENDVB256:
19160 case IX86_BUILTIN_BLENDVPS256:
19161 case IX86_BUILTIN_BLENDVPD256:
19162 /* pcmpeqb/d/q is under AVX2; without AVX2 it is vector-lowered
19163 to scalar operations and not combined back. */
19164 if (!TARGET_AVX2)
19165 break;
19167 /* FALLTHRU. */
19168 case IX86_BUILTIN_BLENDVPD:
19169 /* blendvpd is under SSE4.1 but pcmpgtq is under SSE4.2;
19170 without SSE4.2 it is vector-lowered to scalar operations and
19171 not combined back. */
19172 if (!TARGET_SSE4_2)
19173 break;
19174 /* FALLTHRU. */
19175 case IX86_BUILTIN_PBLENDVB128:
19176 case IX86_BUILTIN_BLENDVPS:
19177 gcc_assert (n_args == 3);
19178 arg0 = gimple_call_arg (stmt, 0);
19179 arg1 = gimple_call_arg (stmt, 1);
19180 arg2 = gimple_call_arg (stmt, 2);
19181 if (gimple_call_lhs (stmt))
19183 loc = gimple_location (stmt);
19184 tree type = TREE_TYPE (arg2);
19185 if (VECTOR_FLOAT_TYPE_P (type))
19187 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
19188 ? intSI_type_node : intDI_type_node;
19189 type = get_same_sized_vectype (itype, type);
19191 else
19192 type = signed_type_for (type);
19193 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
19194 tree zero_vec = build_zero_cst (type);
19195 tree cmp_type = truth_type_for (type);
19196 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
19197 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19198 g = gimple_build_assign (gimple_call_lhs (stmt),
19199 VEC_COND_EXPR, cmp,
19200 arg1, arg0);
19201 gimple_set_location (g, loc);
19202 gsi_replace (gsi, g, false);
19204 else
19205 gsi_replace (gsi, gimple_build_nop (), false);
19206 return true;
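/* For illustration, the lowering above rewrites the equivalent of
   _mm_blendv_ps (a, b, m) as a VEC_COND_EXPR: the mask is view-converted
   to a signed integer vector and compared against zero, so each result
   element is b[i] when the sign bit of m[i] is set and a[i] otherwise.  */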
19209 case IX86_BUILTIN_PCMPEQB128:
19210 case IX86_BUILTIN_PCMPEQW128:
19211 case IX86_BUILTIN_PCMPEQD128:
19212 case IX86_BUILTIN_PCMPEQQ:
19213 case IX86_BUILTIN_PCMPEQB256:
19214 case IX86_BUILTIN_PCMPEQW256:
19215 case IX86_BUILTIN_PCMPEQD256:
19216 case IX86_BUILTIN_PCMPEQQ256:
19217 tcode = EQ_EXPR;
19218 goto do_cmp;
19220 case IX86_BUILTIN_PCMPGTB128:
19221 case IX86_BUILTIN_PCMPGTW128:
19222 case IX86_BUILTIN_PCMPGTD128:
19223 case IX86_BUILTIN_PCMPGTQ:
19224 case IX86_BUILTIN_PCMPGTB256:
19225 case IX86_BUILTIN_PCMPGTW256:
19226 case IX86_BUILTIN_PCMPGTD256:
19227 case IX86_BUILTIN_PCMPGTQ256:
19228 tcode = GT_EXPR;
19230 do_cmp:
19231 gcc_assert (n_args == 2);
19232 arg0 = gimple_call_arg (stmt, 0);
19233 arg1 = gimple_call_arg (stmt, 1);
19234 if (gimple_call_lhs (stmt))
19236 loc = gimple_location (stmt);
19237 tree type = TREE_TYPE (arg0);
19238 tree zero_vec = build_zero_cst (type);
19239 tree minus_one_vec = build_minus_one_cst (type);
19240 tree cmp_type = truth_type_for (type);
19241 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
19242 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19243 g = gimple_build_assign (gimple_call_lhs (stmt),
19244 VEC_COND_EXPR, cmp,
19245 minus_one_vec, zero_vec);
19246 gimple_set_location (g, loc);
19247 gsi_replace (gsi, g, false);
19249 else
19250 gsi_replace (gsi, gimple_build_nop (), false);
19251 return true;
19253 case IX86_BUILTIN_PSLLD:
19254 case IX86_BUILTIN_PSLLD128:
19255 case IX86_BUILTIN_PSLLD128_MASK:
19256 case IX86_BUILTIN_PSLLD256:
19257 case IX86_BUILTIN_PSLLD256_MASK:
19258 case IX86_BUILTIN_PSLLD512:
19259 case IX86_BUILTIN_PSLLDI:
19260 case IX86_BUILTIN_PSLLDI128:
19261 case IX86_BUILTIN_PSLLDI128_MASK:
19262 case IX86_BUILTIN_PSLLDI256:
19263 case IX86_BUILTIN_PSLLDI256_MASK:
19264 case IX86_BUILTIN_PSLLDI512:
19265 case IX86_BUILTIN_PSLLQ:
19266 case IX86_BUILTIN_PSLLQ128:
19267 case IX86_BUILTIN_PSLLQ128_MASK:
19268 case IX86_BUILTIN_PSLLQ256:
19269 case IX86_BUILTIN_PSLLQ256_MASK:
19270 case IX86_BUILTIN_PSLLQ512:
19271 case IX86_BUILTIN_PSLLQI:
19272 case IX86_BUILTIN_PSLLQI128:
19273 case IX86_BUILTIN_PSLLQI128_MASK:
19274 case IX86_BUILTIN_PSLLQI256:
19275 case IX86_BUILTIN_PSLLQI256_MASK:
19276 case IX86_BUILTIN_PSLLQI512:
19277 case IX86_BUILTIN_PSLLW:
19278 case IX86_BUILTIN_PSLLW128:
19279 case IX86_BUILTIN_PSLLW128_MASK:
19280 case IX86_BUILTIN_PSLLW256:
19281 case IX86_BUILTIN_PSLLW256_MASK:
19282 case IX86_BUILTIN_PSLLW512_MASK:
19283 case IX86_BUILTIN_PSLLWI:
19284 case IX86_BUILTIN_PSLLWI128:
19285 case IX86_BUILTIN_PSLLWI128_MASK:
19286 case IX86_BUILTIN_PSLLWI256:
19287 case IX86_BUILTIN_PSLLWI256_MASK:
19288 case IX86_BUILTIN_PSLLWI512_MASK:
19289 rcode = ASHIFT;
19290 is_vshift = false;
19291 goto do_shift;
19292 case IX86_BUILTIN_PSRAD:
19293 case IX86_BUILTIN_PSRAD128:
19294 case IX86_BUILTIN_PSRAD128_MASK:
19295 case IX86_BUILTIN_PSRAD256:
19296 case IX86_BUILTIN_PSRAD256_MASK:
19297 case IX86_BUILTIN_PSRAD512:
19298 case IX86_BUILTIN_PSRADI:
19299 case IX86_BUILTIN_PSRADI128:
19300 case IX86_BUILTIN_PSRADI128_MASK:
19301 case IX86_BUILTIN_PSRADI256:
19302 case IX86_BUILTIN_PSRADI256_MASK:
19303 case IX86_BUILTIN_PSRADI512:
19304 case IX86_BUILTIN_PSRAQ128_MASK:
19305 case IX86_BUILTIN_PSRAQ256_MASK:
19306 case IX86_BUILTIN_PSRAQ512:
19307 case IX86_BUILTIN_PSRAQI128_MASK:
19308 case IX86_BUILTIN_PSRAQI256_MASK:
19309 case IX86_BUILTIN_PSRAQI512:
19310 case IX86_BUILTIN_PSRAW:
19311 case IX86_BUILTIN_PSRAW128:
19312 case IX86_BUILTIN_PSRAW128_MASK:
19313 case IX86_BUILTIN_PSRAW256:
19314 case IX86_BUILTIN_PSRAW256_MASK:
19315 case IX86_BUILTIN_PSRAW512:
19316 case IX86_BUILTIN_PSRAWI:
19317 case IX86_BUILTIN_PSRAWI128:
19318 case IX86_BUILTIN_PSRAWI128_MASK:
19319 case IX86_BUILTIN_PSRAWI256:
19320 case IX86_BUILTIN_PSRAWI256_MASK:
19321 case IX86_BUILTIN_PSRAWI512:
19322 rcode = ASHIFTRT;
19323 is_vshift = false;
19324 goto do_shift;
19325 case IX86_BUILTIN_PSRLD:
19326 case IX86_BUILTIN_PSRLD128:
19327 case IX86_BUILTIN_PSRLD128_MASK:
19328 case IX86_BUILTIN_PSRLD256:
19329 case IX86_BUILTIN_PSRLD256_MASK:
19330 case IX86_BUILTIN_PSRLD512:
19331 case IX86_BUILTIN_PSRLDI:
19332 case IX86_BUILTIN_PSRLDI128:
19333 case IX86_BUILTIN_PSRLDI128_MASK:
19334 case IX86_BUILTIN_PSRLDI256:
19335 case IX86_BUILTIN_PSRLDI256_MASK:
19336 case IX86_BUILTIN_PSRLDI512:
19337 case IX86_BUILTIN_PSRLQ:
19338 case IX86_BUILTIN_PSRLQ128:
19339 case IX86_BUILTIN_PSRLQ128_MASK:
19340 case IX86_BUILTIN_PSRLQ256:
19341 case IX86_BUILTIN_PSRLQ256_MASK:
19342 case IX86_BUILTIN_PSRLQ512:
19343 case IX86_BUILTIN_PSRLQI:
19344 case IX86_BUILTIN_PSRLQI128:
19345 case IX86_BUILTIN_PSRLQI128_MASK:
19346 case IX86_BUILTIN_PSRLQI256:
19347 case IX86_BUILTIN_PSRLQI256_MASK:
19348 case IX86_BUILTIN_PSRLQI512:
19349 case IX86_BUILTIN_PSRLW:
19350 case IX86_BUILTIN_PSRLW128:
19351 case IX86_BUILTIN_PSRLW128_MASK:
19352 case IX86_BUILTIN_PSRLW256:
19353 case IX86_BUILTIN_PSRLW256_MASK:
19354 case IX86_BUILTIN_PSRLW512:
19355 case IX86_BUILTIN_PSRLWI:
19356 case IX86_BUILTIN_PSRLWI128:
19357 case IX86_BUILTIN_PSRLWI128_MASK:
19358 case IX86_BUILTIN_PSRLWI256:
19359 case IX86_BUILTIN_PSRLWI256_MASK:
19360 case IX86_BUILTIN_PSRLWI512:
19361 rcode = LSHIFTRT;
19362 is_vshift = false;
19363 goto do_shift;
19364 case IX86_BUILTIN_PSLLVV16HI:
19365 case IX86_BUILTIN_PSLLVV16SI:
19366 case IX86_BUILTIN_PSLLVV2DI:
19367 case IX86_BUILTIN_PSLLVV2DI_MASK:
19368 case IX86_BUILTIN_PSLLVV32HI:
19369 case IX86_BUILTIN_PSLLVV4DI:
19370 case IX86_BUILTIN_PSLLVV4DI_MASK:
19371 case IX86_BUILTIN_PSLLVV4SI:
19372 case IX86_BUILTIN_PSLLVV4SI_MASK:
19373 case IX86_BUILTIN_PSLLVV8DI:
19374 case IX86_BUILTIN_PSLLVV8HI:
19375 case IX86_BUILTIN_PSLLVV8SI:
19376 case IX86_BUILTIN_PSLLVV8SI_MASK:
19377 rcode = ASHIFT;
19378 is_vshift = true;
19379 goto do_shift;
19380 case IX86_BUILTIN_PSRAVQ128:
19381 case IX86_BUILTIN_PSRAVQ256:
19382 case IX86_BUILTIN_PSRAVV16HI:
19383 case IX86_BUILTIN_PSRAVV16SI:
19384 case IX86_BUILTIN_PSRAVV32HI:
19385 case IX86_BUILTIN_PSRAVV4SI:
19386 case IX86_BUILTIN_PSRAVV4SI_MASK:
19387 case IX86_BUILTIN_PSRAVV8DI:
19388 case IX86_BUILTIN_PSRAVV8HI:
19389 case IX86_BUILTIN_PSRAVV8SI:
19390 case IX86_BUILTIN_PSRAVV8SI_MASK:
19391 rcode = ASHIFTRT;
19392 is_vshift = true;
19393 goto do_shift;
19394 case IX86_BUILTIN_PSRLVV16HI:
19395 case IX86_BUILTIN_PSRLVV16SI:
19396 case IX86_BUILTIN_PSRLVV2DI:
19397 case IX86_BUILTIN_PSRLVV2DI_MASK:
19398 case IX86_BUILTIN_PSRLVV32HI:
19399 case IX86_BUILTIN_PSRLVV4DI:
19400 case IX86_BUILTIN_PSRLVV4DI_MASK:
19401 case IX86_BUILTIN_PSRLVV4SI:
19402 case IX86_BUILTIN_PSRLVV4SI_MASK:
19403 case IX86_BUILTIN_PSRLVV8DI:
19404 case IX86_BUILTIN_PSRLVV8HI:
19405 case IX86_BUILTIN_PSRLVV8SI:
19406 case IX86_BUILTIN_PSRLVV8SI_MASK:
19407 rcode = LSHIFTRT;
19408 is_vshift = true;
19409 goto do_shift;
19411 do_shift:
19412 gcc_assert (n_args >= 2);
19413 if (!gimple_call_lhs (stmt))
19415 gsi_replace (gsi, gimple_build_nop (), false);
19416 return true;
19418 arg0 = gimple_call_arg (stmt, 0);
19419 arg1 = gimple_call_arg (stmt, 1);
19420 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19421 /* For masked shift, only optimize if the mask is all ones. */
19422 if (n_args > 2
19423 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19424 break;
19425 if (is_vshift)
19427 if (TREE_CODE (arg1) != VECTOR_CST)
19428 break;
19429 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
19430 if (integer_zerop (arg1))
19431 count = 0;
19432 else if (rcode == ASHIFTRT)
19433 break;
19434 else
19435 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
19437 tree elt = VECTOR_CST_ELT (arg1, i);
19438 if (!wi::neg_p (wi::to_wide (elt))
19439 && wi::to_widest (elt) < count)
19440 return false;
19443 else
19445 arg1 = ix86_vector_shift_count (arg1);
19446 if (!arg1)
19447 break;
19448 count = tree_to_uhwi (arg1);
19450 if (count == 0)
19452 /* Just return the first argument for shift by 0. */
19453 loc = gimple_location (stmt);
19454 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
19455 gimple_set_location (g, loc);
19456 gsi_replace (gsi, g, false);
19457 return true;
19459 if (rcode != ASHIFTRT
19460 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
19462 /* For shift counts equal to or greater than the precision the result
19463 is zero, except for arithmetic right shift. */
19464 loc = gimple_location (stmt);
19465 g = gimple_build_assign (gimple_call_lhs (stmt),
19466 build_zero_cst (TREE_TYPE (arg0)));
19467 gimple_set_location (g, loc);
19468 gsi_replace (gsi, g, false);
19469 return true;
19471 break;
19473 case IX86_BUILTIN_SHUFPD512:
19474 case IX86_BUILTIN_SHUFPS512:
19475 case IX86_BUILTIN_SHUFPD:
19476 case IX86_BUILTIN_SHUFPD256:
19477 case IX86_BUILTIN_SHUFPS:
19478 case IX86_BUILTIN_SHUFPS256:
19479 arg0 = gimple_call_arg (stmt, 0);
19480 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19481 /* This is a masked shuffle. Only optimize if the mask is all ones. */
19482 if (n_args > 3
19483 && !ix86_masked_all_ones (elems,
19484 gimple_call_arg (stmt, n_args - 1)))
19485 break;
19486 arg2 = gimple_call_arg (stmt, 2);
19487 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
19489 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
19490 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
19491 if (shuffle_mask > 255)
19492 return false;
19494 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
19495 loc = gimple_location (stmt);
19496 tree itype = (imode == E_DFmode
19497 ? long_long_integer_type_node : integer_type_node);
19498 tree vtype = build_vector_type (itype, elems);
19499 tree_vector_builder elts (vtype, elems, 1);
19502 /* Transform the integer shuffle_mask into the vector perm_mask which
19503 is used by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
19504 for (unsigned i = 0; i != elems; i++)
19506 unsigned sel_idx;
19507 /* Imm[1:0] (if VL > 128, then also Imm[3:2], Imm[5:4], Imm[7:6])
19508 provides 2 select controls for each element of the
19509 destination. */
19510 if (imode == E_DFmode)
19511 sel_idx = (i & 1) * elems + (i & ~1)
19512 + ((shuffle_mask >> i) & 1);
19513 else
19515 /* Imm[7:0] (if VL > 128, the same Imm[7:0] is used for every lane)
19516 provides 4 select controls for the elements of the destination. */
19517 unsigned j = i % 4;
19518 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
19519 + ((shuffle_mask >> 2 * j) & 3);
19521 elts.quick_push (build_int_cst (itype, sel_idx));
19524 tree perm_mask = elts.build ();
19525 arg1 = gimple_call_arg (stmt, 1);
19526 g = gimple_build_assign (gimple_call_lhs (stmt),
19527 VEC_PERM_EXPR,
19528 arg0, arg1, perm_mask);
19529 gimple_set_location (g, loc);
19530 gsi_replace (gsi, g, false);
19531 return true;
19533 // Do not error yet, the constant could be propagated later?
19534 break;
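/* A worked example of the mask transformation above (illustration only,
   using the SSE2 intrinsic that maps to IX86_BUILTIN_SHUFPD): for
   _mm_shuffle_pd (a, b, 2) ELEMS is 2 and imode is DFmode, so element 0
   gets sel_idx 0 (imm bit 0 is 0, i.e. a[0]) and element 1 gets
   sel_idx 2 + 1 == 3 (imm bit 1 is 1, i.e. b[1]); the call is therefore
   rewritten as VEC_PERM_EXPR <a, b, {0, 3}>.  */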
19536 case IX86_BUILTIN_PABSB:
19537 case IX86_BUILTIN_PABSW:
19538 case IX86_BUILTIN_PABSD:
19539 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
19540 if (!TARGET_MMX_WITH_SSE)
19541 break;
19542 /* FALLTHRU. */
19543 case IX86_BUILTIN_PABSB128:
19544 case IX86_BUILTIN_PABSB256:
19545 case IX86_BUILTIN_PABSB512:
19546 case IX86_BUILTIN_PABSW128:
19547 case IX86_BUILTIN_PABSW256:
19548 case IX86_BUILTIN_PABSW512:
19549 case IX86_BUILTIN_PABSD128:
19550 case IX86_BUILTIN_PABSD256:
19551 case IX86_BUILTIN_PABSD512:
19552 case IX86_BUILTIN_PABSQ128:
19553 case IX86_BUILTIN_PABSQ256:
19554 case IX86_BUILTIN_PABSQ512:
19555 case IX86_BUILTIN_PABSB128_MASK:
19556 case IX86_BUILTIN_PABSB256_MASK:
19557 case IX86_BUILTIN_PABSW128_MASK:
19558 case IX86_BUILTIN_PABSW256_MASK:
19559 case IX86_BUILTIN_PABSD128_MASK:
19560 case IX86_BUILTIN_PABSD256_MASK:
19561 gcc_assert (n_args >= 1);
19562 if (!gimple_call_lhs (stmt))
19564 gsi_replace (gsi, gimple_build_nop (), false);
19565 return true;
19567 arg0 = gimple_call_arg (stmt, 0);
19568 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
19569 /* For masked ABS, only optimize if the mask is all ones. */
19570 if (n_args > 1
19571 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
19572 break;
19574 tree utype, ures, vce;
19575 utype = unsigned_type_for (TREE_TYPE (arg0));
19576 /* PABSB/W/D/Q store the unsigned result in dst; use ABSU_EXPR
19577 instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
19578 ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
19579 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19580 loc = gimple_location (stmt);
19581 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
19582 g = gimple_build_assign (gimple_call_lhs (stmt),
19583 VIEW_CONVERT_EXPR, vce);
19584 gsi_replace (gsi, g, false);
19586 return true;
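/* For illustration, ABSU_EXPR matters for the most negative element: the
   equivalent of _mm_abs_epi8 on a vector containing -128 must produce the
   unsigned value 128 (bit pattern 0x80), which a signed ABS_EXPR could not
   express without overflow; the unsigned result is then view-converted
   back to the signed vector type of the destination.  */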
19588 default:
19589 break;
19592 return false;
19595 /* Handler for an SVML-style interface to
19596 a library with vectorized intrinsics. */
19598 tree
19599 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19601 char name[20];
19602 tree fntype, new_fndecl, args;
19603 unsigned arity;
19604 const char *bname;
19605 machine_mode el_mode, in_mode;
19606 int n, in_n;
19608 /* SVML is suitable for unsafe math only. */
19609 if (!flag_unsafe_math_optimizations)
19610 return NULL_TREE;
19612 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19613 n = TYPE_VECTOR_SUBPARTS (type_out);
19614 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19615 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19616 if (el_mode != in_mode
19617 || n != in_n)
19618 return NULL_TREE;
19620 switch (fn)
19622 CASE_CFN_EXP:
19623 CASE_CFN_LOG:
19624 CASE_CFN_LOG10:
19625 CASE_CFN_POW:
19626 CASE_CFN_TANH:
19627 CASE_CFN_TAN:
19628 CASE_CFN_ATAN:
19629 CASE_CFN_ATAN2:
19630 CASE_CFN_ATANH:
19631 CASE_CFN_CBRT:
19632 CASE_CFN_SINH:
19633 CASE_CFN_SIN:
19634 CASE_CFN_ASINH:
19635 CASE_CFN_ASIN:
19636 CASE_CFN_COSH:
19637 CASE_CFN_COS:
19638 CASE_CFN_ACOSH:
19639 CASE_CFN_ACOS:
19640 if ((el_mode != DFmode || n != 2)
19641 && (el_mode != SFmode || n != 4))
19642 return NULL_TREE;
19643 break;
19645 default:
19646 return NULL_TREE;
19649 tree fndecl = mathfn_built_in (el_mode == DFmode
19650 ? double_type_node : float_type_node, fn);
19651 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19653 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
19654 strcpy (name, "vmlsLn4");
19655 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
19656 strcpy (name, "vmldLn2");
19657 else if (n == 4)
19659 sprintf (name, "vmls%s", bname+10);
19660 name[strlen (name)-1] = '4';
19662 else
19663 sprintf (name, "vmld%s2", bname+10);
19665 /* Convert to uppercase. */
19666 name[4] &= ~0x20;
19668 arity = 0;
19669 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19670 arity++;
19672 if (arity == 1)
19673 fntype = build_function_type_list (type_out, type_in, NULL);
19674 else
19675 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19677 /* Build a function declaration for the vectorized function. */
19678 new_fndecl = build_decl (BUILTINS_LOCATION,
19679 FUNCTION_DECL, get_identifier (name), fntype);
19680 TREE_PUBLIC (new_fndecl) = 1;
19681 DECL_EXTERNAL (new_fndecl) = 1;
19682 DECL_IS_NOVOPS (new_fndecl) = 1;
19683 TREE_READONLY (new_fndecl) = 1;
19685 return new_fndecl;
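/* A worked example of the SVML name mangling above (illustration only):
   for a V2DF sin the scalar decl is __builtin_sin, bname + 10 is "sin",
   giving "vmldsin2", and uppercasing name[4] yields "vmldSin2"; the V4SF
   sinf variant becomes "vmlsSin4" (the trailing 'f' is overwritten by the
   '4'), and log is special-cased to "vmldLn2" / "vmlsLn4".  */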
19688 /* Handler for an ACML-style interface to
19689 a library with vectorized intrinsics. */
19691 tree
19692 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
19694 char name[20] = "__vr.._";
19695 tree fntype, new_fndecl, args;
19696 unsigned arity;
19697 const char *bname;
19698 machine_mode el_mode, in_mode;
19699 int n, in_n;
19701 /* ACML is 64-bit only and suitable for unsafe math only, as it does
19702 not correctly support parts of IEEE arithmetic with the required
19703 precision, such as denormals. */
19704 if (!TARGET_64BIT
19705 || !flag_unsafe_math_optimizations)
19706 return NULL_TREE;
19708 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19709 n = TYPE_VECTOR_SUBPARTS (type_out);
19710 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19711 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19712 if (el_mode != in_mode
19713 || n != in_n)
19714 return NULL_TREE;
19716 switch (fn)
19718 CASE_CFN_SIN:
19719 CASE_CFN_COS:
19720 CASE_CFN_EXP:
19721 CASE_CFN_LOG:
19722 CASE_CFN_LOG2:
19723 CASE_CFN_LOG10:
19724 if (el_mode == DFmode && n == 2)
19726 name[4] = 'd';
19727 name[5] = '2';
19729 else if (el_mode == SFmode && n == 4)
19731 name[4] = 's';
19732 name[5] = '4';
19734 else
19735 return NULL_TREE;
19736 break;
19738 default:
19739 return NULL_TREE;
19742 tree fndecl = mathfn_built_in (el_mode == DFmode
19743 ? double_type_node : float_type_node, fn);
19744 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19745 sprintf (name + 7, "%s", bname+10);
19747 arity = 0;
19748 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19749 arity++;
19751 if (arity == 1)
19752 fntype = build_function_type_list (type_out, type_in, NULL);
19753 else
19754 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19756 /* Build a function declaration for the vectorized function. */
19757 new_fndecl = build_decl (BUILTINS_LOCATION,
19758 FUNCTION_DECL, get_identifier (name), fntype);
19759 TREE_PUBLIC (new_fndecl) = 1;
19760 DECL_EXTERNAL (new_fndecl) = 1;
19761 DECL_IS_NOVOPS (new_fndecl) = 1;
19762 TREE_READONLY (new_fndecl) = 1;
19764 return new_fndecl;
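/* For illustration, the "__vr.._" template above is filled in as follows:
   a V2DF log sets name[4] = 'd' and name[5] = '2' and appends
   bname + 10 == "log", giving "__vrd2_log"; the V4SF sinf variant
   similarly becomes "__vrs4_sinf".  */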
19767 /* Returns a decl of a function that implements scatter store with
19768 register type VECTYPE and index type INDEX_TYPE and SCALE.
19769 Return NULL_TREE if it is not available. */
19771 static tree
19772 ix86_vectorize_builtin_scatter (const_tree vectype,
19773 const_tree index_type, int scale)
19775 bool si;
19776 enum ix86_builtins code;
19777 const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
19779 if (!TARGET_AVX512F)
19780 return NULL_TREE;
19782 if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
19783 return NULL_TREE;
19785 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19786 ? !TARGET_USE_SCATTER_2PARTS
19787 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19788 ? !TARGET_USE_SCATTER_4PARTS
19789 : !TARGET_USE_SCATTER_8PARTS))
19790 return NULL_TREE;
19792 if ((TREE_CODE (index_type) != INTEGER_TYPE
19793 && !POINTER_TYPE_P (index_type))
19794 || (TYPE_MODE (index_type) != SImode
19795 && TYPE_MODE (index_type) != DImode))
19796 return NULL_TREE;
19798 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19799 return NULL_TREE;
19801 /* v*scatter* insn sign extends index to pointer mode. */
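/* Consequently an unsigned index type narrower than a pointer is rejected
   below: an index value with its high bit set would be sign-extended to the
   wrong address.  */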
19802 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19803 && TYPE_UNSIGNED (index_type))
19804 return NULL_TREE;
19806 /* Scale can be 1, 2, 4 or 8. */
19807 if (scale <= 0
19808 || scale > 8
19809 || (scale & (scale - 1)) != 0)
19810 return NULL_TREE;
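/* The test above relies on the usual bit trick: scale & (scale - 1) is zero
   exactly when scale is a power of two, so together with the range check only
   1, 2, 4 and 8 are accepted.  */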
19812 si = TYPE_MODE (index_type) == SImode;
19813 switch (TYPE_MODE (vectype))
19815 case E_V8DFmode:
19816 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19817 break;
19818 case E_V8DImode:
19819 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19820 break;
19821 case E_V16SFmode:
19822 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19823 break;
19824 case E_V16SImode:
19825 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19826 break;
19827 case E_V4DFmode:
19828 if (TARGET_AVX512VL)
19829 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19830 else
19831 return NULL_TREE;
19832 break;
19833 case E_V4DImode:
19834 if (TARGET_AVX512VL)
19835 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19836 else
19837 return NULL_TREE;
19838 break;
19839 case E_V8SFmode:
19840 if (TARGET_AVX512VL)
19841 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19842 else
19843 return NULL_TREE;
19844 break;
19845 case E_V8SImode:
19846 if (TARGET_AVX512VL)
19847 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19848 else
19849 return NULL_TREE;
19850 break;
19851 case E_V2DFmode:
19852 if (TARGET_AVX512VL)
19853 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19854 else
19855 return NULL_TREE;
19856 break;
19857 case E_V2DImode:
19858 if (TARGET_AVX512VL)
19859 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19860 else
19861 return NULL_TREE;
19862 break;
19863 case E_V4SFmode:
19864 if (TARGET_AVX512VL)
19865 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19866 else
19867 return NULL_TREE;
19868 break;
19869 case E_V4SImode:
19870 if (TARGET_AVX512VL)
19871 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19872 else
19873 return NULL_TREE;
19874 break;
19875 default:
19876 return NULL_TREE;
19879 return get_ix86_builtin (code);
19882 /* Return true if it is safe to use the rsqrt optabs to optimize
19883 1.0/sqrt. */
19885 static bool
19886 use_rsqrt_p (machine_mode mode)
19888 return ((mode == HFmode
19889 || (TARGET_SSE && TARGET_SSE_MATH))
19890 && flag_finite_math_only
19891 && !flag_trapping_math
19892 && flag_unsafe_math_optimizations);
19895 /* Helper for avx_vpermilps256_operand et al. This is also used by
19896 the expansion functions to turn the parallel back into a mask.
19897 The return value is 0 for no match and the imm8+1 for a match. */
19900 avx_vpermilp_parallel (rtx par, machine_mode mode)
19902 unsigned i, nelt = GET_MODE_NUNITS (mode);
19903 unsigned mask = 0;
19904 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19906 if (XVECLEN (par, 0) != (int) nelt)
19907 return 0;
19909 /* Validate that all of the elements are constants, and not totally
19910 out of range. Copy the data into an integral array to make the
19911 subsequent checks easier. */
19912 for (i = 0; i < nelt; ++i)
19914 rtx er = XVECEXP (par, 0, i);
19915 unsigned HOST_WIDE_INT ei;
19917 if (!CONST_INT_P (er))
19918 return 0;
19919 ei = INTVAL (er);
19920 if (ei >= nelt)
19921 return 0;
19922 ipar[i] = ei;
19925 switch (mode)
19927 case E_V8DFmode:
19928 /* In the 512-bit DFmode case, we can only move elements within
19929 a 128-bit lane. First fill the second part of the mask,
19930 then fallthru. */
19931 for (i = 4; i < 6; ++i)
19933 if (ipar[i] < 4 || ipar[i] >= 6)
19934 return 0;
19935 mask |= (ipar[i] - 4) << i;
19937 for (i = 6; i < 8; ++i)
19939 if (ipar[i] < 6)
19940 return 0;
19941 mask |= (ipar[i] - 6) << i;
19943 /* FALLTHRU */
19945 case E_V4DFmode:
19946 /* In the 256-bit DFmode case, we can only move elements within
19947 a 128-bit lane. */
19948 for (i = 0; i < 2; ++i)
19950 if (ipar[i] >= 2)
19951 return 0;
19952 mask |= ipar[i] << i;
19954 for (i = 2; i < 4; ++i)
19956 if (ipar[i] < 2)
19957 return 0;
19958 mask |= (ipar[i] - 2) << i;
19960 break;
19962 case E_V16SFmode:
19963 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19964 must mirror the permutation in the lower 256 bits. */
19965 for (i = 0; i < 8; ++i)
19966 if (ipar[i] + 8 != ipar[i + 8])
19967 return 0;
19968 /* FALLTHRU */
19970 case E_V8SFmode:
19971 /* In the 256-bit SFmode case, we have full freedom of
19972 movement within the low 128-bit lane, but the high 128-bit
19973 lane must mirror the exact same pattern. */
19974 for (i = 0; i < 4; ++i)
19975 if (ipar[i] + 4 != ipar[i + 4])
19976 return 0;
19977 nelt = 4;
19978 /* FALLTHRU */
19980 case E_V2DFmode:
19981 case E_V4SFmode:
19982 /* In the 128-bit case, we have full freedom in the placement of
19983 the elements from the source operand. */
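/* For example, in V4SFmode the parallel (1 0 3 2) produces mask 0xb1
   (0b10110001), the classic "swap adjacent pairs" selector, and the
   function returns 0xb2.  */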
19984 for (i = 0; i < nelt; ++i)
19985 mask |= ipar[i] << (i * (nelt / 2));
19986 break;
19988 default:
19989 gcc_unreachable ();
19992 /* Make sure success has a non-zero value by adding one. */
19993 return mask + 1;
19996 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19997 the expansion functions to turn the parallel back into a mask.
19998 The return value is 0 for no match and the imm8+1 for a match. */
20001 avx_vperm2f128_parallel (rtx par, machine_mode mode)
20003 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
20004 unsigned mask = 0;
20005 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
20007 if (XVECLEN (par, 0) != (int) nelt)
20008 return 0;
20010 /* Validate that all of the elements are constants, and not totally
20011 out of range. Copy the data into an integral array to make the
20012 subsequent checks easier. */
20013 for (i = 0; i < nelt; ++i)
20015 rtx er = XVECEXP (par, 0, i);
20016 unsigned HOST_WIDE_INT ei;
20018 if (!CONST_INT_P (er))
20019 return 0;
20020 ei = INTVAL (er);
20021 if (ei >= 2 * nelt)
20022 return 0;
20023 ipar[i] = ei;
20026 /* Validate that each half of the permute selects consecutive elements. */
20027 for (i = 0; i < nelt2 - 1; ++i)
20028 if (ipar[i] + 1 != ipar[i + 1])
20029 return 0;
20030 for (i = nelt2; i < nelt - 1; ++i)
20031 if (ipar[i] + 1 != ipar[i + 1])
20032 return 0;
20034 /* Reconstruct the mask. */
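/* For example, in V4DFmode the parallel (0 1 4 5) selects the low half of
   each input operand; the loop below turns it into imm8 0x20 and the
   function returns 0x21.  */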
20035 for (i = 0; i < 2; ++i)
20037 unsigned e = ipar[i * nelt2];
20038 if (e % nelt2)
20039 return 0;
20040 e /= nelt2;
20041 mask |= e << (i * 4);
20044 /* Make sure success has a non-zero value by adding one. */
20045 return mask + 1;
20048 /* Return a mask of VPTERNLOG operands that do not affect output. */
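/* The imm8 is a truth table indexed by (op1_bit << 2) | (op2_bit << 1)
   | op3_bit.  An operand is redundant when flipping its bit never changes
   the selected table entry; e.g. imm8 0x55 (~op3) does not depend on
   operands 1 and 2, so the mask returned is 1 | 2 = 3.  */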
20051 vpternlog_redundant_operand_mask (rtx pternlog_imm)
20053 int mask = 0;
20054 int imm8 = INTVAL (pternlog_imm);
20056 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
20057 mask |= 1;
20058 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
20059 mask |= 2;
20060 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
20061 mask |= 4;
20063 return mask;
20066 /* Eliminate false dependencies on operands that do not affect output
20067 by substituting other operands of a VPTERNLOG. */
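/* For example, imm8 0xf0 computes a plain copy of operand 1, so operands 2
   and 3 are redundant; both are rewritten to operand 1 below so the insn no
   longer depends on whatever those registers last held.  */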
20069 void
20070 substitute_vpternlog_operands (rtx *operands)
20072 int mask = vpternlog_redundant_operand_mask (operands[4]);
20074 if (mask & 1) /* The first operand is redundant. */
20075 operands[1] = operands[2];
20077 if (mask & 2) /* The second operand is redundant. */
20078 operands[2] = operands[1];
20080 if (mask & 4) /* The third operand is redundant. */
20081 operands[3] = operands[1];
20082 else if (REG_P (operands[3]))
20084 if (mask & 1)
20085 operands[1] = operands[3];
20086 if (mask & 2)
20087 operands[2] = operands[3];
20091 /* Return a register priority for hard reg REGNO. */
20092 static int
20093 ix86_register_priority (int hard_regno)
20095 /* ebp and r13 as the base always want a displacement, and r12 as the
20096 base always wants an index. So discourage their use in an
20097 address. */
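/* In the ModRM/SIB encoding (%rbp) and (%r13) can only be expressed with an
   explicit zero displacement, and (%r12) as a base always needs a SIB byte,
   so these forms cost an extra byte each.  */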
20098 if (hard_regno == R12_REG || hard_regno == R13_REG)
20099 return 0;
20100 if (hard_regno == BP_REG)
20101 return 1;
20102 /* New x86-64 int registers result in bigger code size. Discourage them. */
20103 if (REX_INT_REGNO_P (hard_regno))
20104 return 2;
20105 if (REX2_INT_REGNO_P (hard_regno))
20106 return 2;
20107 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
20108 if (REX_SSE_REGNO_P (hard_regno))
20109 return 2;
20110 if (EXT_REX_SSE_REGNO_P (hard_regno))
20111 return 1;
20112 /* Usage of AX register results in smaller code. Prefer it. */
20113 if (hard_regno == AX_REG)
20114 return 4;
20115 return 3;
20118 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
20120 Put float CONST_DOUBLE in the constant pool instead of fp regs.
20121 QImode must go into class Q_REGS.
20122 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
20123 movdf to do mem-to-mem moves through integer regs. */
20125 static reg_class_t
20126 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
20128 machine_mode mode = GET_MODE (x);
20130 /* We're only allowed to return a subclass of CLASS. Many of the
20131 following checks fail for NO_REGS, so eliminate that early. */
20132 if (regclass == NO_REGS)
20133 return NO_REGS;
20135 /* All classes can load zeros. */
20136 if (x == CONST0_RTX (mode))
20137 return regclass;
20139 /* Force constants into memory if we are loading a (nonzero) constant into
20140 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
20141 instructions to load from a constant. */
20142 if (CONSTANT_P (x)
20143 && (MAYBE_MMX_CLASS_P (regclass)
20144 || MAYBE_SSE_CLASS_P (regclass)
20145 || MAYBE_MASK_CLASS_P (regclass)))
20146 return NO_REGS;
20148 /* Floating-point constants need more complex checks. */
20149 if (CONST_DOUBLE_P (x))
20151 /* General regs can load everything. */
20152 if (INTEGER_CLASS_P (regclass))
20153 return regclass;
20155 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20156 zero above. We only want to wind up preferring 80387 registers if
20157 we plan on doing computation with them. */
20158 if (IS_STACK_MODE (mode)
20159 && standard_80387_constant_p (x) > 0)
20161 /* Limit class to FP regs. */
20162 if (FLOAT_CLASS_P (regclass))
20163 return FLOAT_REGS;
20166 return NO_REGS;
20169 /* Prefer SSE if we can use them for math. Also allow integer regs
20170 when moves between register units are cheap. */
20171 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20173 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
20174 && TARGET_INTER_UNIT_MOVES_TO_VEC
20175 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
20176 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20177 else
20178 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20181 /* Generally when we see PLUS here, it's the function invariant
20182 (plus soft-fp const_int), which can only be computed into general
20183 regs. */
20184 if (GET_CODE (x) == PLUS)
20185 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
20187 /* QImode constants are easy to load, but non-constant QImode data
20188 must go into Q_REGS or ALL_MASK_REGS. */
20189 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20191 if (Q_CLASS_P (regclass))
20192 return regclass;
20193 else if (reg_class_subset_p (Q_REGS, regclass))
20194 return Q_REGS;
20195 else if (MASK_CLASS_P (regclass))
20196 return regclass;
20197 else
20198 return NO_REGS;
20201 return regclass;
20204 /* Discourage putting floating-point values in SSE registers unless
20205 SSE math is being used, and likewise for the 387 registers. */
20206 static reg_class_t
20207 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
20209 /* Restrict the output reload class to the register bank that we are doing
20210 math on. If we would like not to return a subset of CLASS, reject this
20211 alternative: if reload cannot do this, it will still use its choice. */
20212 machine_mode mode = GET_MODE (x);
20213 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20214 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
20216 if (IS_STACK_MODE (mode))
20217 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20219 return regclass;
20222 static reg_class_t
20223 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
20224 machine_mode mode, secondary_reload_info *sri)
20226 /* Double-word spills from general registers to non-offsettable memory
20227 references (zero-extended addresses) require special handling. */
20228 if (TARGET_64BIT
20229 && MEM_P (x)
20230 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
20231 && INTEGER_CLASS_P (rclass)
20232 && !offsettable_memref_p (x))
20234 sri->icode = (in_p
20235 ? CODE_FOR_reload_noff_load
20236 : CODE_FOR_reload_noff_store);
20237 /* Add the cost of moving address to a temporary. */
20238 sri->extra_cost = 1;
20240 return NO_REGS;
20243 /* QImode spills from non-QI registers require an
20244 intermediate register on 32-bit targets. */
20245 if (mode == QImode
20246 && ((!TARGET_64BIT && !in_p
20247 && INTEGER_CLASS_P (rclass)
20248 && MAYBE_NON_Q_CLASS_P (rclass))
20249 || (!TARGET_AVX512DQ
20250 && MAYBE_MASK_CLASS_P (rclass))))
20252 int regno = true_regnum (x);
20254 /* Return Q_REGS if the operand is in memory. */
20255 if (regno == -1)
20256 return Q_REGS;
20258 return NO_REGS;
20261 /* Require a move to a GPR, and then a store to memory. */
20262 if ((mode == HFmode || mode == HImode || mode == V2QImode
20263 || mode == BFmode)
20264 && !TARGET_SSE4_1
20265 && SSE_CLASS_P (rclass)
20266 && !in_p && MEM_P (x))
20268 sri->extra_cost = 1;
20269 return GENERAL_REGS;
20272 /* This condition handles corner case where an expression involving
20273 pointers gets vectorized. We're trying to use the address of a
20274 stack slot as a vector initializer.
20276 (set (reg:V2DI 74 [ vect_cst_.2 ])
20277 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
20279 Eventually frame gets turned into sp+offset like this:
20281 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20282 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20283 (const_int 392 [0x188]))))
20285 That later gets turned into:
20287 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20288 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
20289 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
20291 We'll have the following reload recorded:
20293 Reload 0: reload_in (DI) =
20294 (plus:DI (reg/f:DI 7 sp)
20295 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
20296 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20297 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
20298 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
20299 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
20300 reload_reg_rtx: (reg:V2DI 22 xmm1)
20302 Which isn't going to work since SSE instructions can't handle scalar
20303 additions. Returning GENERAL_REGS forces the addition into integer
20304 register and reload can handle subsequent reloads without problems. */
20306 if (in_p && GET_CODE (x) == PLUS
20307 && SSE_CLASS_P (rclass)
20308 && SCALAR_INT_MODE_P (mode))
20309 return GENERAL_REGS;
20311 return NO_REGS;
20314 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
20316 static bool
20317 ix86_class_likely_spilled_p (reg_class_t rclass)
20319 switch (rclass)
20321 case AREG:
20322 case DREG:
20323 case CREG:
20324 case BREG:
20325 case AD_REGS:
20326 case SIREG:
20327 case DIREG:
20328 case SSE_FIRST_REG:
20329 case FP_TOP_REG:
20330 case FP_SECOND_REG:
20331 return true;
20333 default:
20334 break;
20337 return false;
20340 /* Return true if a set of DST by the expression SRC should be allowed.
20341 This prevents complex sets of likely_spilled hard regs before reload. */
20343 bool
20344 ix86_hardreg_mov_ok (rtx dst, rtx src)
20346 /* Avoid complex sets of likely_spilled hard registers before reload. */
20347 if (REG_P (dst) && HARD_REGISTER_P (dst)
20348 && !REG_P (src) && !MEM_P (src)
20349 && !(VECTOR_MODE_P (GET_MODE (dst))
20350 ? standard_sse_constant_p (src, GET_MODE (dst))
20351 : x86_64_immediate_operand (src, GET_MODE (dst)))
20352 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
20353 && !reload_completed)
20354 return false;
20355 return true;
20358 /* If we are copying between registers from different register sets
20359 (e.g. FP and integer), we may need a memory location.
20361 The function can't work reliably when one of the CLASSES is a class
20362 containing registers from multiple sets. We avoid this by never combining
20363 different sets in a single alternative in the machine description.
20364 Ensure that this constraint holds to avoid unexpected surprises.
20366 When STRICT is false, we are being called from REGISTER_MOVE_COST,
20367 so do not enforce these sanity checks.
20369 To optimize register_move_cost performance, define inline variant. */
20371 static inline bool
20372 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20373 reg_class_t class2, int strict)
20375 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
20376 return false;
20378 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20379 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20380 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20381 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20382 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20383 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
20384 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
20385 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
20387 gcc_assert (!strict || lra_in_progress);
20388 return true;
20391 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20392 return true;
20394 /* ??? This is a lie. We do have moves between mmx/general, and for
20395 mmx/sse2. But by saying we need secondary memory we discourage the
20396 register allocator from using the mmx registers unless needed. */
20397 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20398 return true;
20400 /* Between mask and general, we have moves no larger than word size. */
20401 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20403 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
20404 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20405 return true;
20408 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20410 /* SSE1 doesn't have any direct moves from other classes. */
20411 if (!TARGET_SSE2)
20412 return true;
20414 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
20415 return true;
20417 int msize = GET_MODE_SIZE (mode);
20419 /* Between SSE and general, we have moves no larger than word size. */
20420 if (msize > UNITS_PER_WORD)
20421 return true;
20423 /* In addition to SImode moves, HImode moves are supported for SSE2 and above.
20424 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
20425 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
20427 if (msize < minsize)
20428 return true;
20430 /* If the target says that inter-unit moves are more expensive
20431 than moving through memory, then don't generate them. */
20432 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
20433 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
20434 return true;
20437 return false;
20440 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
20442 static bool
20443 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
20444 reg_class_t class2)
20446 return inline_secondary_memory_needed (mode, class1, class2, true);
20449 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
20451 get_secondary_mem widens integral modes to BITS_PER_WORD.
20452 There is no need to emit a full 64-bit move on 64-bit targets
20453 for integral modes that can be moved using a 32-bit move. */
20455 static machine_mode
20456 ix86_secondary_memory_needed_mode (machine_mode mode)
20458 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
20459 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
20460 return mode;
20463 /* Implement the TARGET_CLASS_MAX_NREGS hook.
20465 On the 80386, this is the size of MODE in words,
20466 except in the FP regs, where a single reg is always enough. */
20468 static unsigned char
20469 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
20471 if (MAYBE_INTEGER_CLASS_P (rclass))
20473 if (mode == XFmode)
20474 return (TARGET_64BIT ? 2 : 3);
20475 else if (mode == XCmode)
20476 return (TARGET_64BIT ? 4 : 6);
20477 else
20478 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20480 else
20482 if (COMPLEX_MODE_P (mode))
20483 return 2;
20484 else
20485 return 1;
20489 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20491 static bool
20492 ix86_can_change_mode_class (machine_mode from, machine_mode to,
20493 reg_class_t regclass)
20495 if (from == to)
20496 return true;
20498 /* x87 registers can't do subreg at all, as all values are reformatted
20499 to extended precision. */
20500 if (MAYBE_FLOAT_CLASS_P (regclass))
20501 return false;
20503 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20505 /* Vector registers do not support QI or HImode loads. If we don't
20506 disallow a change to these modes, reload will assume it's ok to
20507 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20508 the vec_dupv4hi pattern.
20509 NB: SSE2 can load 16bit data to sse register via pinsrw. */
20510 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
20511 if (GET_MODE_SIZE (from) < mov_size
20512 || GET_MODE_SIZE (to) < mov_size)
20513 return false;
20516 return true;
20519 /* Return index of MODE in the sse load/store tables. */
20521 static inline int
20522 sse_store_index (machine_mode mode)
20524 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
20525 costs to processor_costs, which would require changing all entries
20526 in the processor cost table. */
20527 if (mode == E_HFmode)
20528 mode = E_SFmode;
20530 switch (GET_MODE_SIZE (mode))
20532 case 4:
20533 return 0;
20534 case 8:
20535 return 1;
20536 case 16:
20537 return 2;
20538 case 32:
20539 return 3;
20540 case 64:
20541 return 4;
20542 default:
20543 return -1;
20547 /* Return the cost of moving data of mode M between a
20548 register and memory. A value of 2 is the default; this cost is
20549 relative to those in `REGISTER_MOVE_COST'.
20551 This function is used extensively by register_move_cost, which is used
20552 to build tables at startup. Make it inline in this case.
20553 When IN is 2, return the maximum of the in and out move costs.
20555 If moving between registers and memory is more expensive than
20556 between two registers, you should define this macro to express the
20557 relative cost.
20559 Also model the increased cost of moving QImode registers in
20560 non-Q_REGS classes. */
20562 static inline int
20563 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
20565 int cost;
20567 if (FLOAT_CLASS_P (regclass))
20569 int index;
20570 switch (mode)
20572 case E_SFmode:
20573 index = 0;
20574 break;
20575 case E_DFmode:
20576 index = 1;
20577 break;
20578 case E_XFmode:
20579 index = 2;
20580 break;
20581 default:
20582 return 100;
20584 if (in == 2)
20585 return MAX (ix86_cost->hard_register.fp_load [index],
20586 ix86_cost->hard_register.fp_store [index]);
20587 return in ? ix86_cost->hard_register.fp_load [index]
20588 : ix86_cost->hard_register.fp_store [index];
20590 if (SSE_CLASS_P (regclass))
20592 int index = sse_store_index (mode);
20593 if (index == -1)
20594 return 100;
20595 if (in == 2)
20596 return MAX (ix86_cost->hard_register.sse_load [index],
20597 ix86_cost->hard_register.sse_store [index]);
20598 return in ? ix86_cost->hard_register.sse_load [index]
20599 : ix86_cost->hard_register.sse_store [index];
20601 if (MASK_CLASS_P (regclass))
20603 int index;
20604 switch (GET_MODE_SIZE (mode))
20606 case 1:
20607 index = 0;
20608 break;
20609 case 2:
20610 index = 1;
20611 break;
20612 /* DImode loads and stores assumed to cost the same as SImode. */
20613 case 4:
20614 case 8:
20615 index = 2;
20616 break;
20617 default:
20618 return 100;
20621 if (in == 2)
20622 return MAX (ix86_cost->hard_register.mask_load[index],
20623 ix86_cost->hard_register.mask_store[index]);
20624 return in ? ix86_cost->hard_register.mask_load[index]
20625 : ix86_cost->hard_register.mask_store[index];
20627 if (MMX_CLASS_P (regclass))
20629 int index;
20630 switch (GET_MODE_SIZE (mode))
20632 case 4:
20633 index = 0;
20634 break;
20635 case 8:
20636 index = 1;
20637 break;
20638 default:
20639 return 100;
20641 if (in == 2)
20642 return MAX (ix86_cost->hard_register.mmx_load [index],
20643 ix86_cost->hard_register.mmx_store [index]);
20644 return in ? ix86_cost->hard_register.mmx_load [index]
20645 : ix86_cost->hard_register.mmx_store [index];
20647 switch (GET_MODE_SIZE (mode))
20649 case 1:
20650 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20652 if (!in)
20653 return ix86_cost->hard_register.int_store[0];
20654 if (TARGET_PARTIAL_REG_DEPENDENCY
20655 && optimize_function_for_speed_p (cfun))
20656 cost = ix86_cost->hard_register.movzbl_load;
20657 else
20658 cost = ix86_cost->hard_register.int_load[0];
20659 if (in == 2)
20660 return MAX (cost, ix86_cost->hard_register.int_store[0]);
20661 return cost;
20663 else
20665 if (in == 2)
20666 return MAX (ix86_cost->hard_register.movzbl_load,
20667 ix86_cost->hard_register.int_store[0] + 4);
20668 if (in)
20669 return ix86_cost->hard_register.movzbl_load;
20670 else
20671 return ix86_cost->hard_register.int_store[0] + 4;
20673 break;
20674 case 2:
20676 int cost;
20677 if (in == 2)
20678 cost = MAX (ix86_cost->hard_register.int_load[1],
20679 ix86_cost->hard_register.int_store[1]);
20680 else
20681 cost = in ? ix86_cost->hard_register.int_load[1]
20682 : ix86_cost->hard_register.int_store[1];
20684 if (mode == E_HFmode)
20686 /* Prefer SSE over GPR for HFmode. */
20687 int sse_cost;
20688 int index = sse_store_index (mode);
20689 if (in == 2)
20690 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
20691 ix86_cost->hard_register.sse_store[index]);
20692 else
20693 sse_cost = (in
20694 ? ix86_cost->hard_register.sse_load [index]
20695 : ix86_cost->hard_register.sse_store [index]);
20696 if (sse_cost >= cost)
20697 cost = sse_cost + 1;
20699 return cost;
20701 default:
20702 if (in == 2)
20703 cost = MAX (ix86_cost->hard_register.int_load[2],
20704 ix86_cost->hard_register.int_store[2]);
20705 else if (in)
20706 cost = ix86_cost->hard_register.int_load[2];
20707 else
20708 cost = ix86_cost->hard_register.int_store[2];
20709 /* Multiply by the number of GPR moves needed. */
20710 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
20714 static int
20715 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
20717 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
20721 /* Return the cost of moving data from a register in class CLASS1 to
20722 one in class CLASS2.
20724 It is not required that the cost always equal 2 when FROM is the same as TO;
20725 on some machines it is expensive to move between registers if they are not
20726 general registers. */
20728 static int
20729 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
20730 reg_class_t class2_i)
20732 enum reg_class class1 = (enum reg_class) class1_i;
20733 enum reg_class class2 = (enum reg_class) class2_i;
20735 /* In case we require secondary memory, compute the cost of the store
20736 followed by the load. In order to avoid bad register allocation choices,
20737 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20739 if (inline_secondary_memory_needed (mode, class1, class2, false))
20741 int cost = 1;
20743 cost += inline_memory_move_cost (mode, class1, 2);
20744 cost += inline_memory_move_cost (mode, class2, 2);
20746 /* In the case of copying from a general purpose register we may emit
20747 multiple stores followed by a single load, causing a memory size
20748 mismatch stall. Count this as an arbitrarily high cost of 20. */
20749 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
20750 && TARGET_MEMORY_MISMATCH_STALL
20751 && targetm.class_max_nregs (class1, mode)
20752 > targetm.class_max_nregs (class2, mode))
20753 cost += 20;
20755 /* In the case of FP/MMX moves, the registers actually overlap, and we
20756 have to switch modes in order to treat them differently. */
20757 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20758 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20759 cost += 20;
20761 return cost;
20764 /* Moves between MMX and non-MMX units require secondary memory. */
20765 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20766 gcc_unreachable ();
20768 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20769 return (SSE_CLASS_P (class1)
20770 ? ix86_cost->hard_register.sse_to_integer
20771 : ix86_cost->hard_register.integer_to_sse);
20773 /* Moves between mask register and GPR. */
20774 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20776 return (MASK_CLASS_P (class1)
20777 ? ix86_cost->hard_register.mask_to_integer
20778 : ix86_cost->hard_register.integer_to_mask);
20780 /* Moving between mask registers. */
20781 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20782 return ix86_cost->hard_register.mask_move;
20784 if (MAYBE_FLOAT_CLASS_P (class1))
20785 return ix86_cost->hard_register.fp_move;
20786 if (MAYBE_SSE_CLASS_P (class1))
20788 if (GET_MODE_BITSIZE (mode) <= 128)
20789 return ix86_cost->hard_register.xmm_move;
20790 if (GET_MODE_BITSIZE (mode) <= 256)
20791 return ix86_cost->hard_register.ymm_move;
20792 return ix86_cost->hard_register.zmm_move;
20794 if (MAYBE_MMX_CLASS_P (class1))
20795 return ix86_cost->hard_register.mmx_move;
20796 return 2;
20799 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20800 words of a value of mode MODE but can be less for certain modes in
20801 special long registers.
20803 Actually there are no two-word move instructions for consecutive
20804 registers, and only registers 0-3 may have mov byte instructions
20805 applied to them. */
20807 static unsigned int
20808 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20810 if (GENERAL_REGNO_P (regno))
20812 if (mode == XFmode)
20813 return TARGET_64BIT ? 2 : 3;
20814 if (mode == XCmode)
20815 return TARGET_64BIT ? 4 : 6;
20816 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20818 if (COMPLEX_MODE_P (mode))
20819 return 2;
20820 /* Register pair for mask registers. */
20821 if (mode == P2QImode || mode == P2HImode)
20822 return 2;
20823 if (mode == V64SFmode || mode == V64SImode)
20824 return 4;
20825 return 1;
20828 /* Implement REGMODE_NATURAL_SIZE(MODE). */
20829 unsigned int
20830 ix86_regmode_natural_size (machine_mode mode)
20832 if (mode == P2HImode || mode == P2QImode)
20833 return GET_MODE_SIZE (mode) / 2;
20834 return UNITS_PER_WORD;
20837 /* Implement TARGET_HARD_REGNO_MODE_OK. */
20839 static bool
20840 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20842 /* The flags register, and only the flags register, can hold CCmode values. */
20843 if (CC_REGNO_P (regno))
20844 return GET_MODE_CLASS (mode) == MODE_CC;
20845 if (GET_MODE_CLASS (mode) == MODE_CC
20846 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20847 return false;
20848 if (STACK_REGNO_P (regno))
20849 return VALID_FP_MODE_P (mode);
20850 if (MASK_REGNO_P (regno))
20852 /* Register pair only starts at even register number. */
20853 if ((mode == P2QImode || mode == P2HImode))
20854 return MASK_PAIR_REGNO_P(regno);
20856 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20857 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
20860 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20861 return false;
20863 if (SSE_REGNO_P (regno))
20865 /* We implement the move patterns for all vector modes into and
20866 out of SSE registers, even when no operation instructions
20867 are available. */
20869 /* For AVX-512 we allow, regardless of regno:
20870 - XI mode
20871 - any of 512-bit wide vector mode
20872 - any scalar mode. */
20873 if (TARGET_AVX512F
20874 && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
20875 || VALID_AVX512F_SCALAR_MODE (mode)))
20876 return true;
20878 /* For AVX-5124FMAPS or AVX-5124VNNIW
20879 allow V64SF and V64SI modes for special regnos. */
20880 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
20881 && (mode == V64SFmode || mode == V64SImode)
20882 && MOD4_SSE_REGNO_P (regno))
20883 return true;
20885 /* TODO check for QI/HI scalars. */
20886 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
20887 if (TARGET_AVX512VL
20888 && (VALID_AVX256_REG_OR_OI_MODE (mode)
20889 || VALID_AVX512VL_128_REG_MODE (mode)))
20890 return true;
20892 /* xmm16-xmm31 are only available for AVX-512. */
20893 if (EXT_REX_SSE_REGNO_P (regno))
20894 return false;
20896 /* Use pinsrw/pextrw to move 16-bit data between sse and integer regs. */
20897 if (TARGET_SSE2 && mode == HImode)
20898 return true;
20900 /* OImode and AVX modes are available only when AVX is enabled. */
20901 return ((TARGET_AVX
20902 && VALID_AVX256_REG_OR_OI_MODE (mode))
20903 || VALID_SSE_REG_MODE (mode)
20904 || VALID_SSE2_REG_MODE (mode)
20905 || VALID_MMX_REG_MODE (mode)
20906 || VALID_MMX_REG_MODE_3DNOW (mode));
20908 if (MMX_REGNO_P (regno))
20910 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20911 so if the register is available at all, then we can move data of
20912 the given mode into or out of it. */
20913 return (VALID_MMX_REG_MODE (mode)
20914 || VALID_MMX_REG_MODE_3DNOW (mode));
20917 if (mode == QImode)
20919 /* Take care with QImode values - they can live in non-QI regs,
20920 but then they do cause partial register stalls. */
20921 if (ANY_QI_REGNO_P (regno))
20922 return true;
20923 if (!TARGET_PARTIAL_REG_STALL)
20924 return true;
20925 /* LRA checks if the hard register is OK for the given mode.
20926 QImode values can live in non-QI regs, so we allow all
20927 registers here. */
20928 if (lra_in_progress)
20929 return true;
20930 return !can_create_pseudo_p ();
20932 /* We handle both integer and floats in the general purpose registers. */
20933 else if (VALID_INT_MODE_P (mode)
20934 || VALID_FP_MODE_P (mode))
20935 return true;
20936 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20937 on to use that value in smaller contexts, this can easily force a
20938 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20939 supporting DImode, allow it. */
20940 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20941 return true;
20943 return false;
20946 /* Implement TARGET_INSN_CALLEE_ABI. */
20948 const predefined_function_abi &
20949 ix86_insn_callee_abi (const rtx_insn *insn)
20951 unsigned int abi_id = 0;
20952 rtx pat = PATTERN (insn);
20953 if (vzeroupper_pattern (pat, VOIDmode))
20954 abi_id = ABI_VZEROUPPER;
20956 return function_abis[abi_id];
20959 /* Initialize function_abis with corresponding abi_id,
20960 currently only handle vzeroupper. */
20961 void
20962 ix86_initialize_callee_abi (unsigned int abi_id)
20964 gcc_assert (abi_id == ABI_VZEROUPPER);
20965 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20966 if (!vzeroupper_abi.initialized_p ())
20968 HARD_REG_SET full_reg_clobbers;
20969 CLEAR_HARD_REG_SET (full_reg_clobbers);
20970 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20974 void
20975 ix86_expand_avx_vzeroupper (void)
20977 /* Initialize vzeroupper_abi here. */
20978 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20979 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20980 /* Return false for non-local goto in can_nonlocal_goto. */
20981 make_reg_eh_region_note (insn, 0, INT_MIN);
20982 /* Flag used for call_insn indicates it's a fake call. */
20983 RTX_FLAG (insn, used) = 1;
20987 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20988 saves SSE registers across calls is Win64 (thus no need to check the
20989 current ABI here), and with AVX enabled Win64 only guarantees that
20990 the low 16 bytes are saved. */
20992 static bool
20993 ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
20994 machine_mode mode)
20996 /* Special ABI for vzeroupper, which only clobbers the higher parts of sse regs. */
20997 if (abi_id == ABI_VZEROUPPER)
20998 return (GET_MODE_SIZE (mode) > 16
20999 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
21000 || LEGACY_SSE_REGNO_P (regno)));
21002 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
21005 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
21006 tieable integer mode. */
21008 static bool
21009 ix86_tieable_integer_mode_p (machine_mode mode)
21011 switch (mode)
21013 case E_HImode:
21014 case E_SImode:
21015 return true;
21017 case E_QImode:
21018 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
21020 case E_DImode:
21021 return TARGET_64BIT;
21023 default:
21024 return false;
21028 /* Implement TARGET_MODES_TIEABLE_P.
21030 Return true if MODE1 is accessible in a register that can hold MODE2
21031 without copying. That is, all register classes that can hold MODE2
21032 can also hold MODE1. */
21034 static bool
21035 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
21037 if (mode1 == mode2)
21038 return true;
21040 if (ix86_tieable_integer_mode_p (mode1)
21041 && ix86_tieable_integer_mode_p (mode2))
21042 return true;
21044 /* MODE2 being XFmode implies fp stack or general regs, which means we
21045 can tie any smaller floating point modes to it. Note that we do not
21046 tie this with TFmode. */
21047 if (mode2 == XFmode)
21048 return mode1 == SFmode || mode1 == DFmode;
21050 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
21051 that we can tie it with SFmode. */
21052 if (mode2 == DFmode)
21053 return mode1 == SFmode;
21055 /* If MODE2 is only appropriate for an SSE register, then tie with
21056 any other mode acceptable to SSE registers. */
21057 if (GET_MODE_SIZE (mode2) == 64
21058 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21059 return (GET_MODE_SIZE (mode1) == 64
21060 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21061 if (GET_MODE_SIZE (mode2) == 32
21062 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21063 return (GET_MODE_SIZE (mode1) == 32
21064 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21065 if (GET_MODE_SIZE (mode2) == 16
21066 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
21067 return (GET_MODE_SIZE (mode1) == 16
21068 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
21070 /* If MODE2 is appropriate for an MMX register, then tie
21071 with any other mode acceptable to MMX registers. */
21072 if (GET_MODE_SIZE (mode2) == 8
21073 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
21074 return (GET_MODE_SIZE (mode1) == 8
21075 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
21077 /* SCmode and DImode can be tied. */
21078 if ((mode1 == E_SCmode && mode2 == E_DImode)
21079 || (mode1 == E_DImode && mode2 == E_SCmode))
21080 return TARGET_64BIT;
21082 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
21083 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
21084 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
21085 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
21086 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
21087 return true;
21089 return false;
21092 /* Return the cost of moving between two registers of mode MODE. */
21094 static int
21095 ix86_set_reg_reg_cost (machine_mode mode)
21097 unsigned int units = UNITS_PER_WORD;
21099 switch (GET_MODE_CLASS (mode))
21101 default:
21102 break;
21104 case MODE_CC:
21105 units = GET_MODE_SIZE (CCmode);
21106 break;
21108 case MODE_FLOAT:
21109 if ((TARGET_SSE && mode == TFmode)
21110 || (TARGET_80387 && mode == XFmode)
21111 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
21112 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
21113 units = GET_MODE_SIZE (mode);
21114 break;
21116 case MODE_COMPLEX_FLOAT:
21117 if ((TARGET_SSE && mode == TCmode)
21118 || (TARGET_80387 && mode == XCmode)
21119 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
21120 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
21121 units = GET_MODE_SIZE (mode);
21122 break;
21124 case MODE_VECTOR_INT:
21125 case MODE_VECTOR_FLOAT:
21126 if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
21127 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21128 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21129 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21130 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21131 && VALID_MMX_REG_MODE (mode)))
21132 units = GET_MODE_SIZE (mode);
21135 /* Return the cost of moving between two registers of mode MODE,
21136 assuming that the move will be in pieces of at most UNITS bytes. */
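/* For example, a DImode register-to-register move on a 32-bit target
   (UNITS_PER_WORD == 4) is costed as COSTS_N_INSNS (2).  */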
21137 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
21140 /* Return cost of vector operation in MODE given that scalar version has
21141 COST. */
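/* For example, on a target with TARGET_AVX256_SPLIT_REGS a 256-bit operation
   is executed internally as two 128-bit halves, so the scalar COST is
   doubled below; wider split modes scale accordingly.  */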
21143 static int
21144 ix86_vec_cost (machine_mode mode, int cost)
21146 if (!VECTOR_MODE_P (mode))
21147 return cost;
21149 if (GET_MODE_BITSIZE (mode) == 128
21150 && TARGET_SSE_SPLIT_REGS)
21151 return cost * GET_MODE_BITSIZE (mode) / 64;
21152 else if (GET_MODE_BITSIZE (mode) > 128
21153 && TARGET_AVX256_SPLIT_REGS)
21154 return cost * GET_MODE_BITSIZE (mode) / 128;
21155 else if (GET_MODE_BITSIZE (mode) > 256
21156 && TARGET_AVX512_SPLIT_REGS)
21157 return cost * GET_MODE_BITSIZE (mode) / 256;
21158 return cost;
21161 /* Return the cost of vec_widen_<s>mult_hi/lo_<mode>, which is only
21162 available for VI124_AVX2 modes. */
21163 static int
21164 ix86_widen_mult_cost (const struct processor_costs *cost,
21165 enum machine_mode mode, bool uns_p)
21167 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
21168 int extra_cost = 0;
21169 int basic_cost = 0;
21170 switch (mode)
21172 case V8HImode:
21173 case V16HImode:
21174 if (!uns_p || mode == V16HImode)
21175 extra_cost = cost->sse_op * 2;
21176 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21177 break;
21178 case V4SImode:
21179 case V8SImode:
21180 /* pmulhw/pmullw can be used. */
21181 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
21182 break;
21183 case V2DImode:
21184 /* pmuludq is used under sse2 and pmuldq under sse4.1; without sse4.1,
21185 sign_extend requires an extra 4 mul, 4 add, 4 cmp and 2 shift insns. */
21186 if (!TARGET_SSE4_1 && !uns_p)
21187 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
21188 + cost->sse_op * 2;
21189 /* Fallthru. */
21190 case V4DImode:
21191 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
21192 break;
21193 default:
21194 /* Not implemented. */
21195 return 100;
21197 return ix86_vec_cost (mode, basic_cost + extra_cost);
21200 /* Return cost of multiplication in MODE. */
21202 static int
21203 ix86_multiplication_cost (const struct processor_costs *cost,
21204 enum machine_mode mode)
21206 machine_mode inner_mode = mode;
21207 if (VECTOR_MODE_P (mode))
21208 inner_mode = GET_MODE_INNER (mode);
21210 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21211 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
21212 else if (X87_FLOAT_MODE_P (mode))
21213 return cost->fmul;
21214 else if (FLOAT_MODE_P (mode))
21215 return ix86_vec_cost (mode,
21216 inner_mode == DFmode ? cost->mulsd : cost->mulss);
21217 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21219 int nmults, nops;
21220 /* Cost of reading the memory. */
21221 int extra;
21223 switch (mode)
21225 case V4QImode:
21226 case V8QImode:
21227 /* Partial V*QImode is emulated with 4-6 insns. */
21228 nmults = 1;
21229 nops = 3;
21230 extra = 0;
21232 if (TARGET_AVX512BW && TARGET_AVX512VL)
21234 else if (TARGET_AVX2)
21235 nops += 2;
21236 else if (TARGET_XOP)
21237 extra += cost->sse_load[2];
21238 else
21240 nops += 1;
21241 extra += cost->sse_load[2];
21243 goto do_qimode;
21245 case V16QImode:
21246 /* V*QImode is emulated with 4-11 insns. */
21247 nmults = 1;
21248 nops = 3;
21249 extra = 0;
21251 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
21253 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
21254 nops += 3;
21256 else if (TARGET_XOP)
21258 nmults += 1;
21259 nops += 2;
21260 extra += cost->sse_load[2];
21262 else
21264 nmults += 1;
21265 nops += 4;
21266 extra += cost->sse_load[2];
21268 goto do_qimode;
21270 case V32QImode:
21271 nmults = 1;
21272 nops = 3;
21273 extra = 0;
21275 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
21277 nmults += 1;
21278 nops += 4;
21279 extra += cost->sse_load[3] * 2;
21281 goto do_qimode;
21283 case V64QImode:
21284 nmults = 2;
21285 nops = 9;
21286 extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
21288 do_qimode:
21289 return ix86_vec_cost (mode, cost->mulss * nmults
21290 + cost->sse_op * nops) + extra;
21292 case V4SImode:
21293 /* pmulld is used in this case. No emulation is needed. */
21294 if (TARGET_SSE4_1)
21295 goto do_native;
21296 /* V4SImode is emulated with 7 insns. */
21297 else
21298 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
21300 case V2DImode:
21301 case V4DImode:
21302 /* vpmullq is used in this case. No emulation is needed. */
21303 if (TARGET_AVX512DQ && TARGET_AVX512VL)
21304 goto do_native;
21305 /* V*DImode is emulated with 6-8 insns. */
21306 else if (TARGET_XOP && mode == V2DImode)
21307 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
21308 /* FALLTHRU */
21309 case V8DImode:
21310 /* vpmullq is used in this case. No emulation is needed. */
21311 if (TARGET_AVX512DQ && mode == V8DImode)
21312 goto do_native;
21313 else
21314 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
21316 default:
21317 do_native:
21318 return ix86_vec_cost (mode, cost->mulss);
21321 else
21322 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
21325 /* Return cost of division in MODE. */
21327 static int
21328 ix86_division_cost (const struct processor_costs *cost,
21329 enum machine_mode mode)
21331 machine_mode inner_mode = mode;
21332 if (VECTOR_MODE_P (mode))
21333 inner_mode = GET_MODE_INNER (mode);
21335 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21336 return inner_mode == DFmode ? cost->divsd : cost->divss;
21337 else if (X87_FLOAT_MODE_P (mode))
21338 return cost->fdiv;
21339 else if (FLOAT_MODE_P (mode))
21340 return ix86_vec_cost (mode,
21341 inner_mode == DFmode ? cost->divsd : cost->divss);
21342 else
21343 return cost->divide[MODE_INDEX (mode)];
21346 /* Return cost of shift in MODE.
21347 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
21348 AND_IN_OP1 specifies whether op1 is the result of an AND, and
21349 SHIFT_AND_TRUNCATE whether op1 is a subreg of an AND result.
21351 SKIP_OP0/1 is set to true if the cost of OP0/1 should be ignored. */
21353 static int
21354 ix86_shift_rotate_cost (const struct processor_costs *cost,
21355 enum rtx_code code,
21356 enum machine_mode mode, bool constant_op1,
21357 HOST_WIDE_INT op1_val,
21358 bool and_in_op1,
21359 bool shift_and_truncate,
21360 bool *skip_op0, bool *skip_op1)
21362 if (skip_op0)
21363 *skip_op0 = *skip_op1 = false;
21365 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21367 int count;
21368 /* Cost of reading the memory. */
21369 int extra;
21371 switch (mode)
21373 case V4QImode:
21374 case V8QImode:
21375 if (TARGET_AVX2)
21376 /* Use vpbroadcast. */
21377 extra = cost->sse_op;
21378 else
21379 extra = cost->sse_load[2];
21381 if (constant_op1)
21383 if (code == ASHIFTRT)
21385 count = 4;
21386 extra *= 2;
21388 else
21389 count = 2;
21391 else if (TARGET_AVX512BW && TARGET_AVX512VL)
21392 return ix86_vec_cost (mode, cost->sse_op * 4);
21393 else if (TARGET_SSE4_1)
21394 count = 5;
21395 else if (code == ASHIFTRT)
21396 count = 6;
21397 else
21398 count = 5;
21399 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21401 case V16QImode:
21402 if (TARGET_XOP)
21404 /* For XOP we use vpshab, which requires a broadcast of the
21405 value to the variable shift insn. For constants this
21406 means a V16Q const in mem; even when we can perform the
21407 shift with one insn, set the cost to prefer paddb. */
21408 if (constant_op1)
21410 extra = cost->sse_load[2];
21411 return ix86_vec_cost (mode, cost->sse_op) + extra;
21413 else
21415 count = (code == ASHIFT) ? 3 : 4;
21416 return ix86_vec_cost (mode, cost->sse_op * count);
21419 /* FALLTHRU */
21420 case V32QImode:
21421 if (TARGET_AVX2)
21422 /* Use vpbroadcast. */
21423 extra = cost->sse_op;
21424 else
21425 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
21427 if (constant_op1)
21429 if (code == ASHIFTRT)
21431 count = 4;
21432 extra *= 2;
21434 else
21435 count = 2;
21437 else if (TARGET_AVX512BW
21438 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
21439 || (mode == V16QImode && TARGET_AVX512VL
21440 && !TARGET_PREFER_AVX128)))
21441 return ix86_vec_cost (mode, cost->sse_op * 4);
21442 else if (TARGET_AVX2
21443 && mode == V16QImode && !TARGET_PREFER_AVX128)
21444 count = 6;
21445 else if (TARGET_SSE4_1)
21446 count = 9;
21447 else if (code == ASHIFTRT)
21448 count = 10;
21449 else
21450 count = 9;
21451 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
21453 case V2DImode:
21454 case V4DImode:
21455 /* V*DImode arithmetic right shift is emulated. */
21456 if (code == ASHIFTRT && !TARGET_AVX512VL)
21458 if (constant_op1)
21460 if (op1_val == 63)
21461 count = TARGET_SSE4_2 ? 1 : 2;
21462 else if (TARGET_XOP)
21463 count = 2;
21464 else if (TARGET_SSE4_1)
21465 count = 3;
21466 else
21467 count = 4;
21469 else if (TARGET_XOP)
21470 count = 3;
21471 else if (TARGET_SSE4_2)
21472 count = 4;
21473 else
21474 count = 5;
21476 return ix86_vec_cost (mode, cost->sse_op * count);
21478 /* FALLTHRU */
21479 default:
21480 return ix86_vec_cost (mode, cost->sse_op);
21484 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21486 if (constant_op1)
21488 if (op1_val > 32)
21489 return cost->shift_const + COSTS_N_INSNS (2);
21490 else
21491 return cost->shift_const * 2;
21493 else
21495 if (and_in_op1)
21496 return cost->shift_var * 2;
21497 else
21498 return cost->shift_var * 6 + COSTS_N_INSNS (2);
21501 else
21503 if (constant_op1)
21504 return cost->shift_const;
21505 else if (shift_and_truncate)
21507 if (skip_op0)
21508 *skip_op0 = *skip_op1 = true;
21509 /* Return the cost after shift-and-truncate. */
21510 return cost->shift_var;
21512 else
21513 return cost->shift_var;
21517 /* Compute a (partial) cost for rtx X. Return true if the complete
21518 cost has been computed, and false if subexpressions should be
21519 scanned. In either case, *TOTAL contains the cost result. */
21521 static bool
21522 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
21523 int *total, bool speed)
21525 rtx mask;
21526 enum rtx_code code = GET_CODE (x);
21527 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
21528 const struct processor_costs *cost
21529 = speed ? ix86_tune_cost : &ix86_size_cost;
21530 int src_cost;
21532 switch (code)
21534 case SET:
21535 if (register_operand (SET_DEST (x), VOIDmode)
21536 && register_operand (SET_SRC (x), VOIDmode))
21538 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
21539 return true;
21542 if (register_operand (SET_SRC (x), VOIDmode))
21543 /* Avoid potentially incorrect high cost from rtx_costs
21544 for non-tieable SUBREGs. */
21545 src_cost = 0;
21546 else
21548 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
21550 if (CONSTANT_P (SET_SRC (x)))
21551 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
21552 a small value, possibly zero for cheap constants. */
21553 src_cost += COSTS_N_INSNS (1);
21556 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
21557 return true;
21559 case CONST_INT:
21560 case CONST:
21561 case LABEL_REF:
21562 case SYMBOL_REF:
21563 if (x86_64_immediate_operand (x, VOIDmode))
21564 *total = 0;
21565 else
21566 *total = 1;
21567 return true;
21569 case CONST_DOUBLE:
21570 if (IS_STACK_MODE (mode))
21571 switch (standard_80387_constant_p (x))
21573 case -1:
21574 case 0:
21575 break;
21576 case 1: /* 0.0 */
21577 *total = 1;
21578 return true;
21579 default: /* Other constants */
21580 *total = 2;
21581 return true;
21583 /* FALLTHRU */
21585 case CONST_VECTOR:
21586 switch (standard_sse_constant_p (x, mode))
21588 case 0:
21589 break;
21590 case 1: /* 0: xor eliminates false dependency */
21591 *total = 0;
21592 return true;
21593 default: /* -1: cmp contains false dependency */
21594 *total = 1;
21595 return true;
21597 /* FALLTHRU */
21599 case CONST_WIDE_INT:
21600 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21601 it'll probably end up. Add a penalty for size. */
21602 *total = (COSTS_N_INSNS (1)
21603 + (!TARGET_64BIT && flag_pic)
21604 + (GET_MODE_SIZE (mode) <= 4
21605 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
21606 return true;
21608 case ZERO_EXTEND:
21609 /* The zero extension is often completely free on x86_64, so make
21610 it as cheap as possible. */
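/* Writing a 32-bit register, e.g. movl %esi, %eax, already clears bits 32-63
   of the destination, so no separate zero-extension instruction is needed.  */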
21611 if (TARGET_64BIT && mode == DImode
21612 && GET_MODE (XEXP (x, 0)) == SImode)
21613 *total = 1;
21614 else if (TARGET_ZERO_EXTEND_WITH_AND)
21615 *total = cost->add;
21616 else
21617 *total = cost->movzx;
21618 return false;
21620 case SIGN_EXTEND:
21621 *total = cost->movsx;
21622 return false;
21624 case ASHIFT:
21625 if (SCALAR_INT_MODE_P (mode)
21626 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
21627 && CONST_INT_P (XEXP (x, 1)))
21629 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21630 if (value == 1)
21632 *total = cost->add;
21633 return false;
21635 if ((value == 2 || value == 3)
21636 && cost->lea <= cost->shift_const)
21638 *total = cost->lea;
21639 return false;
21642 /* FALLTHRU */
21644 case ROTATE:
21645 case ASHIFTRT:
21646 case LSHIFTRT:
21647 case ROTATERT:
21648 bool skip_op0, skip_op1;
21649 *total = ix86_shift_rotate_cost (cost, code, mode,
21650 CONSTANT_P (XEXP (x, 1)),
21651 CONST_INT_P (XEXP (x, 1))
21652 ? INTVAL (XEXP (x, 1)) : -1,
21653 GET_CODE (XEXP (x, 1)) == AND,
21654 SUBREG_P (XEXP (x, 1))
21655 && GET_CODE (XEXP (XEXP (x, 1),
21656 0)) == AND,
21657 &skip_op0, &skip_op1);
21658 if (skip_op0 || skip_op1)
21660 if (!skip_op0)
21661 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21662 if (!skip_op1)
21663 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
21664 return true;
21666 return false;
21668 case FMA:
21670 rtx sub;
21672 gcc_assert (FLOAT_MODE_P (mode));
21673 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
21675 *total = ix86_vec_cost (mode,
21676 GET_MODE_INNER (mode) == SFmode
21677 ? cost->fmass : cost->fmasd);
21678 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
21680 /* A negate in op0 or op2 is free: FMS, FNMA, FNMS. */
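/* E.g. (fma (neg a) b c) maps directly to a vfnmadd* instruction, so the
   inner NEG is not costed separately.  */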
21681 sub = XEXP (x, 0);
21682 if (GET_CODE (sub) == NEG)
21683 sub = XEXP (sub, 0);
21684 *total += rtx_cost (sub, mode, FMA, 0, speed);
21686 sub = XEXP (x, 2);
21687 if (GET_CODE (sub) == NEG)
21688 sub = XEXP (sub, 0);
21689 *total += rtx_cost (sub, mode, FMA, 2, speed);
21690 return true;
21693 case MULT:
21694 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
21696 rtx op0 = XEXP (x, 0);
21697 rtx op1 = XEXP (x, 1);
21698 int nbits;
21699 if (CONST_INT_P (XEXP (x, 1)))
21701 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21702 for (nbits = 0; value != 0; value &= value - 1)
21703 nbits++;
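/* The loop above clears the lowest set bit of VALUE on each iteration, so
   nbits ends up as the number of set bits in the constant multiplier.  */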
21705 else
21706 /* This is arbitrary. */
21707 nbits = 7;
21709 /* Compute costs correctly for widening multiplication. */
21710 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21711 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21712 == GET_MODE_SIZE (mode))
21714 int is_mulwiden = 0;
21715 machine_mode inner_mode = GET_MODE (op0);
21717 if (GET_CODE (op0) == GET_CODE (op1))
21718 is_mulwiden = 1, op1 = XEXP (op1, 0);
21719 else if (CONST_INT_P (op1))
21721 if (GET_CODE (op0) == SIGN_EXTEND)
21722 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21723 == INTVAL (op1);
21724 else
21725 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21728 if (is_mulwiden)
21729 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21732 int mult_init;
21733 // Double word multiplication requires 3 mults and 2 adds.
21734 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21736 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
21737 + 2 * cost->add;
21738 nbits *= 3;
21740 else mult_init = cost->mult_init[MODE_INDEX (mode)];
21742 *total = (mult_init
21743 + nbits * cost->mult_bit
21744 + rtx_cost (op0, mode, outer_code, opno, speed)
21745 + rtx_cost (op1, mode, outer_code, opno, speed));
21747 return true;
21749 *total = ix86_multiplication_cost (cost, mode);
21750 return false;
21752 case DIV:
21753 case UDIV:
21754 case MOD:
21755 case UMOD:
21756 *total = ix86_division_cost (cost, mode);
21757 return false;
21759 case PLUS:
21760 if (GET_MODE_CLASS (mode) == MODE_INT
21761 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21763 if (GET_CODE (XEXP (x, 0)) == PLUS
21764 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21765 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21766 && CONSTANT_P (XEXP (x, 1)))
21768 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21769 if (val == 2 || val == 4 || val == 8)
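/* The whole (plus (plus (mult reg {2,4,8}) reg) const) expression
   matches a single lea, e.g. something like
   leaq 12(%rsi,%rdi,4), %rax in AT&T syntax (illustrative). */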
21771 *total = cost->lea;
21772 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21773 outer_code, opno, speed);
21774 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
21775 outer_code, opno, speed);
21776 *total += rtx_cost (XEXP (x, 1), mode,
21777 outer_code, opno, speed);
21778 return true;
21781 else if (GET_CODE (XEXP (x, 0)) == MULT
21782 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21784 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21785 if (val == 2 || val == 4 || val == 8)
21787 *total = cost->lea;
21788 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21789 outer_code, opno, speed);
21790 *total += rtx_cost (XEXP (x, 1), mode,
21791 outer_code, opno, speed);
21792 return true;
21795 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21797 rtx op = XEXP (XEXP (x, 0), 0);
21799 /* Add with carry, ignore the cost of adding a carry flag. */
21800 if (ix86_carry_flag_operator (op, mode)
21801 || ix86_carry_flag_unset_operator (op, mode))
21802 *total = cost->add;
21803 else
21805 *total = cost->lea;
21806 *total += rtx_cost (op, mode,
21807 outer_code, opno, speed);
21810 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21811 outer_code, opno, speed);
21812 *total += rtx_cost (XEXP (x, 1), mode,
21813 outer_code, opno, speed);
21814 return true;
21817 /* FALLTHRU */
21819 case MINUS:
21820 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21821 if (GET_MODE_CLASS (mode) == MODE_INT
21822 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21823 && GET_CODE (XEXP (x, 0)) == MINUS
21824 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21825 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21827 *total = cost->add;
21828 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21829 outer_code, opno, speed);
21830 *total += rtx_cost (XEXP (x, 1), mode,
21831 outer_code, opno, speed);
21832 return true;
21835 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21836 *total = cost->addss;
21837 else if (X87_FLOAT_MODE_P (mode))
21838 *total = cost->fadd;
21839 else if (FLOAT_MODE_P (mode))
21840 *total = ix86_vec_cost (mode, cost->addss);
21841 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21842 *total = ix86_vec_cost (mode, cost->sse_op);
21843 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21844 *total = cost->add * 2;
21845 else
21846 *total = cost->add;
21847 return false;
21849 case IOR:
21850 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21851 || SSE_FLOAT_MODE_P (mode))
21853 /* (ior (not ...) ...) can be a single insn in AVX512. */
21854 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
21855 && ((TARGET_EVEX512
21856 && GET_MODE_SIZE (mode) == 64)
21857 || (TARGET_AVX512VL
21858 && (GET_MODE_SIZE (mode) == 32
21859 || GET_MODE_SIZE (mode) == 16))))
21861 rtx right = GET_CODE (XEXP (x, 1)) != NOT
21862 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
21864 *total = ix86_vec_cost (mode, cost->sse_op)
21865 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21866 outer_code, opno, speed)
21867 + rtx_cost (right, mode, outer_code, opno, speed);
21868 return true;
21870 *total = ix86_vec_cost (mode, cost->sse_op);
21872 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21873 *total = cost->add * 2;
21874 else
21875 *total = cost->add;
21876 return false;
21878 case XOR:
21879 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21880 || SSE_FLOAT_MODE_P (mode))
21881 *total = ix86_vec_cost (mode, cost->sse_op);
21882 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21883 *total = cost->add * 2;
21884 else
21885 *total = cost->add;
21886 return false;
21888 case AND:
21889 if (address_no_seg_operand (x, mode))
21891 *total = cost->lea;
21892 return true;
21894 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21895 || SSE_FLOAT_MODE_P (mode))
21897 /* pandn is a single instruction. */
21898 if (GET_CODE (XEXP (x, 0)) == NOT)
21900 rtx right = XEXP (x, 1);
21902 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
21903 if (GET_CODE (right) == NOT && TARGET_AVX512F
21904 && ((TARGET_EVEX512
21905 && GET_MODE_SIZE (mode) == 64)
21906 || (TARGET_AVX512VL
21907 && (GET_MODE_SIZE (mode) == 32
21908 || GET_MODE_SIZE (mode) == 16))))
21909 right = XEXP (right, 0);
21911 *total = ix86_vec_cost (mode, cost->sse_op)
21912 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21913 outer_code, opno, speed)
21914 + rtx_cost (right, mode, outer_code, opno, speed);
21915 return true;
21917 else if (GET_CODE (XEXP (x, 1)) == NOT)
21919 *total = ix86_vec_cost (mode, cost->sse_op)
21920 + rtx_cost (XEXP (x, 0), mode,
21921 outer_code, opno, speed)
21922 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21923 outer_code, opno, speed);
21924 return true;
21926 *total = ix86_vec_cost (mode, cost->sse_op);
21928 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21930 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21932 *total = cost->add * 2
21933 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21934 outer_code, opno, speed)
21935 + rtx_cost (XEXP (x, 1), mode,
21936 outer_code, opno, speed);
21937 return true;
21939 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
21941 *total = cost->add * 2
21942 + rtx_cost (XEXP (x, 0), mode,
21943 outer_code, opno, speed)
21944 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21945 outer_code, opno, speed);
21946 return true;
21948 *total = cost->add * 2;
21950 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21952 *total = cost->add
21953 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21954 outer_code, opno, speed)
21955 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
21956 return true;
21958 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
21960 *total = cost->add
21961 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
21962 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21963 outer_code, opno, speed);
21964 return true;
21966 else
21967 *total = cost->add;
21968 return false;
21970 case NOT:
21971 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21973 /* (not (xor ...)) can be a single insn in AVX512. */
21974 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
21975 && ((TARGET_EVEX512
21976 && GET_MODE_SIZE (mode) == 64)
21977 || (TARGET_AVX512VL
21978 && (GET_MODE_SIZE (mode) == 32
21979 || GET_MODE_SIZE (mode) == 16))))
21981 *total = ix86_vec_cost (mode, cost->sse_op)
21982 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21983 outer_code, opno, speed)
21984 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21985 outer_code, opno, speed);
21986 return true;
21989 // vnot is pxor -1.
21990 *total = ix86_vec_cost (mode, cost->sse_op) + 1;
21992 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21993 *total = cost->add * 2;
21994 else
21995 *total = cost->add;
21996 return false;
21998 case NEG:
21999 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22000 *total = cost->sse_op;
22001 else if (X87_FLOAT_MODE_P (mode))
22002 *total = cost->fchs;
22003 else if (FLOAT_MODE_P (mode))
22004 *total = ix86_vec_cost (mode, cost->sse_op);
22005 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22006 *total = ix86_vec_cost (mode, cost->sse_op);
22007 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22008 *total = cost->add * 3;
22009 else
22010 *total = cost->add;
22011 return false;
22013 case COMPARE:
22014 rtx op0, op1;
22015 op0 = XEXP (x, 0);
22016 op1 = XEXP (x, 1);
22017 if (GET_CODE (op0) == ZERO_EXTRACT
22018 && XEXP (op0, 1) == const1_rtx
22019 && CONST_INT_P (XEXP (op0, 2))
22020 && op1 == const0_rtx)
22022 /* This kind of construct is implemented using test[bwl].
22023 Treat it as if we had an AND. */
22024 mode = GET_MODE (XEXP (op0, 0));
22025 *total = (cost->add
22026 + rtx_cost (XEXP (op0, 0), mode, outer_code,
22027 opno, speed)
22028 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
22029 return true;
22032 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
22034 /* This is an overflow detection, count it as a normal compare. */
22035 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
22036 return true;
22039 rtx geu;
22040 /* Match x
22041 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
22042 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
22043 if (mode == CCCmode
22044 && GET_CODE (op0) == NEG
22045 && GET_CODE (geu = XEXP (op0, 0)) == GEU
22046 && REG_P (XEXP (geu, 0))
22047 && (GET_MODE (XEXP (geu, 0)) == CCCmode
22048 || GET_MODE (XEXP (geu, 0)) == CCmode)
22049 && REGNO (XEXP (geu, 0)) == FLAGS_REG
22050 && XEXP (geu, 1) == const0_rtx
22051 && GET_CODE (op1) == LTU
22052 && REG_P (XEXP (op1, 0))
22053 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
22054 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22055 && XEXP (op1, 1) == const0_rtx)
22057 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
22058 *total = 0;
22059 return true;
22061 /* Match x
22062 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
22063 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
22064 if (mode == CCCmode
22065 && GET_CODE (op0) == NEG
22066 && GET_CODE (XEXP (op0, 0)) == LTU
22067 && REG_P (XEXP (XEXP (op0, 0), 0))
22068 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
22069 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
22070 && XEXP (XEXP (op0, 0), 1) == const0_rtx
22071 && GET_CODE (op1) == GEU
22072 && REG_P (XEXP (op1, 0))
22073 && GET_MODE (XEXP (op1, 0)) == CCCmode
22074 && REGNO (XEXP (op1, 0)) == FLAGS_REG
22075 && XEXP (op1, 1) == const0_rtx)
22077 /* This is *x86_cmc. */
22078 if (!speed)
22079 *total = COSTS_N_BYTES (1);
22080 else if (TARGET_SLOW_STC)
22081 *total = COSTS_N_INSNS (2);
22082 else
22083 *total = COSTS_N_INSNS (1);
22084 return true;
22087 if (SCALAR_INT_MODE_P (GET_MODE (op0))
22088 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
22090 if (op1 == const0_rtx)
22091 *total = cost->add
22092 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
22093 else
22094 *total = 3*cost->add
22095 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
22096 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
22097 return true;
22100 /* The embedded comparison operand is completely free. */
22101 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
22102 *total = 0;
22104 return false;
22106 case FLOAT_EXTEND:
22107 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22108 *total = 0;
22109 else
22110 *total = ix86_vec_cost (mode, cost->addss);
22111 return false;
22113 case FLOAT_TRUNCATE:
22114 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22115 *total = cost->fadd;
22116 else
22117 *total = ix86_vec_cost (mode, cost->addss);
22118 return false;
22120 case ABS:
22121 /* SSE requires memory load for the constant operand. It may make
22122 sense to account for this. Of course the constant operand may or
22123 may not be reused. */
22124 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22125 *total = cost->sse_op;
22126 else if (X87_FLOAT_MODE_P (mode))
22127 *total = cost->fabs;
22128 else if (FLOAT_MODE_P (mode))
22129 *total = ix86_vec_cost (mode, cost->sse_op);
22130 return false;
22132 case SQRT:
22133 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
22134 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
22135 else if (X87_FLOAT_MODE_P (mode))
22136 *total = cost->fsqrt;
22137 else if (FLOAT_MODE_P (mode))
22138 *total = ix86_vec_cost (mode,
22139 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
22140 return false;
22142 case UNSPEC:
22143 if (XINT (x, 1) == UNSPEC_TP)
22144 *total = 0;
22145 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
22147 *total = cost->sse_op;
22148 return true;
22150 else if (XINT (x, 1) == UNSPEC_PTEST)
22152 *total = cost->sse_op;
22153 rtx test_op0 = XVECEXP (x, 0, 0);
22154 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
22155 return false;
22156 if (GET_CODE (test_op0) == AND)
22158 rtx and_op0 = XEXP (test_op0, 0);
22159 if (GET_CODE (and_op0) == NOT)
22160 and_op0 = XEXP (and_op0, 0);
22161 *total += rtx_cost (and_op0, GET_MODE (and_op0),
22162 AND, 0, speed)
22163 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
22164 AND, 1, speed);
22166 else
22167 *total = rtx_cost (test_op0, GET_MODE (test_op0),
22168 UNSPEC, 0, speed);
22169 return true;
22171 return false;
22173 case VEC_SELECT:
22174 case VEC_CONCAT:
22175 case VEC_DUPLICATE:
22176 /* ??? Assume all of these vector manipulation patterns are
22177 recognizable, in which case they all pretty much have the
22178 same cost. */
22179 *total = cost->sse_op;
22180 return true;
22181 case VEC_MERGE:
22182 mask = XEXP (x, 2);
22183 /* This is a masked instruction; assume the same cost
22184 as the nonmasked variant. */
22185 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
22186 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
22187 else
22188 *total = cost->sse_op;
22189 return true;
22191 case MEM:
22192 /* An insn that accesses memory is slightly more expensive
22193 than one that does not. */
22194 if (speed)
22195 *total += 1;
22196 return false;
22198 case ZERO_EXTRACT:
22199 if (XEXP (x, 1) == const1_rtx
22200 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
22201 && GET_MODE (XEXP (x, 2)) == SImode
22202 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
22204 /* Ignore cost of zero extension and masking of last argument. */
22205 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22206 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22207 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
22208 return true;
22210 return false;
22212 case IF_THEN_ELSE:
22213 if (TARGET_XOP
22214 && VECTOR_MODE_P (mode)
22215 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
22217 /* vpcmov. */
22218 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
22219 if (!REG_P (XEXP (x, 0)))
22220 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22221 if (!REG_P (XEXP (x, 1)))
22222 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22223 if (!REG_P (XEXP (x, 2)))
22224 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22225 return true;
22227 else if (TARGET_CMOVE
22228 && SCALAR_INT_MODE_P (mode)
22229 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
22231 /* cmov. */
22232 *total = COSTS_N_INSNS (1);
22233 if (!REG_P (XEXP (x, 0)))
22234 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
22235 if (!REG_P (XEXP (x, 1)))
22236 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
22237 if (!REG_P (XEXP (x, 2)))
22238 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
22239 return true;
22241 return false;
22243 default:
22244 return false;
22248 #if TARGET_MACHO
22250 static int current_machopic_label_num;
22252 /* Given a symbol name and its associated stub, write out the
22253 definition of the stub. */
22255 void
22256 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22258 unsigned int length;
22259 char *binder_name, *symbol_name, lazy_ptr_name[32];
22260 int label = ++current_machopic_label_num;
22262 /* For 64-bit we shouldn't get here. */
22263 gcc_assert (!TARGET_64BIT);
22265 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22266 symb = targetm.strip_name_encoding (symb);
22268 length = strlen (stub);
22269 binder_name = XALLOCAVEC (char, length + 32);
22270 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22272 length = strlen (symb);
22273 symbol_name = XALLOCAVEC (char, length + 32);
22274 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22276 sprintf (lazy_ptr_name, "L%d$lz", label);
22278 if (MACHOPIC_ATT_STUB)
22279 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
22280 else if (MACHOPIC_PURE)
22281 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
22282 else
22283 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22285 fprintf (file, "%s:\n", stub);
22286 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22288 if (MACHOPIC_ATT_STUB)
22290 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
22292 else if (MACHOPIC_PURE)
22294 /* PIC stub. */
22295 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22296 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
22297 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
22298 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
22299 label, lazy_ptr_name, label);
22300 fprintf (file, "\tjmp\t*%%ecx\n");
22302 else
22303 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22305 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
22306 it needs no stub-binding-helper. */
22307 if (MACHOPIC_ATT_STUB)
22308 return;
22310 fprintf (file, "%s:\n", binder_name);
22312 if (MACHOPIC_PURE)
22314 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
22315 fprintf (file, "\tpushl\t%%ecx\n");
22317 else
22318 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22320 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
22322 /* N.B. Keep the correspondence of these
22323 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
22324 old-pic/new-pic/non-pic stubs; altering this will break
22325 compatibility with existing dylibs. */
22326 if (MACHOPIC_PURE)
22328 /* 25-byte PIC stub using "CALL get_pc_thunk". */
22329 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
22331 else
22332 /* 16-byte -mdynamic-no-pic stub. */
22333 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
22335 fprintf (file, "%s:\n", lazy_ptr_name);
22336 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22337 fprintf (file, ASM_LONG "%s\n", binder_name);
22339 #endif /* TARGET_MACHO */
22341 /* Order the registers for register allocator. */
22343 void
22344 x86_order_regs_for_local_alloc (void)
22346 int pos = 0;
22347 int i;
22349 /* First allocate the local general purpose registers. */
22350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22351 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
22352 reg_alloc_order [pos++] = i;
22354 /* Global general purpose registers. */
22355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22356 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
22357 reg_alloc_order [pos++] = i;
22359 /* x87 registers come first in case we are doing FP math
22360 using them. */
22361 if (!TARGET_SSE_MATH)
22362 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22363 reg_alloc_order [pos++] = i;
22365 /* SSE registers. */
22366 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22367 reg_alloc_order [pos++] = i;
22368 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22369 reg_alloc_order [pos++] = i;
22371 /* Extended REX SSE registers. */
22372 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
22373 reg_alloc_order [pos++] = i;
22375 /* Mask register. */
22376 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
22377 reg_alloc_order [pos++] = i;
22379 /* x87 registers. */
22380 if (TARGET_SSE_MATH)
22381 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22382 reg_alloc_order [pos++] = i;
22384 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22385 reg_alloc_order [pos++] = i;
22387 /* Initialize the rest of the array, as we do not allocate some
22388 registers at all. */
22389 while (pos < FIRST_PSEUDO_REGISTER)
22390 reg_alloc_order [pos++] = 0;
22393 static bool
22394 ix86_ms_bitfield_layout_p (const_tree record_type)
22396 return ((TARGET_MS_BITFIELD_LAYOUT
22397 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22398 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
22401 /* Returns an expression indicating where the this parameter is
22402 located on entry to the FUNCTION. */
22404 static rtx
22405 x86_this_parameter (tree function)
22407 tree type = TREE_TYPE (function);
22408 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22409 int nregs;
22411 if (TARGET_64BIT)
22413 const int *parm_regs;
22415 if (ix86_function_type_abi (type) == MS_ABI)
22416 parm_regs = x86_64_ms_abi_int_parameter_registers;
22417 else
22418 parm_regs = x86_64_int_parameter_registers;
22419 return gen_rtx_REG (Pmode, parm_regs[aggr]);
22422 nregs = ix86_function_regparm (type, function);
22424 if (nregs > 0 && !stdarg_p (type))
22426 int regno;
22427 unsigned int ccvt = ix86_get_callcvt (type);
22429 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22430 regno = aggr ? DX_REG : CX_REG;
22431 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22433 regno = CX_REG;
22434 if (aggr)
22435 return gen_rtx_MEM (SImode,
22436 plus_constant (Pmode, stack_pointer_rtx, 4));
22438 else
22440 regno = AX_REG;
22441 if (aggr)
22443 regno = DX_REG;
22444 if (nregs == 1)
22445 return gen_rtx_MEM (SImode,
22446 plus_constant (Pmode,
22447 stack_pointer_rtx, 4));
22450 return gen_rtx_REG (SImode, regno);
22453 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
22454 aggr ? 8 : 4));
22457 /* Determine whether x86_output_mi_thunk can succeed. */
22459 static bool
22460 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
22461 const_tree function)
22463 /* 64-bit can handle anything. */
22464 if (TARGET_64BIT)
22465 return true;
22467 /* For 32-bit, everything's fine if we have one free register. */
22468 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22469 return true;
22471 /* Need a free register for vcall_offset. */
22472 if (vcall_offset)
22473 return false;
22475 /* Need a free register for GOT references. */
22476 if (flag_pic && !targetm.binds_local_p (function))
22477 return false;
22479 /* Otherwise ok. */
22480 return true;
22483 /* Output the assembler code for a thunk function. THUNK_DECL is the
22484 declaration for the thunk function itself, FUNCTION is the decl for
22485 the target function. DELTA is an immediate constant offset to be
22486 added to THIS. If VCALL_OFFSET is nonzero, the word at
22487 *(*this + vcall_offset) should be added to THIS. */
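/* In rough C-like pseudocode the emitted thunk therefore does

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;

   (a sketch of the intent only; the actual adjustment is emitted as
   RTL below and differs per ABI). */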
22489 static void
22490 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
22491 HOST_WIDE_INT vcall_offset, tree function)
22493 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
22494 rtx this_param = x86_this_parameter (function);
22495 rtx this_reg, tmp, fnaddr;
22496 unsigned int tmp_regno;
22497 rtx_insn *insn;
22498 int saved_flag_force_indirect_call = flag_force_indirect_call;
22500 if (TARGET_64BIT)
22501 tmp_regno = R10_REG;
22502 else
22504 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
22505 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
22506 tmp_regno = AX_REG;
22507 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
22508 tmp_regno = DX_REG;
22509 else
22510 tmp_regno = CX_REG;
22512 if (flag_pic)
22513 flag_force_indirect_call = 0;
22516 emit_note (NOTE_INSN_PROLOGUE_END);
22518 /* If CET is enabled, insert an ENDBR instruction. */
22519 if ((flag_cf_protection & CF_BRANCH))
22520 emit_insn (gen_nop_endbr ());
22522 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
22523 pull it in now and let DELTA benefit. */
22524 if (REG_P (this_param))
22525 this_reg = this_param;
22526 else if (vcall_offset)
22528 /* Put the this parameter into %eax. */
22529 this_reg = gen_rtx_REG (Pmode, AX_REG);
22530 emit_move_insn (this_reg, this_param);
22532 else
22533 this_reg = NULL_RTX;
22535 /* Adjust the this parameter by a fixed constant. */
22536 if (delta)
22538 rtx delta_rtx = GEN_INT (delta);
22539 rtx delta_dst = this_reg ? this_reg : this_param;
22541 if (TARGET_64BIT)
22543 if (!x86_64_general_operand (delta_rtx, Pmode))
22545 tmp = gen_rtx_REG (Pmode, tmp_regno);
22546 emit_move_insn (tmp, delta_rtx);
22547 delta_rtx = tmp;
22551 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
22554 /* Adjust the this parameter by a value stored in the vtable. */
22555 if (vcall_offset)
22557 rtx vcall_addr, vcall_mem, this_mem;
22559 tmp = gen_rtx_REG (Pmode, tmp_regno);
22561 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
22562 if (Pmode != ptr_mode)
22563 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
22564 emit_move_insn (tmp, this_mem);
22566 /* Adjust the this parameter. */
22567 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
22568 if (TARGET_64BIT
22569 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
22571 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
22572 emit_move_insn (tmp2, GEN_INT (vcall_offset));
22573 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
22576 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
22577 if (Pmode != ptr_mode)
22578 emit_insn (gen_addsi_1_zext (this_reg,
22579 gen_rtx_REG (ptr_mode,
22580 REGNO (this_reg)),
22581 vcall_mem));
22582 else
22583 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
22586 /* If necessary, drop THIS back to its stack slot. */
22587 if (this_reg && this_reg != this_param)
22588 emit_move_insn (this_param, this_reg);
22590 fnaddr = XEXP (DECL_RTL (function), 0);
22591 if (TARGET_64BIT)
22593 if (!flag_pic || targetm.binds_local_p (function)
22594 || TARGET_PECOFF)
22596 else
22598 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
22599 tmp = gen_rtx_CONST (Pmode, tmp);
22600 fnaddr = gen_const_mem (Pmode, tmp);
22603 else
22605 if (!flag_pic || targetm.binds_local_p (function))
22607 #if TARGET_MACHO
22608 else if (TARGET_MACHO)
22610 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
22611 fnaddr = XEXP (fnaddr, 0);
22613 #endif /* TARGET_MACHO */
22614 else
22616 tmp = gen_rtx_REG (Pmode, CX_REG);
22617 output_set_got (tmp, NULL_RTX);
22619 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
22620 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
22621 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
22622 fnaddr = gen_const_mem (Pmode, fnaddr);
22626 /* Our sibling call patterns do not allow memories, because we have no
22627 predicate that can distinguish between frame and non-frame memory.
22628 For our purposes here, we can get away with (ab)using a jump pattern,
22629 because we're going to do no optimization. */
22630 if (MEM_P (fnaddr))
22632 if (sibcall_insn_operand (fnaddr, word_mode))
22634 fnaddr = XEXP (DECL_RTL (function), 0);
22635 tmp = gen_rtx_MEM (QImode, fnaddr);
22636 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22637 tmp = emit_call_insn (tmp);
22638 SIBLING_CALL_P (tmp) = 1;
22640 else
22641 emit_jump_insn (gen_indirect_jump (fnaddr));
22643 else
22645 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
22647 // CM_LARGE_PIC always uses a pseudo PIC register, which is
22648 // uninitialized. Since FUNCTION is local and calling it
22649 // doesn't go through the PLT, we use scratch register %r11 as
22650 // the PIC register and initialize it here.
22651 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
22652 ix86_init_large_pic_reg (tmp_regno);
22653 fnaddr = legitimize_pic_address (fnaddr,
22654 gen_rtx_REG (Pmode, tmp_regno));
22657 if (!sibcall_insn_operand (fnaddr, word_mode))
22659 tmp = gen_rtx_REG (word_mode, tmp_regno);
22660 if (GET_MODE (fnaddr) != word_mode)
22661 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
22662 emit_move_insn (tmp, fnaddr);
22663 fnaddr = tmp;
22666 tmp = gen_rtx_MEM (QImode, fnaddr);
22667 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22668 tmp = emit_call_insn (tmp);
22669 SIBLING_CALL_P (tmp) = 1;
22671 emit_barrier ();
22673 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22674 insn = get_insns ();
22675 shorten_branches (insn);
22676 assemble_start_function (thunk_fndecl, fnname);
22677 final_start_function (insn, file, 1);
22678 final (insn, file, 1);
22679 final_end_function ();
22680 assemble_end_function (thunk_fndecl, fnname);
22682 flag_force_indirect_call = saved_flag_force_indirect_call;
22685 static void
22686 x86_file_start (void)
22688 default_file_start ();
22689 if (TARGET_16BIT)
22690 fputs ("\t.code16gcc\n", asm_out_file);
22691 #if TARGET_MACHO
22692 darwin_file_start ();
22693 #endif
22694 if (X86_FILE_START_VERSION_DIRECTIVE)
22695 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22696 if (X86_FILE_START_FLTUSED)
22697 fputs ("\t.global\t__fltused\n", asm_out_file);
22698 if (ix86_asm_dialect == ASM_INTEL)
22699 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22703 x86_field_alignment (tree type, int computed)
22705 machine_mode mode;
22707 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22708 return computed;
22709 if (TARGET_IAMCU)
22710 return iamcu_alignment (type, computed);
22711 type = strip_array_types (type);
22712 mode = TYPE_MODE (type);
22713 if (mode == DFmode || mode == DCmode
22714 || GET_MODE_CLASS (mode) == MODE_INT
22715 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22717 if (TYPE_ATOMIC (type) && computed > 32)
22719 static bool warned;
22721 if (!warned && warn_psabi)
22723 const char *url
22724 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
22726 warned = true;
22727 inform (input_location, "the alignment of %<_Atomic %T%> "
22728 "fields changed in %{GCC 11.1%}",
22729 TYPE_MAIN_VARIANT (type), url);
22732 else
22733 return MIN (32, computed);
22735 return computed;
22738 /* Print call to TARGET to FILE. */
22740 static void
22741 x86_print_call_or_nop (FILE *file, const char *target)
22743 if (flag_nop_mcount || !strcmp (target, "nop"))
22744 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
22745 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
22746 else
22747 fprintf (file, "1:\tcall\t%s\n", target);
22750 static bool
22751 current_fentry_name (const char **name)
22753 tree attr = lookup_attribute ("fentry_name",
22754 DECL_ATTRIBUTES (current_function_decl));
22755 if (!attr)
22756 return false;
22757 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22758 return true;
22761 static bool
22762 current_fentry_section (const char **name)
22764 tree attr = lookup_attribute ("fentry_section",
22765 DECL_ATTRIBUTES (current_function_decl));
22766 if (!attr)
22767 return false;
22768 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22769 return true;
22772 /* Return a caller-saved register which isn't live, or a callee-saved
22773 register which has been saved on the stack in the prologue at entry,
22774 for profiling. */
22776 static int
22777 x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
22779 /* Use %r10 if the profiler is emitted before the prologue or it isn't
22780 used by DRAP. */
22781 if (ix86_profile_before_prologue ()
22782 || !crtl->drap_reg
22783 || REGNO (crtl->drap_reg) != R10_REG)
22784 return R10_REG;
22786 /* The profiler is emitted after the prologue. If there is a
22787 caller-saved register which isn't live or a callee-saved
22788 register saved on stack in the prologue, use it. */
22790 bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
22792 int i;
22793 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22794 if (GENERAL_REGNO_P (i)
22795 && i != R10_REG
22796 #ifdef NO_PROFILE_COUNTERS
22797 && (r11_ok || i != R11_REG)
22798 #else
22799 && i != R11_REG
22800 #endif
22801 && TEST_HARD_REG_BIT (accessible_reg_set, i)
22802 && (ix86_save_reg (i, true, true)
22803 || (call_used_regs[i]
22804 && !fixed_regs[i]
22805 && !REGNO_REG_SET_P (reg_live, i))))
22806 return i;
22808 sorry ("no register available for profiling %<-mcmodel=large%s%>",
22809 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
22811 return R10_REG;
22814 /* Output assembler code to FILE to increment profiler label # LABELNO
22815 for profiling a function entry. */
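/* In the common case (64-bit small code model, no PIC indirection, AT&T
   syntax) this boils down to a single "1: call mcount"-style line emitted
   via x86_print_call_or_nop (or a 5-byte NOP with -mnop-mcount); the
   branches below handle profile counters, large code models, PIC and
   -masm=intel.  The exact mcount symbol name is target-dependent. */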
22816 void
22817 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22819 if (cfun->machine->insn_queued_at_entrance)
22821 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
22822 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
22823 unsigned int patch_area_size
22824 = crtl->patch_area_size - crtl->patch_area_entry;
22825 if (patch_area_size)
22826 ix86_output_patchable_area (patch_area_size,
22827 crtl->patch_area_entry == 0);
22830 const char *mcount_name = MCOUNT_NAME;
22832 if (current_fentry_name (&mcount_name))
22834 else if (fentry_name)
22835 mcount_name = fentry_name;
22836 else if (flag_fentry)
22837 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
22839 if (TARGET_64BIT)
22841 #ifndef NO_PROFILE_COUNTERS
22842 if (ASSEMBLER_DIALECT == ASM_INTEL)
22843 fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
22844 else
22845 fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
22846 #endif
22848 int scratch;
22849 const char *reg;
22850 char legacy_reg[4] = { 0 };
22852 if (!TARGET_PECOFF)
22854 switch (ix86_cmodel)
22856 case CM_LARGE:
22857 scratch = x86_64_select_profile_regnum (true);
22858 reg = hi_reg_name[scratch];
22859 if (LEGACY_INT_REGNO_P (scratch))
22861 legacy_reg[0] = 'r';
22862 legacy_reg[1] = reg[0];
22863 legacy_reg[2] = reg[1];
22864 reg = legacy_reg;
22866 if (ASSEMBLER_DIALECT == ASM_INTEL)
22867 fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
22868 "\tcall\t%s\n", reg, mcount_name, reg);
22869 else
22870 fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
22871 mcount_name, reg, reg);
22872 break;
22873 case CM_LARGE_PIC:
22874 #ifdef NO_PROFILE_COUNTERS
22875 scratch = x86_64_select_profile_regnum (false);
22876 reg = hi_reg_name[scratch];
22877 if (LEGACY_INT_REGNO_P (scratch))
22879 legacy_reg[0] = 'r';
22880 legacy_reg[1] = reg[0];
22881 legacy_reg[2] = reg[1];
22882 reg = legacy_reg;
22884 if (ASSEMBLER_DIALECT == ASM_INTEL)
22886 fprintf (file, "1:movabs\tr11, "
22887 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
22888 fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
22889 fprintf (file, "\tadd\t%s, r11\n", reg);
22890 fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
22891 mcount_name);
22892 fprintf (file, "\tadd\t%s, r11\n", reg);
22893 fprintf (file, "\tcall\t%s\n", reg);
22894 break;
22896 fprintf (file,
22897 "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
22898 fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
22899 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
22900 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
22901 fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
22902 fprintf (file, "\tcall\t*%%%s\n", reg);
22903 #else
22904 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
22905 #endif
22906 break;
22907 case CM_SMALL_PIC:
22908 case CM_MEDIUM_PIC:
22909 if (!ix86_direct_extern_access)
22911 if (ASSEMBLER_DIALECT == ASM_INTEL)
22912 fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
22913 mcount_name);
22914 else
22915 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
22916 mcount_name);
22917 break;
22919 /* fall through */
22920 default:
22921 x86_print_call_or_nop (file, mcount_name);
22922 break;
22925 else
22926 x86_print_call_or_nop (file, mcount_name);
22928 else if (flag_pic)
22930 #ifndef NO_PROFILE_COUNTERS
22931 if (ASSEMBLER_DIALECT == ASM_INTEL)
22932 fprintf (file,
22933 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
22934 LPREFIX, labelno);
22935 else
22936 fprintf (file,
22937 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
22938 LPREFIX, labelno);
22939 #endif
22940 if (ASSEMBLER_DIALECT == ASM_INTEL)
22941 fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
22942 else
22943 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
22945 else
22947 #ifndef NO_PROFILE_COUNTERS
22948 if (ASSEMBLER_DIALECT == ASM_INTEL)
22949 fprintf (file,
22950 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
22951 LPREFIX, labelno);
22952 else
22953 fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
22954 LPREFIX, labelno);
22955 #endif
22956 x86_print_call_or_nop (file, mcount_name);
22959 if (flag_record_mcount
22960 || lookup_attribute ("fentry_section",
22961 DECL_ATTRIBUTES (current_function_decl)))
22963 const char *sname = "__mcount_loc";
22965 if (current_fentry_section (&sname))
22967 else if (fentry_section)
22968 sname = fentry_section;
22970 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
22971 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
22972 fprintf (file, "\t.previous\n");
22976 /* We don't have exact information about the insn sizes, but we may assume
22977 quite safely that we are informed about all 1 byte insns and memory
22978 address sizes. This is enough to eliminate unnecessary padding in
22979 99% of cases. */
22982 ix86_min_insn_size (rtx_insn *insn)
22984 int l = 0, len;
22986 if (!INSN_P (insn) || !active_insn_p (insn))
22987 return 0;
22989 /* Discard alignments we've emitted, and jump instructions. */
22990 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22991 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22992 return 0;
22994 /* Important case - calls are always 5 bytes.
22995 It is common to have many calls in a row. */
22996 if (CALL_P (insn)
22997 && symbolic_reference_mentioned_p (PATTERN (insn))
22998 && !SIBLING_CALL_P (insn))
22999 return 5;
23000 len = get_attr_length (insn);
23001 if (len <= 1)
23002 return 1;
23004 /* For normal instructions we rely on get_attr_length being exact,
23005 with a few exceptions. */
23006 if (!JUMP_P (insn))
23008 enum attr_type type = get_attr_type (insn);
23010 switch (type)
23012 case TYPE_MULTI:
23013 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
23014 || asm_noperands (PATTERN (insn)) >= 0)
23015 return 0;
23016 break;
23017 case TYPE_OTHER:
23018 case TYPE_FCMP:
23019 break;
23020 default:
23021 /* Otherwise trust get_attr_length. */
23022 return len;
23025 l = get_attr_length_address (insn);
23026 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23027 l = 4;
23029 if (l)
23030 return 1+l;
23031 else
23032 return 2;
23035 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23037 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
23038 16-byte window. */
23040 static void
23041 ix86_avoid_jump_mispredicts (void)
23043 rtx_insn *insn, *start = get_insns ();
23044 int nbytes = 0, njumps = 0;
23045 bool isjump = false;
23047 /* Look for all minimal intervals of instructions containing 4 jumps.
23048 The intervals are bounded by START and INSN. NBYTES is the total
23049 size of the instructions in the interval, including INSN and not
23050 including START. When NBYTES is smaller than 16, it is possible
23051 that the end of START and INSN end up in the same 16-byte window.
23053 The smallest offset in the window at which INSN can start is the case
23054 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
23055 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
23057 Don't consider an asm goto as a jump; while it can contain a jump, it
23058 doesn't have to, since control transfer to its label(s) can happen by
23059 other means, and we also estimate the minimum length of asm stmts as 0. */
23060 for (insn = start; insn; insn = NEXT_INSN (insn))
23062 int min_size;
23064 if (LABEL_P (insn))
23066 align_flags alignment = label_to_alignment (insn);
23067 int align = alignment.levels[0].log;
23068 int max_skip = alignment.levels[0].maxskip;
23070 if (max_skip > 15)
23071 max_skip = 15;
23072 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
23073 already in the current 16 byte page, because otherwise
23074 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
23075 bytes to reach 16 byte boundary. */
23076 if (align <= 0
23077 || (align <= 3 && max_skip != (1 << align) - 1))
23078 max_skip = 0;
23079 if (dump_file)
23080 fprintf (dump_file, "Label %i with max_skip %i\n",
23081 INSN_UID (insn), max_skip);
23082 if (max_skip)
23084 while (nbytes + max_skip >= 16)
23086 start = NEXT_INSN (start);
23087 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23088 || CALL_P (start))
23089 njumps--, isjump = true;
23090 else
23091 isjump = false;
23092 nbytes -= ix86_min_insn_size (start);
23095 continue;
23098 min_size = ix86_min_insn_size (insn);
23099 nbytes += min_size;
23100 if (dump_file)
23101 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
23102 INSN_UID (insn), min_size);
23103 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
23104 || CALL_P (insn))
23105 njumps++;
23106 else
23107 continue;
23109 while (njumps > 3)
23111 start = NEXT_INSN (start);
23112 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
23113 || CALL_P (start))
23114 njumps--, isjump = true;
23115 else
23116 isjump = false;
23117 nbytes -= ix86_min_insn_size (start);
23119 gcc_assert (njumps >= 0);
23120 if (dump_file)
23121 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23122 INSN_UID (start), INSN_UID (insn), nbytes);
23124 if (njumps == 3 && isjump && nbytes < 16)
23126 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
23128 if (dump_file)
23129 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23130 INSN_UID (insn), padsize);
23131 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
23135 #endif
23137 /* AMD Athlon works faster
23138 when RET is not destination of conditional jump or directly preceded
23139 by other jump instruction. We avoid the penalty by inserting NOP just
23140 before the RET instructions in such cases. */
23141 static void
23142 ix86_pad_returns (void)
23144 edge e;
23145 edge_iterator ei;
23147 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23149 basic_block bb = e->src;
23150 rtx_insn *ret = BB_END (bb);
23151 rtx_insn *prev;
23152 bool replace = false;
23154 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
23155 || optimize_bb_for_size_p (bb))
23156 continue;
23157 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23158 if (active_insn_p (prev) || LABEL_P (prev))
23159 break;
23160 if (prev && LABEL_P (prev))
23162 edge e;
23163 edge_iterator ei;
23165 FOR_EACH_EDGE (e, ei, bb->preds)
23166 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23167 && !(e->flags & EDGE_FALLTHRU))
23169 replace = true;
23170 break;
23173 if (!replace)
23175 prev = prev_active_insn (ret);
23176 if (prev
23177 && ((JUMP_P (prev) && any_condjump_p (prev))
23178 || CALL_P (prev)))
23179 replace = true;
23180 /* Empty functions get a branch mispredict even when
23181 the jump destination is not visible to us. */
23182 if (!prev && !optimize_function_for_size_p (cfun))
23183 replace = true;
23185 if (replace)
23187 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
23188 delete_insn (ret);
23193 /* Count the minimum number of instructions in BB. Return 4 if the
23194 number of instructions >= 4. */
23196 static int
23197 ix86_count_insn_bb (basic_block bb)
23199 rtx_insn *insn;
23200 int insn_count = 0;
23202 /* Count number of instructions in this block. Return 4 if the number
23203 of instructions >= 4. */
23204 FOR_BB_INSNS (bb, insn)
23206 /* This only happens in exit blocks. */
23207 if (JUMP_P (insn)
23208 && ANY_RETURN_P (PATTERN (insn)))
23209 break;
23211 if (NONDEBUG_INSN_P (insn)
23212 && GET_CODE (PATTERN (insn)) != USE
23213 && GET_CODE (PATTERN (insn)) != CLOBBER)
23215 insn_count++;
23216 if (insn_count >= 4)
23217 return insn_count;
23221 return insn_count;
23225 /* Count the minimum number of instructions in the code path through BB.
23226 Return 4 if the number of instructions >= 4. */
23228 static int
23229 ix86_count_insn (basic_block bb)
23231 edge e;
23232 edge_iterator ei;
23233 int min_prev_count;
23235 /* Only bother counting instructions along paths with no
23236 more than 2 basic blocks between entry and exit. Given
23237 that BB has an edge to exit, determine if a predecessor
23238 of BB has an edge from entry. If so, compute the number
23239 of instructions in the predecessor block. If there
23240 happen to be multiple such blocks, compute the minimum. */
23241 min_prev_count = 4;
23242 FOR_EACH_EDGE (e, ei, bb->preds)
23244 edge prev_e;
23245 edge_iterator prev_ei;
23247 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23249 min_prev_count = 0;
23250 break;
23252 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
23254 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
23256 int count = ix86_count_insn_bb (e->src);
23257 if (count < min_prev_count)
23258 min_prev_count = count;
23259 break;
23264 if (min_prev_count < 4)
23265 min_prev_count += ix86_count_insn_bb (bb);
23267 return min_prev_count;
23270 /* Pad short function to 4 instructions. */
23272 static void
23273 ix86_pad_short_function (void)
23275 edge e;
23276 edge_iterator ei;
23278 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23280 rtx_insn *ret = BB_END (e->src);
23281 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
23283 int insn_count = ix86_count_insn (e->src);
23285 /* Pad short function. */
23286 if (insn_count < 4)
23288 rtx_insn *insn = ret;
23290 /* Find epilogue. */
23291 while (insn
23292 && (!NOTE_P (insn)
23293 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
23294 insn = PREV_INSN (insn);
23296 if (!insn)
23297 insn = ret;
23299 /* Two NOPs count as one instruction. */
23300 insn_count = 2 * (4 - insn_count);
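/* E.g. a function with insn_count == 2 gets 2 * (4 - 2) == 4 NOPs
   emitted just before the epilogue. */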
23301 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
23307 /* Fix up a Windows system unwinder issue. If an EH region falls through into
23308 the epilogue, the Windows system unwinder will apply epilogue logic and
23309 produce incorrect offsets. This can be avoided by adding a nop between
23310 the last insn that can throw and the first insn of the epilogue. */
23312 static void
23313 ix86_seh_fixup_eh_fallthru (void)
23315 edge e;
23316 edge_iterator ei;
23318 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
23320 rtx_insn *insn, *next;
23322 /* Find the beginning of the epilogue. */
23323 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
23324 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
23325 break;
23326 if (insn == NULL)
23327 continue;
23329 /* We only care about preceding insns that can throw. */
23330 insn = prev_active_insn (insn);
23331 if (insn == NULL || !can_throw_internal (insn))
23332 continue;
23334 /* Do not separate calls from their debug information. */
23335 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
23336 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
23337 insn = next;
23338 else
23339 break;
23341 emit_insn_after (gen_nops (const1_rtx), insn);
23344 /* Split a vector load from a parm_decl into elemental loads to avoid STLF
23345 stalls. */
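/* Roughly: a single V2DF load from a stack parameter slot is replaced
   by a loadlpd/loadhpd pair (the low and high DFmode halves loaded
   separately), so narrow stores of the individual arguments can still
   be forwarded to the now-narrow loads instead of stalling. */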
23346 static void
23347 ix86_split_stlf_stall_load ()
23349 rtx_insn* insn, *start = get_insns ();
23350 unsigned window = 0;
23352 for (insn = start; insn; insn = NEXT_INSN (insn))
23354 if (!NONDEBUG_INSN_P (insn))
23355 continue;
23356 window++;
23357 /* Inserting 64 vaddps %xmm18, %xmm19, %xmm20 instructions (with no
23358 dependence between each other, just to emulate the pipeline) before
23359 the stalled load makes the STLF-stall case as fast as the no-stall
23360 case on CLX. Since the CFG is freed before machine_reorg, just do a
23361 rough calculation of the window according to the layout. */
23362 if (window > (unsigned) x86_stlf_window_ninsns)
23363 return;
23365 if (any_uncondjump_p (insn)
23366 || ANY_RETURN_P (PATTERN (insn))
23367 || CALL_P (insn))
23368 return;
23370 rtx set = single_set (insn);
23371 if (!set)
23372 continue;
23373 rtx src = SET_SRC (set);
23374 if (!MEM_P (src)
23375 /* Only handle V2DFmode load since it doesn't need any scratch
23376 register. */
23377 || GET_MODE (src) != E_V2DFmode
23378 || !MEM_EXPR (src)
23379 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
23380 continue;
23382 rtx zero = CONST0_RTX (V2DFmode);
23383 rtx dest = SET_DEST (set);
23384 rtx m = adjust_address (src, DFmode, 0);
23385 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
23386 emit_insn_before (loadlpd, insn);
23387 m = adjust_address (src, DFmode, 8);
23388 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
23389 if (dump_file && (dump_flags & TDF_DETAILS))
23391 fputs ("Due to potential STLF stall, split instruction:\n",
23392 dump_file);
23393 print_rtl_single (dump_file, insn);
23394 fputs ("To:\n", dump_file);
23395 print_rtl_single (dump_file, loadlpd);
23396 print_rtl_single (dump_file, loadhpd);
23398 PATTERN (insn) = loadhpd;
23399 INSN_CODE (insn) = -1;
23400 gcc_assert (recog_memoized (insn) != -1);
23404 /* Implement machine specific optimizations. We implement padding of returns
23405 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
23406 static void
23407 ix86_reorg (void)
23409 /* We are freeing block_for_insn in the toplev to keep compatibility
23410 with old MDEP_REORGS that are not CFG based. Recompute it now. */
23411 compute_bb_for_insn ();
23413 if (TARGET_SEH && current_function_has_exception_handlers ())
23414 ix86_seh_fixup_eh_fallthru ();
23416 if (optimize && optimize_function_for_speed_p (cfun))
23418 if (TARGET_SSE2)
23419 ix86_split_stlf_stall_load ();
23420 if (TARGET_PAD_SHORT_FUNCTION)
23421 ix86_pad_short_function ();
23422 else if (TARGET_PAD_RETURNS)
23423 ix86_pad_returns ();
23424 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
23425 if (TARGET_FOUR_JUMP_LIMIT)
23426 ix86_avoid_jump_mispredicts ();
23427 #endif
23431 /* Return nonzero when QImode register that must be represented via REX prefix
23432 is used. */
23433 bool
23434 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
23436 int i;
23437 extract_insn_cached (insn);
23438 for (i = 0; i < recog_data.n_operands; i++)
23439 if (GENERAL_REG_P (recog_data.operand[i])
23440 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
23441 return true;
23442 return false;
23445 /* Return true when INSN mentions register that must be encoded using REX
23446 prefix. */
23447 bool
23448 x86_extended_reg_mentioned_p (rtx insn)
23450 subrtx_iterator::array_type array;
23451 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23453 const_rtx x = *iter;
23454 if (REG_P (x)
23455 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
23456 || REX2_INT_REGNO_P (REGNO (x))))
23457 return true;
23459 return false;
23462 /* Return true when INSN mentions register that must be encoded using REX2
23463 prefix. */
23464 bool
23465 x86_extended_rex2reg_mentioned_p (rtx insn)
23467 subrtx_iterator::array_type array;
23468 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
23470 const_rtx x = *iter;
23471 if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
23472 return true;
23474 return false;
23477 /* Return true when rtx operands mentions register that must be encoded using
23478 evex prefix. */
23479 bool
23480 x86_evex_reg_mentioned_p (rtx operands[], int nops)
23482 int i;
23483 for (i = 0; i < nops; i++)
23484 if (EXT_REX_SSE_REG_P (operands[i])
23485 || x86_extended_rex2reg_mentioned_p (operands[i]))
23486 return true;
23487 return false;
23490 /* If profitable, negate (without causing overflow) integer constant
23491 of mode MODE at location LOC. Return true in this case. */
23492 bool
23493 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
23495 HOST_WIDE_INT val;
23497 if (!CONST_INT_P (*loc))
23498 return false;
23500 switch (mode)
23502 case E_DImode:
23503 /* DImode x86_64 constants must fit in 32 bits. */
23504 gcc_assert (x86_64_immediate_operand (*loc, mode));
23506 mode = SImode;
23507 break;
23509 case E_SImode:
23510 case E_HImode:
23511 case E_QImode:
23512 break;
23514 default:
23515 gcc_unreachable ();
23518 /* Avoid overflows. */
23519 if (mode_signbit_p (mode, *loc))
23520 return false;
23522 val = INTVAL (*loc);
23524 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
23525 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
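/* E.g. `addl $128,%eax' needs a 4-byte immediate, while the negated
   `subl $-128,%eax' fits in a sign-extended 8-bit immediate, so 128 is
   negated as well. */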
23526 if ((val < 0 && val != -128)
23527 || val == 128)
23529 *loc = GEN_INT (-val);
23530 return true;
23533 return false;
23536 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23537 optabs would emit if we didn't have TFmode patterns. */
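/* For nonnegative inputs a plain signed conversion is used; for inputs
   with the sign bit set the value is halved as (in >> 1) | (in & 1)
   (keeping the low bit so rounding is unaffected), converted, and the
   result is doubled. */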
23539 void
23540 x86_emit_floatuns (rtx operands[2])
23542 rtx_code_label *neglab, *donelab;
23543 rtx i0, i1, f0, in, out;
23544 machine_mode mode, inmode;
23546 inmode = GET_MODE (operands[1]);
23547 gcc_assert (inmode == SImode || inmode == DImode);
23549 out = operands[0];
23550 in = force_reg (inmode, operands[1]);
23551 mode = GET_MODE (out);
23552 neglab = gen_label_rtx ();
23553 donelab = gen_label_rtx ();
23554 f0 = gen_reg_rtx (mode);
23556 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23558 expand_float (out, in, 0);
23560 emit_jump_insn (gen_jump (donelab));
23561 emit_barrier ();
23563 emit_label (neglab);
23565 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23566 1, OPTAB_DIRECT);
23567 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23568 1, OPTAB_DIRECT);
23569 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23571 expand_float (f0, i0, 0);
23573 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
23575 emit_label (donelab);
23578 /* Return the diagnostic message string if conversion from FROMTYPE to
23579 TOTYPE is not allowed, NULL otherwise. */
23581 static const char *
23582 ix86_invalid_conversion (const_tree fromtype, const_tree totype)
23584 machine_mode from_mode = element_mode (fromtype);
23585 machine_mode to_mode = element_mode (totype);
23587 if (!TARGET_SSE2 && from_mode != to_mode)
23589 /* Do not allow conversions to/from BFmode/HFmode scalar types
23590 when TARGET_SSE2 is not available. */
23591 if (from_mode == BFmode)
23592 return N_("invalid conversion from type %<__bf16%> "
23593 "without option %<-msse2%>");
23594 if (from_mode == HFmode)
23595 return N_("invalid conversion from type %<_Float16%> "
23596 "without option %<-msse2%>");
23597 if (to_mode == BFmode)
23598 return N_("invalid conversion to type %<__bf16%> "
23599 "without option %<-msse2%>");
23600 if (to_mode == HFmode)
23601 return N_("invalid conversion to type %<_Float16%> "
23602 "without option %<-msse2%>");
23605 /* Warn about silent implicit conversion between __bf16 and short,
23606 since __bfloat16 was redefined from short to a real __bf16 type
23607 in GCC 13. */
23608 if (element_mode (fromtype) != element_mode (totype)
23609 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
23611 /* Warn for silent implicit conversion where user may expect
23612 a bitcast. */
23613 if ((TYPE_MODE (fromtype) == BFmode
23614 && TYPE_MODE (totype) == HImode)
23615 || (TYPE_MODE (totype) == BFmode
23616 && TYPE_MODE (fromtype) == HImode))
23617 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
23618 "to real %<__bf16%> since GCC 13.1, be careful of "
23619 "implicit conversion between %<__bf16%> and %<short%>; "
23620 "an explicit bitcast may be needed here");
23623 /* Conversion allowed. */
23624 return NULL;
23627 /* Return the diagnostic message string if the unary operation OP is
23628 not permitted on TYPE, NULL otherwise. */
23630 static const char *
23631 ix86_invalid_unary_op (int op, const_tree type)
23633 machine_mode mmode = element_mode (type);
23634 /* Reject all single-operand operations on BFmode/HFmode except for &
23635 when TARGET_SSE2 is not available. */
23636 if (!TARGET_SSE2 && op != ADDR_EXPR)
23638 if (mmode == BFmode)
23639 return N_("operation not permitted on type %<__bf16%> "
23640 "without option %<-msse2%>");
23641 if (mmode == HFmode)
23642 return N_("operation not permitted on type %<_Float16%> "
23643 "without option %<-msse2%>");
23646 /* Operation allowed. */
23647 return NULL;
23650 /* Return the diagnostic message string if the binary operation OP is
23651 not permitted on TYPE1 and TYPE2, NULL otherwise. */
23653 static const char *
23654 ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
23655 const_tree type2)
23657 machine_mode type1_mode = element_mode (type1);
23658 machine_mode type2_mode = element_mode (type2);
23659 /* Reject all 2-operand operations on BFmode or HFmode
23660 when TARGET_SSE2 is not available. */
23661 if (!TARGET_SSE2)
23663 if (type1_mode == BFmode || type2_mode == BFmode)
23664 return N_("operation not permitted on type %<__bf16%> "
23665 "without option %<-msse2%>");
23667 if (type1_mode == HFmode || type2_mode == HFmode)
23668 return N_("operation not permitted on type %<_Float16%> "
23669 "without option %<-msse2%>");
23672 /* Operation allowed. */
23673 return NULL;
23677 /* Target hook for scalar_mode_supported_p. */
23678 static bool
23679 ix86_scalar_mode_supported_p (scalar_mode mode)
23681 if (DECIMAL_FLOAT_MODE_P (mode))
23682 return default_decimal_float_supported_p ();
23683 else if (mode == TFmode)
23684 return true;
23685 else if (mode == HFmode || mode == BFmode)
23686 return true;
23687 else
23688 return default_scalar_mode_supported_p (mode);
23691 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
23692 if MODE is HFmode or BFmode, and punt to the generic implementation otherwise. */
23694 static bool
23695 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23697 /* NB: Always return TRUE for HFmode so that the _Float16 type will
23698 be defined by the C front-end for AVX512FP16 intrinsics. We will
23699 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
23700 enabled. */
23701 return ((mode == HFmode || mode == BFmode)
23702 ? true
23703 : default_libgcc_floating_mode_supported_p (mode));
23706 /* Implements target hook vector_mode_supported_p. */
23707 static bool
23708 ix86_vector_mode_supported_p (machine_mode mode)
23710 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
23711 either. */
23712 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
23713 return false;
23714 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
23715 return true;
23716 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23717 return true;
23718 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
23719 return true;
23720 if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
23721 return true;
23722 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
23723 && VALID_MMX_REG_MODE (mode))
23724 return true;
23725 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
23726 && VALID_MMX_REG_MODE_3DNOW (mode))
23727 return true;
23728 if (mode == V2QImode)
23729 return true;
23730 return false;
23733 /* Target hook for c_mode_for_suffix. */
23734 static machine_mode
23735 ix86_c_mode_for_suffix (char suffix)
23737 if (suffix == 'q')
23738 return TFmode;
23739 if (suffix == 'w')
23740 return XFmode;
23742 return VOIDmode;
23745 /* Helper function to map common constraints to non-EGPR ones.
23746 All related constraints have a j prefix: j plus an upper-case letter
23747 means the constraint is strictly EGPR enabled, while j plus a
23748 lower-case letter indicates the constraint is strictly gpr16 only.
23750 Specifically for the "g" constraint, split it into its "r", "m" and "i"
23751 parts as there is no corresponding general constraint defined for the backend.
23753 Here is the full list of mappings from constraints that may involve
23754 a gpr to the j-prefixed ones.
23756 "g" -> "jrjmi"
23757 "r" -> "jr"
23758 "m" -> "jm"
23759 "<" -> "j<"
23760 ">" -> "j>"
23761 "o" -> "jo"
23762 "V" -> "jV"
23763 "p" -> "jp"
23764 "Bm" -> "ja"
23767 static void map_egpr_constraints (vec<const char *> &constraints)
23769 for (size_t i = 0; i < constraints.length(); i++)
23771 const char *cur = constraints[i];
23773 if (startswith (cur, "=@cc"))
23774 continue;
23776 int len = strlen (cur);
23777 auto_vec<char> buf;
23779 for (int j = 0; j < len; j++)
23781 switch (cur[j])
23783 case 'g':
23784 buf.safe_push ('j');
23785 buf.safe_push ('r');
23786 buf.safe_push ('j');
23787 buf.safe_push ('m');
23788 buf.safe_push ('i');
23789 break;
23790 case 'r':
23791 case 'm':
23792 case '<':
23793 case '>':
23794 case 'o':
23795 case 'V':
23796 case 'p':
23797 buf.safe_push ('j');
23798 buf.safe_push (cur[j]);
23799 break;
23800 case 'B':
23801 if (cur[j + 1] == 'm')
23803 buf.safe_push ('j');
23804 buf.safe_push ('a');
23805 j++;
23807 else
23809 buf.safe_push (cur[j]);
23810 buf.safe_push (cur[j + 1]);
23811 j++;
23813 break;
23814 case 'T':
23815 case 'Y':
23816 case 'W':
23817 case 'j':
23818 buf.safe_push (cur[j]);
23819 buf.safe_push (cur[j + 1]);
23820 j++;
23821 break;
23822 default:
23823 buf.safe_push (cur[j]);
23824 break;
23827 buf.safe_push ('\0');
23828 constraints[i] = xstrdup (buf.address ());
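/* Illustrative mapping (a sketch): when TARGET_APX_EGPR is enabled and
   -mapx-inline-asm-use-gpr32 is not, an inline-asm constraint string
   such as "=rm" is rewritten by the routine above to "=jrjm" and "g"
   becomes "jrjmi", so asm operands are never assigned to the extended
   r16-r31 GPRs.  */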
23832 /* Worker function for TARGET_MD_ASM_ADJUST.
23834 We implement asm flag outputs, and maintain source compatibility
23835 with the old cc0-based compiler. */
23837 static rtx_insn *
23838 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
23839 vec<machine_mode> & /*input_modes*/,
23840 vec<const char *> &constraints, vec<rtx> &/*uses*/,
23841 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
23842 location_t loc)
23844 bool saw_asm_flag = false;
23846 start_sequence ();
23848 if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
23849 map_egpr_constraints (constraints);
23851 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
23853 const char *con = constraints[i];
23854 if (!startswith (con, "=@cc"))
23855 continue;
23856 con += 4;
23857 if (strchr (con, ',') != NULL)
23859 error_at (loc, "alternatives not allowed in %<asm%> flag output");
23860 continue;
23863 bool invert = false;
23864 if (con[0] == 'n')
23865 invert = true, con++;
23867 machine_mode mode = CCmode;
23868 rtx_code code = UNKNOWN;
23870 switch (con[0])
23872 case 'a':
23873 if (con[1] == 0)
23874 mode = CCAmode, code = EQ;
23875 else if (con[1] == 'e' && con[2] == 0)
23876 mode = CCCmode, code = NE;
23877 break;
23878 case 'b':
23879 if (con[1] == 0)
23880 mode = CCCmode, code = EQ;
23881 else if (con[1] == 'e' && con[2] == 0)
23882 mode = CCAmode, code = NE;
23883 break;
23884 case 'c':
23885 if (con[1] == 0)
23886 mode = CCCmode, code = EQ;
23887 break;
23888 case 'e':
23889 if (con[1] == 0)
23890 mode = CCZmode, code = EQ;
23891 break;
23892 case 'g':
23893 if (con[1] == 0)
23894 mode = CCGCmode, code = GT;
23895 else if (con[1] == 'e' && con[2] == 0)
23896 mode = CCGCmode, code = GE;
23897 break;
23898 case 'l':
23899 if (con[1] == 0)
23900 mode = CCGCmode, code = LT;
23901 else if (con[1] == 'e' && con[2] == 0)
23902 mode = CCGCmode, code = LE;
23903 break;
23904 case 'o':
23905 if (con[1] == 0)
23906 mode = CCOmode, code = EQ;
23907 break;
23908 case 'p':
23909 if (con[1] == 0)
23910 mode = CCPmode, code = EQ;
23911 break;
23912 case 's':
23913 if (con[1] == 0)
23914 mode = CCSmode, code = EQ;
23915 break;
23916 case 'z':
23917 if (con[1] == 0)
23918 mode = CCZmode, code = EQ;
23919 break;
23921 if (code == UNKNOWN)
23923 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
23924 continue;
23926 if (invert)
23927 code = reverse_condition (code);
23929 rtx dest = outputs[i];
23930 if (!saw_asm_flag)
23932 /* This is the first asm flag output. Here we put the flags
23933 register in as the real output and adjust the condition to
23934 allow it. */
23935 constraints[i] = "=Bf";
23936 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
23937 saw_asm_flag = true;
23939 else
23941 /* We don't need the flags register as output twice. */
23942 constraints[i] = "=X";
23943 outputs[i] = gen_rtx_SCRATCH (SImode);
23946 rtx x = gen_rtx_REG (mode, FLAGS_REG);
23947 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
23949 machine_mode dest_mode = GET_MODE (dest);
23950 if (!SCALAR_INT_MODE_P (dest_mode))
23952 error_at (loc, "invalid type for %<asm%> flag output");
23953 continue;
23956 if (dest_mode == QImode)
23957 emit_insn (gen_rtx_SET (dest, x));
23958 else
23960 rtx reg = gen_reg_rtx (QImode);
23961 emit_insn (gen_rtx_SET (reg, x));
23963 reg = convert_to_mode (dest_mode, reg, 1);
23964 emit_move_insn (dest, reg);
23968 rtx_insn *seq = get_insns ();
23969 end_sequence ();
23971 if (saw_asm_flag)
23972 return seq;
23973 else
23975 /* If we had no asm flag outputs, clobber the flags. */
23976 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
23977 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
23978 return NULL;
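/* Illustrative use of the asm flag outputs handled above (a sketch, not
   taken from this file), given unsigned ints a and b:

     bool carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (a) : "r" (b));

   The "=@ccc" constraint is rewritten to "=Bf" on the flags register,
   and a setcc of the carry flag (plus a zero-extension when the
   destination is wider than QImode) is emitted into CARRY.  */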
23982 /* Implements target vector targetm.asm.encode_section_info. */
23984 static void ATTRIBUTE_UNUSED
23985 ix86_encode_section_info (tree decl, rtx rtl, int first)
23987 default_encode_section_info (decl, rtl, first);
23989 if (ix86_in_large_data_p (decl))
23990 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23993 /* Worker function for REVERSE_CONDITION. */
23995 enum rtx_code
23996 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
23998 return (mode == CCFPmode
23999 ? reverse_condition_maybe_unordered (code)
24000 : reverse_condition (code));
24003 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24004 to OPERANDS[0]. */
24006 const char *
24007 output_387_reg_move (rtx_insn *insn, rtx *operands)
24009 if (REG_P (operands[0]))
24011 if (REG_P (operands[1])
24012 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24014 if (REGNO (operands[0]) == FIRST_STACK_REG)
24015 return output_387_ffreep (operands, 0);
24016 return "fstp\t%y0";
24018 if (STACK_TOP_P (operands[0]))
24019 return "fld%Z1\t%y1";
24020 return "fst\t%y0";
24022 else if (MEM_P (operands[0]))
24024 gcc_assert (REG_P (operands[1]));
24025 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24026 return "fstp%Z0\t%y0";
24027 else
24029 /* There is no non-popping store to memory for XFmode.
24030 So if we need one, follow the store with a load. */
24031 if (GET_MODE (operands[0]) == XFmode)
24032 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
24033 else
24034 return "fst%Z0\t%y0";
24037 else
24038 gcc_unreachable ();
24040 #ifdef TARGET_SOLARIS
24041 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24043 static void
24044 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24045 tree decl)
24047 /* With Binutils 2.15, the "@unwind" marker must be specified on
24048 every occurrence of the ".eh_frame" section, not just the first
24049 one. */
24050 if (TARGET_64BIT
24051 && strcmp (name, ".eh_frame") == 0)
24053 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24054 flags & SECTION_WRITE ? "aw" : "a");
24055 return;
24058 #ifndef USE_GAS
24059 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
24061 solaris_elf_asm_comdat_section (name, flags, decl);
24062 return;
24065 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
24066 SPARC assembler. One cannot mix single-letter flags and #exclude, so
24067 only emit the latter here. */
24068 if (flags & SECTION_EXCLUDE)
24070 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
24071 return;
24073 #endif
24075 default_elf_asm_named_section (name, flags, decl);
24077 #endif /* TARGET_SOLARIS */
24079 /* Return the mangling of TYPE if it is an extended fundamental type. */
24081 static const char *
24082 ix86_mangle_type (const_tree type)
24084 type = TYPE_MAIN_VARIANT (type);
24086 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24087 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24088 return NULL;
24090 if (type == float128_type_node || type == float64x_type_node)
24091 return NULL;
24093 switch (TYPE_MODE (type))
24095 case E_BFmode:
24096 return "DF16b";
24097 case E_HFmode:
24098 /* _Float16 is "DF16_".
24099 Align with clang's decision in https://reviews.llvm.org/D33719. */
24100 return "DF16_";
24101 case E_TFmode:
24102 /* __float128 is "g". */
24103 return "g";
24104 case E_XFmode:
24105 /* "long double" or __float80 is "e". */
24106 return "e";
24107 default:
24108 return NULL;
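/* Example manglings implied by the table above (assuming the Itanium
   C++ ABI): "void f(__bf16)" mangles as "_Z1fDF16b", "void f(_Float16)"
   as "_Z1fDF16_", "void f(__float128)" as "_Z1fg" and
   "void f(long double)" as "_Z1fe".  */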
24112 /* Create C++ tinfo symbols for only conditionally available fundamental
24113 types. */
24115 static void
24116 ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
24118 extern tree ix86_float16_type_node;
24119 extern tree ix86_bf16_type_node;
24121 if (!TARGET_SSE2)
24123 if (!float16_type_node)
24124 float16_type_node = ix86_float16_type_node;
24125 if (!bfloat16_type_node)
24126 bfloat16_type_node = ix86_bf16_type_node;
24127 callback (float16_type_node);
24128 callback (bfloat16_type_node);
24129 float16_type_node = NULL_TREE;
24130 bfloat16_type_node = NULL_TREE;
24134 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
24136 static tree
24137 ix86_stack_protect_guard (void)
24139 if (TARGET_SSP_TLS_GUARD)
24141 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
24142 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
24143 tree type = build_qualified_type (type_node, qual);
24144 tree t;
24146 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
24148 t = ix86_tls_stack_chk_guard_decl;
24150 if (t == NULL)
24152 rtx x;
24154 t = build_decl
24155 (UNKNOWN_LOCATION, VAR_DECL,
24156 get_identifier (ix86_stack_protector_guard_symbol_str),
24157 type);
24158 TREE_STATIC (t) = 1;
24159 TREE_PUBLIC (t) = 1;
24160 DECL_EXTERNAL (t) = 1;
24161 TREE_USED (t) = 1;
24162 TREE_THIS_VOLATILE (t) = 1;
24163 DECL_ARTIFICIAL (t) = 1;
24164 DECL_IGNORED_P (t) = 1;
24166 /* Do not share RTL as the declaration is visible outside of
24167 current function. */
24168 x = DECL_RTL (t);
24169 RTX_FLAG (x, used) = 1;
24171 ix86_tls_stack_chk_guard_decl = t;
24174 else
24176 tree asptrtype = build_pointer_type (type);
24178 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
24179 t = build2 (MEM_REF, asptrtype, t,
24180 build_int_cst (asptrtype, 0));
24181 TREE_THIS_VOLATILE (t) = 1;
24184 return t;
24187 return default_stack_protect_guard ();
24190 /* For 32-bit code we can save PIC register setup by using the
24191 __stack_chk_fail_local hidden function instead of calling
24192 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
24193 register, so it is better to call __stack_chk_fail directly. */
24195 static tree ATTRIBUTE_UNUSED
24196 ix86_stack_protect_fail (void)
24198 return TARGET_64BIT
24199 ? default_external_stack_protect_fail ()
24200 : default_hidden_stack_protect_fail ();
24203 /* Select a format to encode pointers in exception handling data. CODE
24204 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24205 true if the symbol may be affected by dynamic relocations.
24207 ??? All x86 object file formats are capable of representing this.
24208 After all, the relocation needed is the same as for the call insn.
24209 Whether or not a particular assembler allows us to enter such, I
24210 guess we'll have to see. */
24213 asm_preferred_eh_data_format (int code, int global)
24215 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
24216 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
24218 int type = DW_EH_PE_sdata8;
24219 if (ptr_mode == SImode
24220 || ix86_cmodel == CM_SMALL_PIC
24221 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24222 type = DW_EH_PE_sdata4;
24223 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24226 if (ix86_cmodel == CM_SMALL
24227 || (ix86_cmodel == CM_MEDIUM && code))
24228 return DW_EH_PE_udata4;
24230 return DW_EH_PE_absptr;
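/* Worked example of the logic above (illustrative): for x86-64 -fpic
   with the small PIC code model, a global symbol gets
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 and a local one
   DW_EH_PE_pcrel | DW_EH_PE_sdata4; without -fpic (and with direct
   extern access) the small code model simply uses DW_EH_PE_udata4,
   while larger models mostly fall back to DW_EH_PE_absptr.  */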
24233 /* Implement targetm.vectorize.builtin_vectorization_cost. */
24234 static int
24235 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
24236 tree vectype, int)
24238 bool fp = false;
24239 machine_mode mode = TImode;
24240 int index;
24241 if (vectype != NULL)
24243 fp = FLOAT_TYPE_P (vectype);
24244 mode = TYPE_MODE (vectype);
24247 switch (type_of_cost)
24249 case scalar_stmt:
24250 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
24252 case scalar_load:
24253 /* Load/store costs are relative to a register move, which costs 2.
24254 Recompute them in COSTS_N_INSNS units so everything has the same base. */
24255 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
24256 : ix86_cost->int_load [2]) / 2;
24258 case scalar_store:
24259 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
24260 : ix86_cost->int_store [2]) / 2;
24262 case vector_stmt:
24263 return ix86_vec_cost (mode,
24264 fp ? ix86_cost->addss : ix86_cost->sse_op);
24266 case vector_load:
24267 index = sse_store_index (mode);
24268 /* See PR82713 - we may end up being called on non-vector type. */
24269 if (index < 0)
24270 index = 2;
24271 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
24273 case vector_store:
24274 index = sse_store_index (mode);
24275 /* See PR82713 - we may end up being called on non-vector type. */
24276 if (index < 0)
24277 index = 2;
24278 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
24280 case vec_to_scalar:
24281 case scalar_to_vec:
24282 return ix86_vec_cost (mode, ix86_cost->sse_op);
24284 /* We should have separate costs for unaligned loads and gather/scatter.
24285 Do that incrementally. */
24286 case unaligned_load:
24287 index = sse_store_index (mode);
24288 /* See PR82713 - we may end up being called on non-vector type. */
24289 if (index < 0)
24290 index = 2;
24291 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
24293 case unaligned_store:
24294 index = sse_store_index (mode);
24295 /* See PR82713 - we may end up being called on non-vector type. */
24296 if (index < 0)
24297 index = 2;
24298 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
24300 case vector_gather_load:
24301 return ix86_vec_cost (mode,
24302 COSTS_N_INSNS
24303 (ix86_cost->gather_static
24304 + ix86_cost->gather_per_elt
24305 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24307 case vector_scatter_store:
24308 return ix86_vec_cost (mode,
24309 COSTS_N_INSNS
24310 (ix86_cost->scatter_static
24311 + ix86_cost->scatter_per_elt
24312 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
24314 case cond_branch_taken:
24315 return ix86_cost->cond_taken_branch_cost;
24317 case cond_branch_not_taken:
24318 return ix86_cost->cond_not_taken_branch_cost;
24320 case vec_perm:
24321 case vec_promote_demote:
24322 return ix86_vec_cost (mode, ix86_cost->sse_op);
24324 case vec_construct:
24326 int n = TYPE_VECTOR_SUBPARTS (vectype);
24327 /* N - 1 element inserts into an SSE vector; the possible
24328 GPR -> XMM move is accounted for in add_stmt_cost. */
24329 if (GET_MODE_BITSIZE (mode) <= 128)
24330 return (n - 1) * ix86_cost->sse_op;
24331 /* One vinserti128 for combining two SSE vectors for AVX256. */
24332 else if (GET_MODE_BITSIZE (mode) == 256)
24333 return ((n - 2) * ix86_cost->sse_op
24334 + ix86_vec_cost (mode, ix86_cost->addss));
24335 /* One vinserti64x4 and two vinserti128 for combining SSE
24336 and AVX256 vectors to AVX512. */
24337 else if (GET_MODE_BITSIZE (mode) == 512)
24338 return ((n - 4) * ix86_cost->sse_op
24339 + 3 * ix86_vec_cost (mode, ix86_cost->addss));
24340 gcc_unreachable ();
24343 default:
24344 gcc_unreachable ();
24349 /* This function returns the calling-ABI-specific va_list type node.
24350 It returns the FNDECL-specific va_list type. */
24352 static tree
24353 ix86_fn_abi_va_list (tree fndecl)
24355 if (!TARGET_64BIT)
24356 return va_list_type_node;
24357 gcc_assert (fndecl != NULL_TREE);
24359 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
24360 return ms_va_list_type_node;
24361 else
24362 return sysv_va_list_type_node;
24365 /* Returns the canonical va_list type specified by TYPE. If there
24366 is no valid TYPE provided, it returns NULL_TREE. */
24368 static tree
24369 ix86_canonical_va_list_type (tree type)
24371 if (TARGET_64BIT)
24373 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
24374 return ms_va_list_type_node;
24376 if ((TREE_CODE (type) == ARRAY_TYPE
24377 && integer_zerop (array_type_nelts (type)))
24378 || POINTER_TYPE_P (type))
24380 tree elem_type = TREE_TYPE (type);
24381 if (TREE_CODE (elem_type) == RECORD_TYPE
24382 && lookup_attribute ("sysv_abi va_list",
24383 TYPE_ATTRIBUTES (elem_type)))
24384 return sysv_va_list_type_node;
24387 return NULL_TREE;
24390 return std_canonical_va_list_type (type);
24393 /* Iterate through the target-specific builtin types for va_list.
24394 IDX denotes the iterator, *PTREE is set to the result type of
24395 the va_list builtin, and *PNAME to its internal type.
24396 Returns zero if there is no element for this index, otherwise
24397 IDX should be increased upon the next call.
24398 Note, do not iterate a base builtin's name like __builtin_va_list.
24399 Used from c_common_nodes_and_builtins. */
24401 static int
24402 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
24404 if (TARGET_64BIT)
24406 switch (idx)
24408 default:
24409 break;
24411 case 0:
24412 *ptree = ms_va_list_type_node;
24413 *pname = "__builtin_ms_va_list";
24414 return 1;
24416 case 1:
24417 *ptree = sysv_va_list_type_node;
24418 *pname = "__builtin_sysv_va_list";
24419 return 1;
24423 return 0;
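/* Illustrative use of the builtins enumerated above (a sketch): on
   x86-64 a function declared with __attribute__((ms_abi)) uses
   __builtin_ms_va_list together with __builtin_ms_va_start /
   __builtin_ms_va_end for its variadic arguments, while sysv_abi
   functions keep using the plain __builtin_va_list machinery.  */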
24426 #undef TARGET_SCHED_DISPATCH
24427 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
24428 #undef TARGET_SCHED_DISPATCH_DO
24429 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
24430 #undef TARGET_SCHED_REASSOCIATION_WIDTH
24431 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
24432 #undef TARGET_SCHED_REORDER
24433 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
24434 #undef TARGET_SCHED_ADJUST_PRIORITY
24435 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
24436 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
24437 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
24438 ix86_dependencies_evaluation_hook
24441 /* Implementation of the reassociation_width target hook, used by the
24442 reassoc phase to identify the parallelism level in a reassociated
24443 tree. The statement's tree_code is passed in OP. The arguments'
24444 type is passed in MODE. */
24446 static int
24447 ix86_reassociation_width (unsigned int op, machine_mode mode)
24449 int width = 1;
24450 /* Vector part. */
24451 if (VECTOR_MODE_P (mode))
24453 int div = 1;
24454 if (INTEGRAL_MODE_P (mode))
24455 width = ix86_cost->reassoc_vec_int;
24456 else if (FLOAT_MODE_P (mode))
24457 width = ix86_cost->reassoc_vec_fp;
24459 if (width == 1)
24460 return 1;
24462 /* Integer vector instructions execute in the FP unit
24463 and can execute 3 additions and one multiplication per cycle. */
24464 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
24465 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
24466 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
24467 return 1;
24469 /* Account for targets that split wide vectors into multiple parts. */
24470 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
24471 div = GET_MODE_BITSIZE (mode) / 256;
24472 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
24473 div = GET_MODE_BITSIZE (mode) / 128;
24474 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
24475 div = GET_MODE_BITSIZE (mode) / 64;
24476 width = (width + div - 1) / div;
24478 /* Scalar part. */
24479 else if (INTEGRAL_MODE_P (mode))
24480 width = ix86_cost->reassoc_int;
24481 else if (FLOAT_MODE_P (mode))
24482 width = ix86_cost->reassoc_fp;
24484 /* Avoid using too many registers in 32-bit mode. */
24485 if (!TARGET_64BIT && width > 2)
24486 width = 2;
24487 return width;
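/* Worked example of the width computation above (illustrative): on a
   tuning with reassoc_vec_fp == 4 and TARGET_AVX256_SPLIT_REGS, a
   256-bit V8SFmode addition gets div = 256 / 128 = 2, so the reported
   reassociation width is (4 + 2 - 1) / 2 = 2; in 32-bit mode the result
   is additionally clamped to at most 2.  */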
24490 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
24491 place emms and femms instructions. */
24493 static machine_mode
24494 ix86_preferred_simd_mode (scalar_mode mode)
24496 if (!TARGET_SSE)
24497 return word_mode;
24499 switch (mode)
24501 case E_QImode:
24502 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24503 return V64QImode;
24504 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24505 return V32QImode;
24506 else
24507 return V16QImode;
24509 case E_HImode:
24510 if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24511 return V32HImode;
24512 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24513 return V16HImode;
24514 else
24515 return V8HImode;
24517 case E_SImode:
24518 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24519 return V16SImode;
24520 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24521 return V8SImode;
24522 else
24523 return V4SImode;
24525 case E_DImode:
24526 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24527 return V8DImode;
24528 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24529 return V4DImode;
24530 else
24531 return V2DImode;
24533 case E_HFmode:
24534 if (TARGET_AVX512FP16)
24536 if (TARGET_AVX512VL)
24538 if (TARGET_PREFER_AVX128)
24539 return V8HFmode;
24540 else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
24541 return V16HFmode;
24543 if (TARGET_EVEX512)
24544 return V32HFmode;
24546 return word_mode;
24548 case E_SFmode:
24549 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24550 return V16SFmode;
24551 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24552 return V8SFmode;
24553 else
24554 return V4SFmode;
24556 case E_DFmode:
24557 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24558 return V8DFmode;
24559 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24560 return V4DFmode;
24561 else if (TARGET_SSE2)
24562 return V2DFmode;
24563 /* FALLTHRU */
24565 default:
24566 return word_mode;
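/* Illustrative results of the selection above (a sketch): with plain
   -msse2, SImode prefers V4SImode; with -mavx2 it prefers V8SImode; and
   with AVX512F and 512-bit EVEX vectors enabled (and no prefer-256
   tuning) it prefers V16SImode.  DFmode without SSE2 falls back to
   word_mode.  */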
24570 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
24571 vectors. If AVX512F is enabled then try vectorizing with 512bit,
24572 256bit and 128bit vectors. */
24574 static unsigned int
24575 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
24577 if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
24579 modes->safe_push (V64QImode);
24580 modes->safe_push (V32QImode);
24581 modes->safe_push (V16QImode);
24583 else if (TARGET_AVX512F && TARGET_EVEX512 && all)
24585 modes->safe_push (V32QImode);
24586 modes->safe_push (V16QImode);
24587 modes->safe_push (V64QImode);
24589 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
24591 modes->safe_push (V32QImode);
24592 modes->safe_push (V16QImode);
24594 else if (TARGET_AVX && all)
24596 modes->safe_push (V16QImode);
24597 modes->safe_push (V32QImode);
24599 else if (TARGET_SSE2)
24600 modes->safe_push (V16QImode);
24602 if (TARGET_MMX_WITH_SSE)
24603 modes->safe_push (V8QImode);
24605 if (TARGET_SSE2)
24606 modes->safe_push (V4QImode);
24608 return 0;
24611 /* Implementation of targetm.vectorize.get_mask_mode. */
24613 static opt_machine_mode
24614 ix86_get_mask_mode (machine_mode data_mode)
24616 unsigned vector_size = GET_MODE_SIZE (data_mode);
24617 unsigned nunits = GET_MODE_NUNITS (data_mode);
24618 unsigned elem_size = vector_size / nunits;
24620 /* Scalar mask case. */
24621 if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
24622 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
24623 /* AVX512FP16 only supports vector comparison
24624 to kmask for _Float16. */
24625 || (TARGET_AVX512VL && TARGET_AVX512FP16
24626 && GET_MODE_INNER (data_mode) == E_HFmode))
24628 if (elem_size == 4
24629 || elem_size == 8
24630 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
24631 return smallest_int_mode_for_size (nunits);
24634 scalar_int_mode elem_mode
24635 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
24637 gcc_assert (elem_size * nunits == vector_size);
24639 return mode_for_vector (elem_mode, nunits);
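/* Worked example (illustrative): for V16SFmode data with AVX512F and
   512-bit vectors enabled, vector_size is 64 and nunits is 16, so the
   scalar-mask path returns HImode (a 16-bit kmask); without AVX512 the
   fall-back path builds a V16SImode vector mask instead.  */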
24644 /* Return the class of registers which could be used for a pseudo of MODE
24645 and of class RCLASS for spilling instead of memory. Return NO_REGS
24646 if it is not possible or not profitable. */
24648 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
24650 static reg_class_t
24651 ix86_spill_class (reg_class_t rclass, machine_mode mode)
24653 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
24654 && TARGET_SSE2
24655 && TARGET_INTER_UNIT_MOVES_TO_VEC
24656 && TARGET_INTER_UNIT_MOVES_FROM_VEC
24657 && (mode == SImode || (TARGET_64BIT && mode == DImode))
24658 && INTEGER_CLASS_P (rclass))
24659 return ALL_SSE_REGS;
24660 return NO_REGS;
24663 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
24664 but returns a lower bound. */
24666 static unsigned int
24667 ix86_max_noce_ifcvt_seq_cost (edge e)
24669 bool predictable_p = predictable_edge_p (e);
24670 if (predictable_p)
24672 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
24673 return param_max_rtl_if_conversion_predictable_cost;
24675 else
24677 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
24678 return param_max_rtl_if_conversion_unpredictable_cost;
24681 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
24684 /* Return true if SEQ is a good candidate as a replacement for the
24685 if-convertible sequence described in IF_INFO. */
24687 static bool
24688 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
24690 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
24692 int cmov_cnt = 0;
24693 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
24694 Maybe we should allow even more conditional moves as long as they
24695 are used far enough not to stall the CPU, or also consider
24696 IF_INFO->TEST_BB succ edge probabilities. */
24697 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
24699 rtx set = single_set (insn);
24700 if (!set)
24701 continue;
24702 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
24703 continue;
24704 rtx src = SET_SRC (set);
24705 machine_mode mode = GET_MODE (src);
24706 if (GET_MODE_CLASS (mode) != MODE_INT
24707 && GET_MODE_CLASS (mode) != MODE_FLOAT)
24708 continue;
24709 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
24710 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
24711 continue;
24712 /* insn is CMOV or FCMOV. */
24713 if (++cmov_cnt > 1)
24714 return false;
24717 return default_noce_conversion_profitable_p (seq, if_info);
24720 /* x86-specific vector costs. */
24721 class ix86_vector_costs : public vector_costs
24723 public:
24724 ix86_vector_costs (vec_info *, bool);
24726 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
24727 stmt_vec_info stmt_info, slp_tree node,
24728 tree vectype, int misalign,
24729 vect_cost_model_location where) override;
24730 void finish_cost (const vector_costs *) override;
24732 private:
24734 /* Estimate register pressure of the vectorized code. */
24735 void ix86_vect_estimate_reg_pressure ();
24736 /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer; used to
24737 estimate register pressure.
24738 ??? Currently it's only used by vec_construct/scalar_to_vec
24739 where we know it's not loaded from memory. */
24740 unsigned m_num_gpr_needed[3];
24741 unsigned m_num_sse_needed[3];
24744 ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
24745 : vector_costs (vinfo, costing_for_scalar),
24746 m_num_gpr_needed (),
24747 m_num_sse_needed ()
24751 /* Implement targetm.vectorize.create_costs. */
24753 static vector_costs *
24754 ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
24756 return new ix86_vector_costs (vinfo, costing_for_scalar);
24759 unsigned
24760 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
24761 stmt_vec_info stmt_info, slp_tree node,
24762 tree vectype, int misalign,
24763 vect_cost_model_location where)
24765 unsigned retval = 0;
24766 bool scalar_p
24767 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
24768 int stmt_cost = -1;
24770 bool fp = false;
24771 machine_mode mode = scalar_p ? SImode : TImode;
24773 if (vectype != NULL)
24775 fp = FLOAT_TYPE_P (vectype);
24776 mode = TYPE_MODE (vectype);
24777 if (scalar_p)
24778 mode = TYPE_MODE (TREE_TYPE (vectype));
24781 if ((kind == vector_stmt || kind == scalar_stmt)
24782 && stmt_info
24783 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
24785 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
24786 /*machine_mode inner_mode = mode;
24787 if (VECTOR_MODE_P (mode))
24788 inner_mode = GET_MODE_INNER (mode);*/
24790 switch (subcode)
24792 case PLUS_EXPR:
24793 case POINTER_PLUS_EXPR:
24794 case MINUS_EXPR:
24795 if (kind == scalar_stmt)
24797 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24798 stmt_cost = ix86_cost->addss;
24799 else if (X87_FLOAT_MODE_P (mode))
24800 stmt_cost = ix86_cost->fadd;
24801 else
24802 stmt_cost = ix86_cost->add;
24804 else
24805 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
24806 : ix86_cost->sse_op);
24807 break;
24809 case MULT_EXPR:
24810 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
24811 so treat it as MULT_EXPR. */
24812 case MULT_HIGHPART_EXPR:
24813 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
24814 break;
24815 /* There's no direct instruction for WIDEN_MULT_EXPR,
24816 take emulation into account. */
24817 case WIDEN_MULT_EXPR:
24818 stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
24819 TYPE_UNSIGNED (vectype));
24820 break;
24822 case NEGATE_EXPR:
24823 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24824 stmt_cost = ix86_cost->sse_op;
24825 else if (X87_FLOAT_MODE_P (mode))
24826 stmt_cost = ix86_cost->fchs;
24827 else if (VECTOR_MODE_P (mode))
24828 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
24829 else
24830 stmt_cost = ix86_cost->add;
24831 break;
24832 case TRUNC_DIV_EXPR:
24833 case CEIL_DIV_EXPR:
24834 case FLOOR_DIV_EXPR:
24835 case ROUND_DIV_EXPR:
24836 case TRUNC_MOD_EXPR:
24837 case CEIL_MOD_EXPR:
24838 case FLOOR_MOD_EXPR:
24839 case RDIV_EXPR:
24840 case ROUND_MOD_EXPR:
24841 case EXACT_DIV_EXPR:
24842 stmt_cost = ix86_division_cost (ix86_cost, mode);
24843 break;
24845 case RSHIFT_EXPR:
24846 case LSHIFT_EXPR:
24847 case LROTATE_EXPR:
24848 case RROTATE_EXPR:
24850 tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
24851 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
24852 stmt_cost = ix86_shift_rotate_cost
24853 (ix86_cost,
24854 (subcode == RSHIFT_EXPR
24855 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
24856 ? ASHIFTRT : LSHIFTRT, mode,
24857 TREE_CODE (op2) == INTEGER_CST,
24858 cst_and_fits_in_hwi (op2)
24859 ? int_cst_value (op2) : -1,
24860 false, false, NULL, NULL);
24862 break;
24863 case NOP_EXPR:
24864 /* Only sign-conversions are free. */
24865 if (tree_nop_conversion_p
24866 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
24867 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
24868 stmt_cost = 0;
24869 break;
24871 case BIT_IOR_EXPR:
24872 case ABS_EXPR:
24873 case ABSU_EXPR:
24874 case MIN_EXPR:
24875 case MAX_EXPR:
24876 case BIT_XOR_EXPR:
24877 case BIT_AND_EXPR:
24878 case BIT_NOT_EXPR:
24879 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24880 stmt_cost = ix86_cost->sse_op;
24881 else if (VECTOR_MODE_P (mode))
24882 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
24883 else
24884 stmt_cost = ix86_cost->add;
24885 break;
24886 default:
24887 break;
24891 combined_fn cfn;
24892 if ((kind == vector_stmt || kind == scalar_stmt)
24893 && stmt_info
24894 && stmt_info->stmt
24895 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
24896 switch (cfn)
24898 case CFN_FMA:
24899 stmt_cost = ix86_vec_cost (mode,
24900 mode == SFmode ? ix86_cost->fmass
24901 : ix86_cost->fmasd);
24902 break;
24903 case CFN_MULH:
24904 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
24905 break;
24906 default:
24907 break;
24910 /* If we do elementwise loads into a vector then we are bound by
24911 latency and execution resources for the many scalar loads
24912 (AGU and load ports). Try to account for this by scaling the
24913 construction cost by the number of elements involved. */
24914 if ((kind == vec_construct || kind == vec_to_scalar)
24915 && stmt_info
24916 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
24917 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
24918 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
24919 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
24920 != INTEGER_CST))
24921 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
24923 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24924 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
24926 else if ((kind == vec_construct || kind == scalar_to_vec)
24927 && node
24928 && SLP_TREE_DEF_TYPE (node) == vect_external_def)
24930 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24931 unsigned i;
24932 tree op;
24933 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24934 if (TREE_CODE (op) == SSA_NAME)
24935 TREE_VISITED (op) = 0;
24936 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24938 if (TREE_CODE (op) != SSA_NAME
24939 || TREE_VISITED (op))
24940 continue;
24941 TREE_VISITED (op) = 1;
24942 gimple *def = SSA_NAME_DEF_STMT (op);
24943 tree tem;
24944 if (is_gimple_assign (def)
24945 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
24946 && ((tem = gimple_assign_rhs1 (def)), true)
24947 && TREE_CODE (tem) == SSA_NAME
24948 /* A sign-change expands to nothing. */
24949 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
24950 TREE_TYPE (tem)))
24951 def = SSA_NAME_DEF_STMT (tem);
24952 /* When the component is loaded from memory we can directly
24953 move it to a vector register, otherwise we have to go
24954 via a GPR or via vpinsr which involves similar cost.
24955 Likewise with a BIT_FIELD_REF extracting from a vector
24956 register we can hope to avoid using a GPR. */
24957 if (!is_gimple_assign (def)
24958 || ((!gimple_assign_load_p (def)
24959 || (!TARGET_SSE4_1
24960 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
24961 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
24962 || !VECTOR_TYPE_P (TREE_TYPE
24963 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
24965 if (fp)
24966 m_num_sse_needed[where]++;
24967 else
24969 m_num_gpr_needed[where]++;
24970 stmt_cost += ix86_cost->sse_to_integer;
24974 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24975 if (TREE_CODE (op) == SSA_NAME)
24976 TREE_VISITED (op) = 0;
24978 if (stmt_cost == -1)
24979 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24981 /* Penalize DFmode vector operations for Bonnell. */
24982 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
24983 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
24984 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
24986 /* Statements in an inner loop relative to the loop being
24987 vectorized are weighted more heavily. The value here is
24988 arbitrary and could potentially be improved with analysis. */
24989 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
24991 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
24992 for Silvermont, as it has an out-of-order integer pipeline and can execute
24993 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
24994 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
24995 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
24996 && stmt_info && stmt_info->stmt)
24998 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
24999 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
25000 retval = (retval * 17) / 10;
25003 m_costs[where] += retval;
25005 return retval;
25008 void
25009 ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
25011 unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
25012 unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
25014 /* Is there a better way to get the number of available FP registers? Currently use SSE_REGS. */
25015 unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
25016 for (unsigned i = 0; i != 3; i++)
25018 if (m_num_gpr_needed[i] > target_avail_regs)
25019 m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
25020 /* Only measure SSE register pressure. */
25021 if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
25022 m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
25026 void
25027 ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
25029 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
25030 if (loop_vinfo && !m_costing_for_scalar)
25032 /* We are currently not asking the vectorizer to compare costs
25033 between different vector mode sizes. When using predication
25034 that will end up always choosing the preferred mode size even
25035 if there's a smaller mode covering all lanes. Test for this
25036 situation and artificially reject the larger mode attempt.
25037 ??? We currently lack masked ops for sub-SSE sized modes,
25038 so we could restrict this rejection to AVX and AVX512 modes
25039 but err on the safe side for now. */
25040 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
25041 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
25042 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
25043 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
25044 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
25045 m_costs[vect_body] = INT_MAX;
25048 ix86_vect_estimate_reg_pressure ();
25050 vector_costs::finish_cost (scalar_costs);
25053 /* Validate target specific memory model bits in VAL. */
25055 static unsigned HOST_WIDE_INT
25056 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
25058 enum memmodel model = memmodel_from_int (val);
25059 bool strong;
25061 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
25062 |MEMMODEL_MASK)
25063 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
25065 warning (OPT_Winvalid_memory_model,
25066 "unknown architecture specific memory model");
25067 return MEMMODEL_SEQ_CST;
25069 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
25070 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
25072 warning (OPT_Winvalid_memory_model,
25073 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
25074 "memory model");
25075 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
25077 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
25079 warning (OPT_Winvalid_memory_model,
25080 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
25081 "memory model");
25082 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
25084 return val;
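/* Illustrative use of the HLE bits validated above (a sketch): with
   -mhle,

     __atomic_fetch_add (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   combines a standard acquire model with IX86_HLE_ACQUIRE, whereas pairing
   __ATOMIC_HLE_ACQUIRE with a weaker model such as __ATOMIC_RELAXED would
   trigger the -Winvalid-memory-model warning above and fall back to a
   sequentially consistent model (keeping the HLE bit).  */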
25087 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
25088 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
25089 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
25090 or number of vecsize_mangle variants that should be emitted. */
25092 static int
25093 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
25094 struct cgraph_simd_clone *clonei,
25095 tree base_type, int num,
25096 bool explicit_p)
25098 int ret = 1;
25100 if (clonei->simdlen
25101 && (clonei->simdlen < 2
25102 || clonei->simdlen > 1024
25103 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
25105 if (explicit_p)
25106 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25107 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
25108 return 0;
25111 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
25112 if (TREE_CODE (ret_type) != VOID_TYPE)
25113 switch (TYPE_MODE (ret_type))
25115 case E_QImode:
25116 case E_HImode:
25117 case E_SImode:
25118 case E_DImode:
25119 case E_SFmode:
25120 case E_DFmode:
25121 /* case E_SCmode: */
25122 /* case E_DCmode: */
25123 if (!AGGREGATE_TYPE_P (ret_type))
25124 break;
25125 /* FALLTHRU */
25126 default:
25127 if (explicit_p)
25128 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25129 "unsupported return type %qT for simd", ret_type);
25130 return 0;
25133 tree t;
25134 int i;
25135 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
25136 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
25138 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
25139 t && t != void_list_node; t = TREE_CHAIN (t), i++)
25141 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
25142 switch (TYPE_MODE (arg_type))
25144 case E_QImode:
25145 case E_HImode:
25146 case E_SImode:
25147 case E_DImode:
25148 case E_SFmode:
25149 case E_DFmode:
25150 /* case E_SCmode: */
25151 /* case E_DCmode: */
25152 if (!AGGREGATE_TYPE_P (arg_type))
25153 break;
25154 /* FALLTHRU */
25155 default:
25156 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
25157 break;
25158 if (explicit_p)
25159 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25160 "unsupported argument type %qT for simd", arg_type);
25161 return 0;
25165 if (!TREE_PUBLIC (node->decl) || !explicit_p)
25167 /* If the function isn't exported, we can pick up just one ISA
25168 for the clones. */
25169 if (TARGET_AVX512F && TARGET_EVEX512)
25170 clonei->vecsize_mangle = 'e';
25171 else if (TARGET_AVX2)
25172 clonei->vecsize_mangle = 'd';
25173 else if (TARGET_AVX)
25174 clonei->vecsize_mangle = 'c';
25175 else
25176 clonei->vecsize_mangle = 'b';
25177 ret = 1;
25179 else
25181 clonei->vecsize_mangle = "bcde"[num];
25182 ret = 4;
25184 clonei->mask_mode = VOIDmode;
25185 switch (clonei->vecsize_mangle)
25187 case 'b':
25188 clonei->vecsize_int = 128;
25189 clonei->vecsize_float = 128;
25190 break;
25191 case 'c':
25192 clonei->vecsize_int = 128;
25193 clonei->vecsize_float = 256;
25194 break;
25195 case 'd':
25196 clonei->vecsize_int = 256;
25197 clonei->vecsize_float = 256;
25198 break;
25199 case 'e':
25200 clonei->vecsize_int = 512;
25201 clonei->vecsize_float = 512;
25202 if (TYPE_MODE (base_type) == QImode)
25203 clonei->mask_mode = DImode;
25204 else
25205 clonei->mask_mode = SImode;
25206 break;
25208 if (clonei->simdlen == 0)
25210 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
25211 clonei->simdlen = clonei->vecsize_int;
25212 else
25213 clonei->simdlen = clonei->vecsize_float;
25214 clonei->simdlen = clonei->simdlen
25215 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
25217 else if (clonei->simdlen > 16)
25219 /* For compatibility with ICC, use the same upper bounds
25220 for simdlen. In particular, for CTYPE below, use the return type,
25221 unless the function returns void, in which case use the characteristic
25222 type. If it is possible for the given SIMDLEN to pass a CTYPE value
25223 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
25224 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
25225 emit the corresponding clone. */
25226 tree ctype = ret_type;
25227 if (VOID_TYPE_P (ret_type))
25228 ctype = base_type;
25229 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
25230 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
25231 cnt /= clonei->vecsize_int;
25232 else
25233 cnt /= clonei->vecsize_float;
25234 if (cnt > (TARGET_64BIT ? 16 : 8))
25236 if (explicit_p)
25237 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
25238 "unsupported simdlen %wd",
25239 clonei->simdlen.to_constant ());
25240 return 0;
25243 return ret;
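/* Illustrative result of the computation above (a sketch): for an
   exported

     #pragma omp declare simd
     float f (float x);

   four clones with vecsize_mangle 'b', 'c', 'd' and 'e' are requested;
   the 'b' (128-bit SSE) variant gets simdlen 4 and the 'e' (512-bit)
   variant simdlen 16, following vecsize_float and the 32-bit element
   size.  */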
25246 /* If SIMD clone NODE can't be used in a vectorized loop
25247 in the current function, return -1; otherwise return the badness of using it
25248 (0 if it is the most desirable from the vecsize_mangle point of view, 1
25249 slightly less desirable, etc.). */
25251 static int
25252 ix86_simd_clone_usable (struct cgraph_node *node)
25254 switch (node->simdclone->vecsize_mangle)
25256 case 'b':
25257 if (!TARGET_SSE2)
25258 return -1;
25259 if (!TARGET_AVX)
25260 return 0;
25261 return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
25262 case 'c':
25263 if (!TARGET_AVX)
25264 return -1;
25265 return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
25266 case 'd':
25267 if (!TARGET_AVX2)
25268 return -1;
25269 return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
25270 case 'e':
25271 if (!TARGET_AVX512F || !TARGET_EVEX512)
25272 return -1;
25273 return 0;
25274 default:
25275 gcc_unreachable ();
25279 /* This function adjusts the unroll factor based on
25280 the hardware capabilities. For example, bdver3 has
25281 a loop buffer which makes unrolling of smaller
25282 loops less important. This function decides the
25283 unroll factor using the number of memory references
25284 (the value 32 is used) as a heuristic. */
25286 static unsigned
25287 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
25289 basic_block *bbs;
25290 rtx_insn *insn;
25291 unsigned i;
25292 unsigned mem_count = 0;
25294 /* Unroll small loops when the unroll factor is not explicitly
25295 specified. */
25296 if (ix86_unroll_only_small_loops && !loop->unroll)
25298 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
25299 return MIN (nunroll, ix86_cost->small_unroll_factor);
25300 else
25301 return 1;
25304 if (!TARGET_ADJUST_UNROLL)
25305 return nunroll;
25307 /* Count the number of memory references within the loop body.
25308 This value determines the unrolling factor for bdver3 and bdver4
25309 architectures. */
25310 subrtx_iterator::array_type array;
25311 bbs = get_loop_body (loop);
25312 for (i = 0; i < loop->num_nodes; i++)
25313 FOR_BB_INSNS (bbs[i], insn)
25314 if (NONDEBUG_INSN_P (insn))
25315 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
25316 if (const_rtx x = *iter)
25317 if (MEM_P (x))
25319 machine_mode mode = GET_MODE (x);
25320 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25321 if (n_words > 4)
25322 mem_count += 2;
25323 else
25324 mem_count += 1;
25326 free (bbs);
25328 if (mem_count && mem_count <= 32)
25329 return MIN (nunroll, 32 / mem_count);
25331 return nunroll;
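/* Worked example of the heuristic above (illustrative): on a bdver3/4
   tuning with TARGET_ADJUST_UNROLL, a loop body containing 8 word-sized
   memory references is limited to MIN (nunroll, 32 / 8), i.e. at most 4
   unrolled copies, while a loop with more than 32 references keeps the
   generic unroll factor.  */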
25335 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
25337 static bool
25338 ix86_float_exceptions_rounding_supported_p (void)
25340 /* For x87 floating point with standard excess precision handling,
25341 there is no adddf3 pattern (since x87 floating point only has
25342 XFmode operations) so the default hook implementation gets this
25343 wrong. */
25344 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
25347 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
25349 static void
25350 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25352 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
25353 return;
25354 tree exceptions_var = create_tmp_var_raw (integer_type_node);
25355 if (TARGET_80387)
25357 tree fenv_index_type = build_index_type (size_int (6));
25358 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
25359 tree fenv_var = create_tmp_var_raw (fenv_type);
25360 TREE_ADDRESSABLE (fenv_var) = 1;
25361 tree fenv_ptr = build_pointer_type (fenv_type);
25362 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
25363 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
25364 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
25365 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
25366 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
25367 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
25368 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
25369 tree hold_fnclex = build_call_expr (fnclex, 0);
25370 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
25371 NULL_TREE, NULL_TREE);
25372 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
25373 hold_fnclex);
25374 *clear = build_call_expr (fnclex, 0);
25375 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
25376 tree fnstsw_call = build_call_expr (fnstsw, 0);
25377 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
25378 fnstsw_call, NULL_TREE, NULL_TREE);
25379 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
25380 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
25381 exceptions_var, exceptions_x87,
25382 NULL_TREE, NULL_TREE);
25383 *update = build2 (COMPOUND_EXPR, integer_type_node,
25384 sw_mod, update_mod);
25385 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
25386 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
25388 if (TARGET_SSE && TARGET_SSE_MATH)
25390 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
25391 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
25392 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
25393 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
25394 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
25395 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
25396 mxcsr_orig_var, stmxcsr_hold_call,
25397 NULL_TREE, NULL_TREE);
25398 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
25399 mxcsr_orig_var,
25400 build_int_cst (unsigned_type_node, 0x1f80));
25401 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
25402 build_int_cst (unsigned_type_node, 0xffffffc0));
25403 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
25404 mxcsr_mod_var, hold_mod_val,
25405 NULL_TREE, NULL_TREE);
25406 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25407 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
25408 hold_assign_orig, hold_assign_mod);
25409 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
25410 ldmxcsr_hold_call);
25411 if (*hold)
25412 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
25413 else
25414 *hold = hold_all;
25415 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
25416 if (*clear)
25417 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
25418 ldmxcsr_clear_call);
25419 else
25420 *clear = ldmxcsr_clear_call;
25421 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
25422 tree exceptions_sse = fold_convert (integer_type_node,
25423 stxmcsr_update_call);
25424 if (*update)
25426 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
25427 exceptions_var, exceptions_sse);
25428 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
25429 exceptions_var, exceptions_mod);
25430 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
25431 exceptions_assign);
25433 else
25434 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
25435 exceptions_sse, NULL_TREE, NULL_TREE);
25436 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
25437 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25438 ldmxcsr_update_call);
25440 tree atomic_feraiseexcept
25441 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
25442 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
25443 1, exceptions_var);
25444 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
25445 atomic_feraiseexcept_call);
25448 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25449 /* For i386, a common symbol is local only for non-PIE binaries. For
25450 x86-64, a common symbol is local only for non-PIE binaries or when the
25451 linker supports copy relocs in PIE binaries. */
25453 static bool
25454 ix86_binds_local_p (const_tree exp)
25456 bool direct_extern_access
25457 = (ix86_direct_extern_access
25458 && !(VAR_OR_FUNCTION_DECL_P (exp)
25459 && lookup_attribute ("nodirect_extern_access",
25460 DECL_ATTRIBUTES (exp))));
25461 if (!direct_extern_access)
25462 ix86_has_no_direct_extern_access = true;
25463 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
25464 direct_extern_access,
25465 (direct_extern_access
25466 && (!flag_pic
25467 || (TARGET_64BIT
25468 && HAVE_LD_PIE_COPYRELOC != 0))));
25471 /* If flag_pic or ix86_direct_extern_access is false, then neither
25472 local nor global relocs should be placed in readonly memory. */
25474 static int
25475 ix86_reloc_rw_mask (void)
25477 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
25479 #endif
25481 /* Return true iff ADDR can be used as a symbolic base address. */
25483 static bool
25484 symbolic_base_address_p (rtx addr)
25486 if (GET_CODE (addr) == SYMBOL_REF)
25487 return true;
25489 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
25490 return true;
25492 return false;
25495 /* Return true iff ADDR can be used as a base address. */
25497 static bool
25498 base_address_p (rtx addr)
25500 if (REG_P (addr))
25501 return true;
25503 if (symbolic_base_address_p (addr))
25504 return true;
25506 return false;
25509 /* If MEM is in the form of [(base+symbase)+offset], extract the three
25510 parts of the address into BASE, SYMBASE and OFFSET, otherwise
25511 return false. */
25513 static bool
25514 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
25516 rtx addr;
25518 gcc_assert (MEM_P (mem));
25520 addr = XEXP (mem, 0);
25522 if (GET_CODE (addr) == CONST)
25523 addr = XEXP (addr, 0);
25525 if (base_address_p (addr))
25527 *base = addr;
25528 *symbase = const0_rtx;
25529 *offset = const0_rtx;
25530 return true;
25533 if (GET_CODE (addr) == PLUS
25534 && base_address_p (XEXP (addr, 0)))
25536 rtx addend = XEXP (addr, 1);
25538 if (GET_CODE (addend) == CONST)
25539 addend = XEXP (addend, 0);
25541 if (CONST_INT_P (addend))
25543 *base = XEXP (addr, 0);
25544 *symbase = const0_rtx;
25545 *offset = addend;
25546 return true;
25549 /* Also accept REG + symbolic ref, with or without a CONST_INT
25550 offset. */
25551 if (REG_P (XEXP (addr, 0)))
25553 if (symbolic_base_address_p (addend))
25555 *base = XEXP (addr, 0);
25556 *symbase = addend;
25557 *offset = const0_rtx;
25558 return true;
25561 if (GET_CODE (addend) == PLUS
25562 && symbolic_base_address_p (XEXP (addend, 0))
25563 && CONST_INT_P (XEXP (addend, 1)))
25565 *base = XEXP (addr, 0);
25566 *symbase = XEXP (addend, 0);
25567 *offset = XEXP (addend, 1);
25568 return true;
25573 return false;
25576 /* Given OPERANDS of consecutive load/store, check if we can merge
25577 them into move multiple. LOAD is true if they are load instructions.
25578 MODE is the mode of memory operands. */
25580 bool
25581 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
25582 machine_mode mode)
25584 HOST_WIDE_INT offval_1, offval_2, msize;
25585 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
25586 symbase_1, symbase_2, offset_1, offset_2;
25588 if (load)
25590 mem_1 = operands[1];
25591 mem_2 = operands[3];
25592 reg_1 = operands[0];
25593 reg_2 = operands[2];
25595 else
25597 mem_1 = operands[0];
25598 mem_2 = operands[2];
25599 reg_1 = operands[1];
25600 reg_2 = operands[3];
25603 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
25605 if (REGNO (reg_1) != REGNO (reg_2))
25606 return false;
25608 /* Check if the addresses are in the form of [base+offset]. */
25609 if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
25610 return false;
25611 if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
25612 return false;
25614 /* Check if the bases are the same. */
25615 if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
25616 return false;
25618 offval_1 = INTVAL (offset_1);
25619 offval_2 = INTVAL (offset_2);
25620 msize = GET_MODE_SIZE (mode);
25621 /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address. */
25622 if (offval_1 + msize != offval_2)
25623 return false;
25625 return true;
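/* Illustrative sketch, not part of the compiler proper: a peephole using
   the predicate above could merge a pair of loads such as

       movl 4(%esi), %eax
       movl 8(%esi), %eax

   because both addresses decompose to base %esi, symbase 0, and offsets
   4 and 8, the offsets differ by exactly GET_MODE_SIZE (SImode) == 4,
   and both destinations are the same register.  A pair like
   4(%esi) / 12(%esi) fails the adjacency check above.  */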
25628 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
25630 static bool
25631 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
25632 optimization_type opt_type)
25634 switch (op)
25636 case asin_optab:
25637 case acos_optab:
25638 case log1p_optab:
25639 case exp_optab:
25640 case exp10_optab:
25641 case exp2_optab:
25642 case expm1_optab:
25643 case ldexp_optab:
25644 case scalb_optab:
25645 case round_optab:
25646 case lround_optab:
25647 return opt_type == OPTIMIZE_FOR_SPEED;
25649 case rint_optab:
25650 if (SSE_FLOAT_MODE_P (mode1)
25651 && TARGET_SSE_MATH
25652 && !flag_trapping_math
25653 && !TARGET_SSE4_1
25654 && mode1 != HFmode)
25655 return opt_type == OPTIMIZE_FOR_SPEED;
25656 return true;
25658 case floor_optab:
25659 case ceil_optab:
25660 case btrunc_optab:
25661 if (((SSE_FLOAT_MODE_P (mode1)
25662 && TARGET_SSE_MATH
25663 && TARGET_SSE4_1)
25664 || mode1 == HFmode)
25665 && !flag_trapping_math)
25666 return true;
25667 return opt_type == OPTIMIZE_FOR_SPEED;
25669 case rsqrt_optab:
25670 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
25672 default:
25673 return true;
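/* Illustrative example under assumed flags: with -msse4.1 -mfpmath=sse
   and -fno-trapping-math, floor_optab is reported as supported even when
   optimizing for size, so __builtin_floor (x) can expand inline (a single
   roundsd on SSE4.1 hardware).  Without SSE4.1 the inline sequence is
   only considered worthwhile when optimizing for speed, and at -Os the
   libm call is kept.  */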
25677 /* Address space support.
25679 This is not "far pointers" in the 16-bit sense, but an easy way
25680 to use %fs and %gs segment prefixes. Therefore:
25682 (a) All address spaces have the same modes,
25683 (b) All address spaces have the same address forms,
25684 (c) While %fs and %gs are technically subsets of the generic
25685 address space, they are probably not subsets of each other.
25686 (d) Since we have no access to the segment base register values
25687 without resorting to a system call, we cannot convert a
25688 non-default address space to a default address space.
25689 Therefore we do not claim %fs or %gs are subsets of generic.
25691 Therefore we can (mostly) use the default hooks. */
25693 /* All use of segmentation is assumed to make address 0 valid. */
25695 static bool
25696 ix86_addr_space_zero_address_valid (addr_space_t as)
25698 return as != ADDR_SPACE_GENERIC;
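/* Illustrative sketch, assuming the __seg_fs / __seg_gs named address
   space keywords documented in the GCC manual: this hook is why address 0
   in a segmented space is not folded away as an invalid null dereference.

       struct tcb { int id; };
       int
       get_id (void)
       {
         return ((__seg_gs struct tcb *) 0)->id;   // loads %gs:0
       }

   Such %gs-relative accesses to offset 0 are ordinary TLS-style loads,
   so they must survive -fdelete-null-pointer-checks.  */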
25701 static void
25702 ix86_init_libfuncs (void)
25704 if (TARGET_64BIT)
25706 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
25707 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
25709 else
25711 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
25712 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
25715 #if TARGET_MACHO
25716 darwin_rename_builtins ();
25717 #endif
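/* Illustrative note, assuming the usual libgcc2 entry points: on x86-64
   a 128-bit division that is not expanded inline becomes a libcall, e.g.

       __int128 quot_rem (__int128 a, __int128 b, __int128 *r)
       {
         *r = a % b;     // may use __divmodti4 together with ...
         return a / b;   // ... this division of the same operands
       }

   Registering the divmod optab libfuncs above lets the middle end fold a
   matching division/remainder pair into one __divmodti4 call.  */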
25720 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
25721 FPU, assume that the fpcw is set to extended precision; when using
25722 only SSE, rounding is correct; when using both SSE and the FPU,
25723 the rounding precision is indeterminate, since either may be chosen
25724 apparently at random. */
25726 static enum flt_eval_method
25727 ix86_get_excess_precision (enum excess_precision_type type)
25729 switch (type)
25731 case EXCESS_PRECISION_TYPE_FAST:
25732 /* The fastest type to promote to will always be the native type,
25733 whether that occurs with implicit excess precision or
25734 otherwise. */
25735 return TARGET_AVX512FP16
25736 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25737 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25738 case EXCESS_PRECISION_TYPE_STANDARD:
25739 case EXCESS_PRECISION_TYPE_IMPLICIT:
25740 /* Otherwise, the excess precision we want when we are
25741 in a standards compliant mode, and the implicit precision we
25742 provide would be identical were it not for the unpredictable
25743 cases. */
25744 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
25745 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25746 else if (!TARGET_80387)
25747 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25748 else if (!TARGET_MIX_SSE_I387)
25750 if (!(TARGET_SSE && TARGET_SSE_MATH))
25751 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
25752 else if (TARGET_SSE2)
25753 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
25756 /* If we are in standards compliant mode, but we know we will
25757 calculate in unpredictable precision, return
25758 FLT_EVAL_METHOD_PROMOTE_TO_FLOAT. There is no reason to introduce explicit
25759 excess precision if the target can't guarantee it will honor
25760 it. */
25761 return (type == EXCESS_PRECISION_TYPE_STANDARD
25762 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
25763 : FLT_EVAL_METHOD_UNPREDICTABLE);
25764 case EXCESS_PRECISION_TYPE_FLOAT16:
25765 if (TARGET_80387
25766 && !(TARGET_SSE_MATH && TARGET_SSE))
25767 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
25768 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25769 default:
25770 gcc_unreachable ();
25773 return FLT_EVAL_METHOD_UNPREDICTABLE;
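/* Worked example, illustrative only: with -mfpmath=387 and
   -fexcess-precision=standard the hook above returns
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE, so in

       float madd (float a, float b, float c) { return a * b + c; }

   the product and the sum are carried out in the 80-bit x87 format and
   only the final result is rounded to float (FLT_EVAL_METHOD == 2 in
   <float.h>).  With SSE math the same code is evaluated in float
   (FLT_EVAL_METHOD == 0), or FLT_EVAL_METHOD == 16 when the _Float16
   promotion above applies.  */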
25776 /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
25777 bool
25778 ix86_bitint_type_info (int n, struct bitint_info *info)
25780 if (!TARGET_64BIT)
25781 return false;
25782 if (n <= 8)
25783 info->limb_mode = QImode;
25784 else if (n <= 16)
25785 info->limb_mode = HImode;
25786 else if (n <= 32)
25787 info->limb_mode = SImode;
25788 else
25789 info->limb_mode = DImode;
25790 info->abi_limb_mode = info->limb_mode;
25791 info->big_endian = false;
25792 info->extended = false;
25793 return true;
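/* Worked example, illustrative only: with the limb selection above on
   x86-64,

       _BitInt(7)   uses one QImode limb,
       _BitInt(16)  uses one HImode limb,
       _BitInt(31)  uses one SImode limb,
       _BitInt(200) uses four DImode limbs (ceil (200 / 64)),

   stored little-endian and without extension of the unused bits in the
   top limb, matching the big_endian and extended fields set above;
   32-bit targets get no _BitInt support at all.  */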
25796 /* Implement PUSH_ROUNDING. On the 386, the pushw instruction decrements
25797 the stack pointer by exactly 2 regardless of alignment; there is no pushb.
25799 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
25800 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
25801 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
25803 poly_int64
25804 ix86_push_rounding (poly_int64 bytes)
25806 return ROUND_UP (bytes, UNITS_PER_WORD);
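/* Worked example, illustrative only: pushing a 2-byte HImode value gives

       ix86_push_rounding (2) == 4   for 32-bit targets
       ix86_push_rounding (2) == 8   for 64-bit targets

   since UNITS_PER_WORD is 4 resp. 8, keeping every stack adjustment a
   multiple of the CIE data alignment factor mentioned above.  */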
25809 /* LAM_U48 uses 8 bits of metadata starting at bit 48;
25810 LAM_U57 uses 6 bits of metadata starting at bit 57. */
25811 #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
25812 ? 48 \
25813 : (ix86_lam_type == lam_u57 ? 57 : 0))
25814 #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
25815 ? 8 \
25816 : (ix86_lam_type == lam_u57 ? 6 : 0))
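/* Worked example, illustrative only: under LAM_U57 the helpers below view
   a tagged pointer as

       bit  63     kept as-is (see ix86_memtag_untagged_pointer),
       bits 62..57 the 6-bit tag (IX86_HWASAN_TAG_SIZE == 6),
       bits 56..0  the untagged address (IX86_HWASAN_SHIFT == 57),

   so ix86_memtag_set_tag computes untagged | (tag << 57),
   ix86_memtag_extract_tag shifts right by 57 and masks to 6 bits, and
   ix86_memtag_untagged_pointer ANDs with (1 << 57) + (1 << 63) - 1,
   which clears only the tag bits.  */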
25818 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
25819 bool
25820 ix86_memtag_can_tag_addresses ()
25822 return ix86_lam_type != lam_none && TARGET_LP64;
25825 /* Implement TARGET_MEMTAG_TAG_SIZE. */
25826 unsigned char
25827 ix86_memtag_tag_size ()
25829 return IX86_HWASAN_TAG_SIZE;
25832 /* Implement TARGET_MEMTAG_SET_TAG. */
25833 rtx
25834 ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
25836 /* default_memtag_insert_random_tag may
25837 generate a tag value wider than 6 bits. */
25838 if (ix86_lam_type == lam_u57)
25840 unsigned HOST_WIDE_INT and_imm
25841 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25843 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
25845 tag = expand_simple_binop (Pmode, ASHIFT, tag,
25846 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
25847 /* unsignedp = */1, OPTAB_WIDEN);
25848 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
25849 /* unsignedp = */1, OPTAB_DIRECT);
25850 return ret;
25853 /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
25854 rtx
25855 ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
25857 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
25858 GEN_INT (IX86_HWASAN_SHIFT), target,
25859 /* unsignedp = */0,
25860 OPTAB_DIRECT);
25861 rtx ret = gen_reg_rtx (QImode);
25862 /* Mask off bit63 when LAM_U57. */
25863 if (ix86_lam_type == lam_u57)
25865 unsigned HOST_WIDE_INT and_imm
25866 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
25867 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
25868 gen_int_mode (and_imm, QImode)));
25870 else
25871 emit_move_insn (ret, gen_lowpart (QImode, tag));
25872 return ret;
25875 /* Implement TARGET_MEMTAG_UNTAGGED_POINTER. */
25876 rtx
25877 ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
25879 /* Leave bit63 alone. */
25880 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
25881 + (HOST_WIDE_INT_1U << 63) - 1),
25882 Pmode);
25883 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
25884 tag_mask, target, true,
25885 OPTAB_DIRECT);
25886 gcc_assert (untagged_base);
25887 return untagged_base;
25890 /* Implement TARGET_MEMTAG_ADD_TAG. */
25891 rtx
25892 ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
25894 rtx base_tag = gen_reg_rtx (QImode);
25895 rtx base_addr = gen_reg_rtx (Pmode);
25896 rtx tagged_addr = gen_reg_rtx (Pmode);
25897 rtx new_tag = gen_reg_rtx (QImode);
25898 unsigned HOST_WIDE_INT and_imm
25899 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
25901 /* When tag addition "overflows", the most
25902 significant bit needs to be masked off. */
25903 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
25904 emit_move_insn (base_addr,
25905 ix86_memtag_untagged_pointer (base, NULL_RTX));
25906 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
25907 emit_move_insn (new_tag, base_tag);
25908 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
25909 emit_move_insn (tagged_addr,
25910 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
25911 return plus_constant (Pmode, tagged_addr, offset);
25914 /* Target-specific selftests. */
25916 #if CHECKING_P
25918 namespace selftest {
25920 /* Verify that hard regs are dumped as expected (in compact mode). */
25922 static void
25923 ix86_test_dumping_hard_regs ()
25925 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
25926 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
25929 /* Test dumping an insn with repeated references to the same SCRATCH,
25930 to verify the rtx_reuse code. */
25932 static void
25933 ix86_test_dumping_memory_blockage ()
25935 set_new_first_and_last_insn (NULL, NULL);
25937 rtx pat = gen_memory_blockage ();
25938 rtx_reuse_manager r;
25939 r.preprocess (pat);
25941 /* Verify that the repeated references to the SCRATCH show the use
25942 of reuse IDs. The first should be prefixed with a reuse ID,
25943 and the second should be dumped as a "reuse_rtx" of that ID.
25944 The expected string assumes Pmode == DImode. */
25945 if (Pmode == DImode)
25946 ASSERT_RTL_DUMP_EQ_WITH_REUSE
25947 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
25948 " (unspec:BLK [\n"
25949 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
25950 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
25953 /* Verify loading an RTL dump; specifically a dump of copying
25954 a param on x86_64 from a hard reg into the frame.
25955 This test is target-specific since the dump contains target-specific
25956 hard reg names. */
25958 static void
25959 ix86_test_loading_dump_fragment_1 ()
25961 rtl_dump_test t (SELFTEST_LOCATION,
25962 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
25964 rtx_insn *insn = get_insn_by_uid (1);
25966 /* The block structure and indentation here are purely for
25967 readability; it mirrors the structure of the rtx. */
25968 tree mem_expr;
25970 rtx pat = PATTERN (insn);
25971 ASSERT_EQ (SET, GET_CODE (pat));
25973 rtx dest = SET_DEST (pat);
25974 ASSERT_EQ (MEM, GET_CODE (dest));
25975 /* Verify the "/c" was parsed. */
25976 ASSERT_TRUE (RTX_FLAG (dest, call));
25977 ASSERT_EQ (SImode, GET_MODE (dest));
25979 rtx addr = XEXP (dest, 0);
25980 ASSERT_EQ (PLUS, GET_CODE (addr));
25981 ASSERT_EQ (DImode, GET_MODE (addr));
25983 rtx lhs = XEXP (addr, 0);
25984 /* Verify that the "frame" REG was consolidated. */
25985 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
25988 rtx rhs = XEXP (addr, 1);
25989 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
25990 ASSERT_EQ (-4, INTVAL (rhs));
25993 /* Verify the "[1 i+0 S4 A32]" was parsed. */
25994 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
25995 /* "i" should have been handled by synthesizing a global int
25996 variable named "i". */
25997 mem_expr = MEM_EXPR (dest);
25998 ASSERT_NE (mem_expr, NULL);
25999 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
26000 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
26001 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
26002 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
26003 /* "+0". */
26004 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
26005 ASSERT_EQ (0, MEM_OFFSET (dest));
26006 /* "S4". */
26007 ASSERT_EQ (4, MEM_SIZE (dest));
26008 /* "A32. */
26009 ASSERT_EQ (32, MEM_ALIGN (dest));
26012 rtx src = SET_SRC (pat);
26013 ASSERT_EQ (REG, GET_CODE (src));
26014 ASSERT_EQ (SImode, GET_MODE (src));
26015 ASSERT_EQ (5, REGNO (src));
26016 tree reg_expr = REG_EXPR (src);
26017 /* "i" here should point to the same var as for the MEM_EXPR. */
26018 ASSERT_EQ (reg_expr, mem_expr);
26023 /* Verify that the RTL loader copes with a call_insn dump.
26024 This test is target-specific since the dump contains a target-specific
26025 hard reg name. */
26027 static void
26028 ix86_test_loading_call_insn ()
26030 /* The test dump includes register "xmm0", which requires TARGET_SSE
26031 to exist. */
26032 if (!TARGET_SSE)
26033 return;
26035 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
26037 rtx_insn *insn = get_insns ();
26038 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
26040 /* "/j". */
26041 ASSERT_TRUE (RTX_FLAG (insn, jump));
26043 rtx pat = PATTERN (insn);
26044 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
26046 /* Verify REG_NOTES. */
26048 /* "(expr_list:REG_CALL_DECL". */
26049 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
26050 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
26051 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
26053 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
26054 rtx_expr_list *note1 = note0->next ();
26055 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
26057 ASSERT_EQ (NULL, note1->next ());
26060 /* Verify CALL_INSN_FUNCTION_USAGE. */
26062 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
26063 rtx_expr_list *usage
26064 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
26065 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
26066 ASSERT_EQ (DFmode, GET_MODE (usage));
26067 ASSERT_EQ (USE, GET_CODE (usage->element ()));
26068 ASSERT_EQ (NULL, usage->next ());
26072 /* Verify that the RTL loader copes with a dump from print_rtx_function.
26073 This test is target-specific since the dump contains target-specific
26074 hard reg names. */
26076 static void
26077 ix86_test_loading_full_dump ()
26079 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
26081 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26083 rtx_insn *insn_1 = get_insn_by_uid (1);
26084 ASSERT_EQ (NOTE, GET_CODE (insn_1));
26086 rtx_insn *insn_7 = get_insn_by_uid (7);
26087 ASSERT_EQ (INSN, GET_CODE (insn_7));
26088 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
26090 rtx_insn *insn_15 = get_insn_by_uid (15);
26091 ASSERT_EQ (INSN, GET_CODE (insn_15));
26092 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
26094 /* Verify crtl->return_rtx. */
26095 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
26096 ASSERT_EQ (0, REGNO (crtl->return_rtx));
26097 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
26100 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
26101 In particular, verify that it correctly loads the 2nd operand.
26102 This test is target-specific since these are machine-specific
26103 operands (and enums). */
26105 static void
26106 ix86_test_loading_unspec ()
26108 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
26110 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
26112 ASSERT_TRUE (cfun);
26114 /* Test of an UNSPEC. */
26115 rtx_insn *insn = get_insns ();
26116 ASSERT_EQ (INSN, GET_CODE (insn));
26117 rtx set = single_set (insn);
26118 ASSERT_NE (NULL, set);
26119 rtx dst = SET_DEST (set);
26120 ASSERT_EQ (MEM, GET_CODE (dst));
26121 rtx src = SET_SRC (set);
26122 ASSERT_EQ (UNSPEC, GET_CODE (src));
26123 ASSERT_EQ (BLKmode, GET_MODE (src));
26124 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
26126 rtx v0 = XVECEXP (src, 0, 0);
26128 /* Verify that the two uses of the first SCRATCH have pointer
26129 equality. */
26130 rtx scratch_a = XEXP (dst, 0);
26131 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
26133 rtx scratch_b = XEXP (v0, 0);
26134 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
26136 ASSERT_EQ (scratch_a, scratch_b);
26138 /* Verify that the two mems are thus treated as equal. */
26139 ASSERT_TRUE (rtx_equal_p (dst, v0));
26141 /* Verify that the insn is recognized. */
26142 ASSERT_NE (-1, recog_memoized (insn));
26144 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
26145 insn = NEXT_INSN (insn);
26146 ASSERT_EQ (INSN, GET_CODE (insn));
26148 set = single_set (insn);
26149 ASSERT_NE (NULL, set);
26151 src = SET_SRC (set);
26152 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
26153 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
26156 /* Run all target-specific selftests. */
26158 static void
26159 ix86_run_selftests (void)
26161 ix86_test_dumping_hard_regs ();
26162 ix86_test_dumping_memory_blockage ();
26164 /* Various tests of loading RTL dumps, here because they contain
26165 ix86-isms (e.g. names of hard regs). */
26166 ix86_test_loading_dump_fragment_1 ();
26167 ix86_test_loading_call_insn ();
26168 ix86_test_loading_full_dump ();
26169 ix86_test_loading_unspec ();
26172 } // namespace selftest
26174 #endif /* CHECKING_P */
26176 static const scoped_attribute_specs *const ix86_attribute_table[] =
26178 &ix86_gnu_attribute_table
26181 /* Initialize the GCC target structure. */
26182 #undef TARGET_RETURN_IN_MEMORY
26183 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
26185 #undef TARGET_LEGITIMIZE_ADDRESS
26186 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
26188 #undef TARGET_ATTRIBUTE_TABLE
26189 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
26190 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
26191 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
26192 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26193 # undef TARGET_MERGE_DECL_ATTRIBUTES
26194 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
26195 #endif
26197 #undef TARGET_INVALID_CONVERSION
26198 #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
26200 #undef TARGET_INVALID_UNARY_OP
26201 #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
26203 #undef TARGET_INVALID_BINARY_OP
26204 #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
26206 #undef TARGET_COMP_TYPE_ATTRIBUTES
26207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
26209 #undef TARGET_INIT_BUILTINS
26210 #define TARGET_INIT_BUILTINS ix86_init_builtins
26211 #undef TARGET_BUILTIN_DECL
26212 #define TARGET_BUILTIN_DECL ix86_builtin_decl
26213 #undef TARGET_EXPAND_BUILTIN
26214 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
26216 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
26217 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
26218 ix86_builtin_vectorized_function
26220 #undef TARGET_VECTORIZE_BUILTIN_GATHER
26221 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
26223 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
26224 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
26226 #undef TARGET_BUILTIN_RECIPROCAL
26227 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
26229 #undef TARGET_ASM_FUNCTION_EPILOGUE
26230 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
26232 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
26233 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
26234 ix86_print_patchable_function_entry
26236 #undef TARGET_ENCODE_SECTION_INFO
26237 #ifndef SUBTARGET_ENCODE_SECTION_INFO
26238 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
26239 #else
26240 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
26241 #endif
26243 #undef TARGET_ASM_OPEN_PAREN
26244 #define TARGET_ASM_OPEN_PAREN ""
26245 #undef TARGET_ASM_CLOSE_PAREN
26246 #define TARGET_ASM_CLOSE_PAREN ""
26248 #undef TARGET_ASM_BYTE_OP
26249 #define TARGET_ASM_BYTE_OP ASM_BYTE
26251 #undef TARGET_ASM_ALIGNED_HI_OP
26252 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
26253 #undef TARGET_ASM_ALIGNED_SI_OP
26254 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
26255 #ifdef ASM_QUAD
26256 #undef TARGET_ASM_ALIGNED_DI_OP
26257 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
26258 #endif
26260 #undef TARGET_PROFILE_BEFORE_PROLOGUE
26261 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
26263 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
26264 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
26266 #undef TARGET_ASM_UNALIGNED_HI_OP
26267 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
26268 #undef TARGET_ASM_UNALIGNED_SI_OP
26269 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
26270 #undef TARGET_ASM_UNALIGNED_DI_OP
26271 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
26273 #undef TARGET_PRINT_OPERAND
26274 #define TARGET_PRINT_OPERAND ix86_print_operand
26275 #undef TARGET_PRINT_OPERAND_ADDRESS
26276 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
26277 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
26278 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
26279 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
26280 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
26282 #undef TARGET_SCHED_INIT_GLOBAL
26283 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
26284 #undef TARGET_SCHED_ADJUST_COST
26285 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26286 #undef TARGET_SCHED_ISSUE_RATE
26287 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26288 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26289 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26290 ia32_multipass_dfa_lookahead
26291 #undef TARGET_SCHED_MACRO_FUSION_P
26292 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
26293 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
26294 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
26296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
26297 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26299 #undef TARGET_MEMMODEL_CHECK
26300 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
26302 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
26303 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
26305 #ifdef HAVE_AS_TLS
26306 #undef TARGET_HAVE_TLS
26307 #define TARGET_HAVE_TLS true
26308 #endif
26309 #undef TARGET_CANNOT_FORCE_CONST_MEM
26310 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26311 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26312 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26314 #undef TARGET_DELEGITIMIZE_ADDRESS
26315 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26317 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
26318 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
26320 #undef TARGET_MS_BITFIELD_LAYOUT_P
26321 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26323 #if TARGET_MACHO
26324 #undef TARGET_BINDS_LOCAL_P
26325 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26326 #else
26327 #undef TARGET_BINDS_LOCAL_P
26328 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
26329 #endif
26330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26331 #undef TARGET_BINDS_LOCAL_P
26332 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26333 #endif
26335 #undef TARGET_ASM_OUTPUT_MI_THUNK
26336 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26337 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26338 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26340 #undef TARGET_ASM_FILE_START
26341 #define TARGET_ASM_FILE_START x86_file_start
26343 #undef TARGET_OPTION_OVERRIDE
26344 #define TARGET_OPTION_OVERRIDE ix86_option_override
26346 #undef TARGET_REGISTER_MOVE_COST
26347 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
26348 #undef TARGET_MEMORY_MOVE_COST
26349 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
26350 #undef TARGET_RTX_COSTS
26351 #define TARGET_RTX_COSTS ix86_rtx_costs
26352 #undef TARGET_ADDRESS_COST
26353 #define TARGET_ADDRESS_COST ix86_address_cost
26355 #undef TARGET_OVERLAP_OP_BY_PIECES_P
26356 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
26358 #undef TARGET_FLAGS_REGNUM
26359 #define TARGET_FLAGS_REGNUM FLAGS_REG
26360 #undef TARGET_FIXED_CONDITION_CODE_REGS
26361 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26362 #undef TARGET_CC_MODES_COMPATIBLE
26363 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26365 #undef TARGET_MACHINE_DEPENDENT_REORG
26366 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26368 #undef TARGET_BUILD_BUILTIN_VA_LIST
26369 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26371 #undef TARGET_FOLD_BUILTIN
26372 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
26374 #undef TARGET_GIMPLE_FOLD_BUILTIN
26375 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
26377 #undef TARGET_COMPARE_VERSION_PRIORITY
26378 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
26380 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
26381 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
26382 ix86_generate_version_dispatcher_body
26384 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
26385 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
26386 ix86_get_function_versions_dispatcher
26388 #undef TARGET_ENUM_VA_LIST_P
26389 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
26391 #undef TARGET_FN_ABI_VA_LIST
26392 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26394 #undef TARGET_CANONICAL_VA_LIST_TYPE
26395 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26397 #undef TARGET_EXPAND_BUILTIN_VA_START
26398 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26400 #undef TARGET_MD_ASM_ADJUST
26401 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
26403 #undef TARGET_C_EXCESS_PRECISION
26404 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
26405 #undef TARGET_C_BITINT_TYPE_INFO
26406 #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
26407 #undef TARGET_PROMOTE_PROTOTYPES
26408 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26409 #undef TARGET_PUSH_ARGUMENT
26410 #define TARGET_PUSH_ARGUMENT ix86_push_argument
26411 #undef TARGET_SETUP_INCOMING_VARARGS
26412 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26413 #undef TARGET_MUST_PASS_IN_STACK
26414 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26415 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
26416 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
26417 #undef TARGET_FUNCTION_ARG_ADVANCE
26418 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
26419 #undef TARGET_FUNCTION_ARG
26420 #define TARGET_FUNCTION_ARG ix86_function_arg
26421 #undef TARGET_INIT_PIC_REG
26422 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
26423 #undef TARGET_USE_PSEUDO_PIC_REG
26424 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
26425 #undef TARGET_FUNCTION_ARG_BOUNDARY
26426 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
26427 #undef TARGET_PASS_BY_REFERENCE
26428 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26429 #undef TARGET_INTERNAL_ARG_POINTER
26430 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26431 #undef TARGET_UPDATE_STACK_BOUNDARY
26432 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
26433 #undef TARGET_GET_DRAP_RTX
26434 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
26435 #undef TARGET_STRICT_ARGUMENT_NAMING
26436 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26437 #undef TARGET_STATIC_CHAIN
26438 #define TARGET_STATIC_CHAIN ix86_static_chain
26439 #undef TARGET_TRAMPOLINE_INIT
26440 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
26441 #undef TARGET_RETURN_POPS_ARGS
26442 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
26444 #undef TARGET_WARN_FUNC_RETURN
26445 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
26447 #undef TARGET_LEGITIMATE_COMBINED_INSN
26448 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
26450 #undef TARGET_ASAN_SHADOW_OFFSET
26451 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
26453 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26454 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26456 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26457 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26459 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
26460 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
26461 ix86_libgcc_floating_mode_supported_p
26463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26466 #undef TARGET_C_MODE_FOR_SUFFIX
26467 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26469 #ifdef HAVE_AS_TLS
26470 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26471 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26472 #endif
26474 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26475 #undef TARGET_INSERT_ATTRIBUTES
26476 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26477 #endif
26479 #undef TARGET_MANGLE_TYPE
26480 #define TARGET_MANGLE_TYPE ix86_mangle_type
26482 #undef TARGET_EMIT_SUPPORT_TINFOS
26483 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
26485 #undef TARGET_STACK_PROTECT_GUARD
26486 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
26488 #if !TARGET_MACHO
26489 #undef TARGET_STACK_PROTECT_FAIL
26490 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26491 #endif
26493 #undef TARGET_FUNCTION_VALUE
26494 #define TARGET_FUNCTION_VALUE ix86_function_value
26496 #undef TARGET_FUNCTION_VALUE_REGNO_P
26497 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
26499 #undef TARGET_ZERO_CALL_USED_REGS
26500 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
26502 #undef TARGET_PROMOTE_FUNCTION_MODE
26503 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
26505 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
26506 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
26508 #undef TARGET_MEMBER_TYPE_FORCES_BLK
26509 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
26511 #undef TARGET_INSTANTIATE_DECLS
26512 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
26514 #undef TARGET_SECONDARY_RELOAD
26515 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26516 #undef TARGET_SECONDARY_MEMORY_NEEDED
26517 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
26518 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
26519 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
26521 #undef TARGET_CLASS_MAX_NREGS
26522 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
26524 #undef TARGET_PREFERRED_RELOAD_CLASS
26525 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
26526 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
26527 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
26528 #undef TARGET_CLASS_LIKELY_SPILLED_P
26529 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
26531 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26532 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
26533 ix86_builtin_vectorization_cost
26534 #undef TARGET_VECTORIZE_VEC_PERM_CONST
26535 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
26536 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
26537 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
26538 ix86_preferred_simd_mode
26539 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
26540 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
26541 ix86_split_reduction
26542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
26543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
26544 ix86_autovectorize_vector_modes
26545 #undef TARGET_VECTORIZE_GET_MASK_MODE
26546 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
26547 #undef TARGET_VECTORIZE_CREATE_COSTS
26548 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
26550 #undef TARGET_SET_CURRENT_FUNCTION
26551 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
26553 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
26554 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
26556 #undef TARGET_OPTION_SAVE
26557 #define TARGET_OPTION_SAVE ix86_function_specific_save
26559 #undef TARGET_OPTION_RESTORE
26560 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
26562 #undef TARGET_OPTION_POST_STREAM_IN
26563 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
26565 #undef TARGET_OPTION_PRINT
26566 #define TARGET_OPTION_PRINT ix86_function_specific_print
26568 #undef TARGET_OPTION_FUNCTION_VERSIONS
26569 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
26571 #undef TARGET_CAN_INLINE_P
26572 #define TARGET_CAN_INLINE_P ix86_can_inline_p
26574 #undef TARGET_LEGITIMATE_ADDRESS_P
26575 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
26577 #undef TARGET_REGISTER_PRIORITY
26578 #define TARGET_REGISTER_PRIORITY ix86_register_priority
26580 #undef TARGET_REGISTER_USAGE_LEVELING_P
26581 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
26583 #undef TARGET_LEGITIMATE_CONSTANT_P
26584 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
26586 #undef TARGET_COMPUTE_FRAME_LAYOUT
26587 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
26589 #undef TARGET_FRAME_POINTER_REQUIRED
26590 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
26592 #undef TARGET_CAN_ELIMINATE
26593 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
26595 #undef TARGET_EXTRA_LIVE_ON_ENTRY
26596 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26598 #undef TARGET_ASM_CODE_END
26599 #define TARGET_ASM_CODE_END ix86_code_end
26601 #undef TARGET_CONDITIONAL_REGISTER_USAGE
26602 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
26604 #undef TARGET_CANONICALIZE_COMPARISON
26605 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
26607 #undef TARGET_LOOP_UNROLL_ADJUST
26608 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
26610 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26611 #undef TARGET_SPILL_CLASS
26612 #define TARGET_SPILL_CLASS ix86_spill_class
26614 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
26615 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
26616 ix86_simd_clone_compute_vecsize_and_simdlen
26618 #undef TARGET_SIMD_CLONE_ADJUST
26619 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
26621 #undef TARGET_SIMD_CLONE_USABLE
26622 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
26624 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
26625 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
26627 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
26628 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
26629 ix86_float_exceptions_rounding_supported_p
26631 #undef TARGET_MODE_EMIT
26632 #define TARGET_MODE_EMIT ix86_emit_mode_set
26634 #undef TARGET_MODE_NEEDED
26635 #define TARGET_MODE_NEEDED ix86_mode_needed
26637 #undef TARGET_MODE_AFTER
26638 #define TARGET_MODE_AFTER ix86_mode_after
26640 #undef TARGET_MODE_ENTRY
26641 #define TARGET_MODE_ENTRY ix86_mode_entry
26643 #undef TARGET_MODE_EXIT
26644 #define TARGET_MODE_EXIT ix86_mode_exit
26646 #undef TARGET_MODE_PRIORITY
26647 #define TARGET_MODE_PRIORITY ix86_mode_priority
26649 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
26650 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
26652 #undef TARGET_OFFLOAD_OPTIONS
26653 #define TARGET_OFFLOAD_OPTIONS \
26654 ix86_offload_options
26656 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
26657 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
26659 #undef TARGET_OPTAB_SUPPORTED_P
26660 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
26662 #undef TARGET_HARD_REGNO_SCRATCH_OK
26663 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
26665 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
26666 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
26668 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
26669 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
26671 #undef TARGET_INIT_LIBFUNCS
26672 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
26674 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
26675 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
26677 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
26678 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
26680 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
26681 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
26683 #undef TARGET_HARD_REGNO_NREGS
26684 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
26685 #undef TARGET_HARD_REGNO_MODE_OK
26686 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
26688 #undef TARGET_MODES_TIEABLE_P
26689 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
26691 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
26692 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
26693 ix86_hard_regno_call_part_clobbered
26695 #undef TARGET_INSN_CALLEE_ABI
26696 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
26698 #undef TARGET_CAN_CHANGE_MODE_CLASS
26699 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
26701 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
26702 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
26704 #undef TARGET_STATIC_RTX_ALIGNMENT
26705 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
26706 #undef TARGET_CONSTANT_ALIGNMENT
26707 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
26709 #undef TARGET_EMPTY_RECORD_P
26710 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
26712 #undef TARGET_WARN_PARAMETER_PASSING_ABI
26713 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
26715 #undef TARGET_GET_MULTILIB_ABI_NAME
26716 #define TARGET_GET_MULTILIB_ABI_NAME \
26717 ix86_get_multilib_abi_name
26719 #undef TARGET_IFUNC_REF_LOCAL_OK
26720 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
26722 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
26723 # undef TARGET_ASM_RELOC_RW_MASK
26724 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
26725 #endif
26727 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
26728 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
26730 #undef TARGET_MEMTAG_ADD_TAG
26731 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
26733 #undef TARGET_MEMTAG_SET_TAG
26734 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
26736 #undef TARGET_MEMTAG_EXTRACT_TAG
26737 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
26739 #undef TARGET_MEMTAG_UNTAGGED_POINTER
26740 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
26742 #undef TARGET_MEMTAG_TAG_SIZE
26743 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
26745 static bool
26746 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
26748 #ifdef OPTION_GLIBC
26749 if (OPTION_GLIBC)
26750 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
26751 else
26752 return false;
26753 #else
26754 return false;
26755 #endif
26758 #undef TARGET_LIBC_HAS_FAST_FUNCTION
26759 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
26761 static unsigned
26762 ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
26763 bool boundary_p)
26765 #ifdef OPTION_GLIBC
26766 bool glibc_p = OPTION_GLIBC;
26767 #else
26768 bool glibc_p = false;
26769 #endif
26770 if (glibc_p)
26772 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
26773 unsigned int libmvec_ret = 0;
26774 if (!flag_trapping_math
26775 && flag_unsafe_math_optimizations
26776 && flag_finite_math_only
26777 && !flag_signed_zeros
26778 && !flag_errno_math)
26779 switch (cfn)
26781 CASE_CFN_COS:
26782 CASE_CFN_COS_FN:
26783 CASE_CFN_SIN:
26784 CASE_CFN_SIN_FN:
26785 if (!boundary_p)
26787 /* With non-default rounding modes, libmvec provides
26788 complete garbage in results. E.g.
26789 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
26790 returns 0.00333309174f rather than 1.40129846e-45f. */
26791 if (flag_rounding_math)
26792 return ~0U;
26793 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
26794 claims libmvec maximum error is 4ulps.
26795 My own random testing indicates 2ulps for SFmode and
26796 0.5ulps for DFmode, but let's go with the 4ulps. */
26797 libmvec_ret = 4;
26799 break;
26800 default:
26801 break;
26803 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
26804 boundary_p);
26805 return MAX (ret, libmvec_ret);
26807 return default_libm_function_max_error (cfn, mode, boundary_p);
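/* Worked example, illustrative only: for CASE_CFN_SIN in SFmode on a
   glibc target with the -Ofast-style flags tested above (no trapping
   math, unsafe and finite-only math, no signed zeros, no errno) and a
   default rounding mode, the result is
   MAX (glibc_linux_libm_function_max_error (...), 4), i.e. at least the
   4 ulps assumed for the libmvec vector variants; without those flags
   only the scalar glibc figure is returned.  */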
26810 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
26811 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
26813 #if CHECKING_P
26814 #undef TARGET_RUN_TARGET_SELFTESTS
26815 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
26816 #endif /* #if CHECKING_P */
26818 struct gcc_target targetm = TARGET_INITIALIZER;
26820 #include "gt-i386.h"