PR target/84902
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "tree-chkp.h"
77 #include "rtl-chkp.h"
78 #include "dbgcnt.h"
79 #include "case-cfn-macros.h"
80 #include "regrename.h"
81 #include "dojump.h"
82 #include "fold-const-call.h"
83 #include "tree-vrp.h"
84 #include "tree-ssanames.h"
85 #include "selftest.h"
86 #include "selftest-rtl.h"
87 #include "print-rtl.h"
88 #include "intl.h"
89 #include "ifcvt.h"
90 #include "symbol-summary.h"
91 #include "ipa-prop.h"
92 #include "ipa-fnsummary.h"
93 #include "wide-int-bitmask.h"
95 /* This file should be included last. */
96 #include "target-def.h"
98 #include "x86-tune-costs.h"
100 static rtx legitimize_dllimport_symbol (rtx, bool);
101 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
102 static rtx legitimize_pe_coff_symbol (rtx, bool);
103 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
104 static bool ix86_save_reg (unsigned int, bool, bool);
105 static bool ix86_function_naked (const_tree);
106 static bool ix86_notrack_prefixed_insn_p (rtx);
107 static void ix86_emit_restore_reg_using_pop (rtx);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
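/* Illustrative use (a sketch, not part of the original file): the per-mode
   multiply and divide cost tables in struct processor_costs are indexed
   with this macro, e.g.

       int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   which selects the SImode entry (index 2).  */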
123 /* Set by -mtune. */
124 const struct processor_costs *ix86_tune_cost = NULL;
126 /* Set by -mtune or -Os. */
127 const struct processor_costs *ix86_cost = NULL;
129 /* Processor feature/optimization bitmasks. */
130 #define m_386 (HOST_WIDE_INT_1U<<PROCESSOR_I386)
131 #define m_486 (HOST_WIDE_INT_1U<<PROCESSOR_I486)
132 #define m_PENT (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM)
133 #define m_LAKEMONT (HOST_WIDE_INT_1U<<PROCESSOR_LAKEMONT)
134 #define m_PPRO (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUMPRO)
135 #define m_PENT4 (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM4)
136 #define m_NOCONA (HOST_WIDE_INT_1U<<PROCESSOR_NOCONA)
137 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
138 #define m_CORE2 (HOST_WIDE_INT_1U<<PROCESSOR_CORE2)
139 #define m_NEHALEM (HOST_WIDE_INT_1U<<PROCESSOR_NEHALEM)
140 #define m_SANDYBRIDGE (HOST_WIDE_INT_1U<<PROCESSOR_SANDYBRIDGE)
141 #define m_HASWELL (HOST_WIDE_INT_1U<<PROCESSOR_HASWELL)
142 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
143 #define m_BONNELL (HOST_WIDE_INT_1U<<PROCESSOR_BONNELL)
144 #define m_SILVERMONT (HOST_WIDE_INT_1U<<PROCESSOR_SILVERMONT)
145 #define m_KNL (HOST_WIDE_INT_1U<<PROCESSOR_KNL)
146 #define m_KNM (HOST_WIDE_INT_1U<<PROCESSOR_KNM)
147 #define m_SKYLAKE_AVX512 (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE_AVX512)
148 #define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE)
149 #define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT)
150 #define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER)
151 #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
153 #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
154 #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
155 #define m_K6_GEODE (m_K6 | m_GEODE)
156 #define m_K8 (HOST_WIDE_INT_1U<<PROCESSOR_K8)
157 #define m_ATHLON (HOST_WIDE_INT_1U<<PROCESSOR_ATHLON)
158 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
159 #define m_AMDFAM10 (HOST_WIDE_INT_1U<<PROCESSOR_AMDFAM10)
160 #define m_BDVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER1)
161 #define m_BDVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER2)
162 #define m_BDVER3 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER3)
163 #define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4)
164 #define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
165 #define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
166 #define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
167 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
168 #define m_BTVER (m_BTVER1 | m_BTVER2)
169 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
170 | m_ZNVER1)
172 #define m_GENERIC (HOST_WIDE_INT_1U<<PROCESSOR_GENERIC)
174 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
175 #undef DEF_TUNE
176 #define DEF_TUNE(tune, name, selector) name,
177 #include "x86-tune.def"
178 #undef DEF_TUNE
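/* Sketch of how the DEF_TUNE x-macro expands (the entry shown is only
   illustrative): a line in x86-tune.def such as

       DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", m_PENT | m_PPRO | ...)

   contributes the string "schedule" to ix86_tune_feature_names above and
   the processor mask to initial_ix86_tune_features below.  */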
181 /* Feature tests against the various tunings. */
182 unsigned char ix86_tune_features[X86_TUNE_LAST];
184 /* Feature tests against the various tunings used to create ix86_tune_features
185 based on the processor mask. */
186 static unsigned HOST_WIDE_INT initial_ix86_tune_features[X86_TUNE_LAST] = {
187 #undef DEF_TUNE
188 #define DEF_TUNE(tune, name, selector) selector,
189 #include "x86-tune.def"
190 #undef DEF_TUNE
193 /* Feature tests against the various architecture variations. */
194 unsigned char ix86_arch_features[X86_ARCH_LAST];
196 /* Feature tests against the various architecture variations, used to create
197 ix86_arch_features based on the processor mask. */
198 static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
199 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
200 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
202 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
203 ~m_386,
205 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
206 ~(m_386 | m_486),
208 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
209 ~m_386,
211 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
212 ~m_386,
215 /* In case the average insn count for single function invocation is
216 lower than this constant, emit fast (but longer) prologue and
217 epilogue code. */
218 #define FAST_PROLOGUE_INSN_COUNT 20
220 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
221 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
222 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
223 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
225 /* Array of the smallest class containing reg number REGNO, indexed by
226 REGNO. Used by REGNO_REG_CLASS in i386.h. */
228 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
230 /* ax, dx, cx, bx */
231 AREG, DREG, CREG, BREG,
232 /* si, di, bp, sp */
233 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
234 /* FP registers */
235 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
236 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
237 /* arg pointer */
238 NON_Q_REGS,
239 /* flags, fpsr, fpcr, frame */
240 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
241 /* SSE registers */
242 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
243 SSE_REGS, SSE_REGS,
244 /* MMX registers */
245 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
246 MMX_REGS, MMX_REGS,
247 /* REX registers */
248 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
250 /* SSE REX registers */
251 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
252 SSE_REGS, SSE_REGS,
253 /* AVX-512 SSE registers */
254 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
255 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
256 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
257 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
258 /* Mask registers. */
259 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
260 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
261 /* MPX bound registers */
262 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
265 /* The "default" register map used in 32bit mode. */
267 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
269 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
270 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
271 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
272 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
273 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
274 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
275 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
276 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
277 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
278 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
279 101, 102, 103, 104, /* bound registers */
282 /* The "default" register map used in 64bit mode. */
284 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
286 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
287 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
288 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
289 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
290 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
291 8,9,10,11,12,13,14,15, /* extended integer registers */
292 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
293 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
294 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
295 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
296 126, 127, 128, 129, /* bound registers */
299 /* Define the register numbers to be used in Dwarf debugging information.
300 The SVR4 reference port C compiler uses the following register numbers
301 in its Dwarf output code:
302 0 for %eax (gcc regno = 0)
303 1 for %ecx (gcc regno = 2)
304 2 for %edx (gcc regno = 1)
305 3 for %ebx (gcc regno = 3)
306 4 for %esp (gcc regno = 7)
307 5 for %ebp (gcc regno = 6)
308 6 for %esi (gcc regno = 4)
309 7 for %edi (gcc regno = 5)
310 The following three DWARF register numbers are never generated by
311 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
 312      believed these numbers to have these meanings.
313 8 for %eip (no gcc equivalent)
314 9 for %eflags (gcc regno = 17)
315 10 for %trapno (no gcc equivalent)
316 It is not at all clear how we should number the FP stack registers
317 for the x86 architecture. If the version of SDB on x86/svr4 were
318 a bit less brain dead with respect to floating-point then we would
319 have a precedent to follow with respect to DWARF register numbers
320 for x86 FP registers, but the SDB on x86/svr4 was so completely
321 broken with respect to FP registers that it is hardly worth thinking
322 of it as something to strive for compatibility with.
323 The version of x86/svr4 SDB I had does (partially)
324 seem to believe that DWARF register number 11 is associated with
325 the x86 register %st(0), but that's about all. Higher DWARF
326 register numbers don't seem to be associated with anything in
327 particular, and even for DWARF regno 11, SDB only seemed to under-
328 stand that it should say that a variable lives in %st(0) (when
329 asked via an `=' command) if we said it was in DWARF regno 11,
330 but SDB still printed garbage when asked for the value of the
331 variable in question (via a `/' command).
332 (Also note that the labels SDB printed for various FP stack regs
333 when doing an `x' command were all wrong.)
334 Note that these problems generally don't affect the native SVR4
335 C compiler because it doesn't allow the use of -O with -g and
336 because when it is *not* optimizing, it allocates a memory
337 location for each floating-point variable, and the memory
338 location is what gets described in the DWARF AT_location
339 attribute for the variable in question.
340 Regardless of the severe mental illness of the x86/svr4 SDB, we
341 do something sensible here and we use the following DWARF
342 register numbers. Note that these are all stack-top-relative
343 numbers.
344 11 for %st(0) (gcc regno = 8)
345 12 for %st(1) (gcc regno = 9)
346 13 for %st(2) (gcc regno = 10)
347 14 for %st(3) (gcc regno = 11)
348 15 for %st(4) (gcc regno = 12)
349 16 for %st(5) (gcc regno = 13)
350 17 for %st(6) (gcc regno = 14)
351 18 for %st(7) (gcc regno = 15)
353 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
355 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
356 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
357 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
358 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
359 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
360 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
361 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
362 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
363 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
364 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
365 101, 102, 103, 104, /* bound registers */
368 /* Define parameter passing and return registers. */
370 static int const x86_64_int_parameter_registers[6] =
372 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
375 static int const x86_64_ms_abi_int_parameter_registers[4] =
377 CX_REG, DX_REG, R8_REG, R9_REG
380 static int const x86_64_int_return_registers[4] =
382 AX_REG, DX_REG, DI_REG, SI_REG
385 /* Additional registers that are clobbered by SYSV calls. */
387 #define NUM_X86_64_MS_CLOBBERED_REGS 12
388 static int const x86_64_ms_sysv_extra_clobbered_registers
389 [NUM_X86_64_MS_CLOBBERED_REGS] =
391 SI_REG, DI_REG,
392 XMM6_REG, XMM7_REG,
393 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
394 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
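/* Background note (not in the original source): the Microsoft x86-64 ABI
   treats rsi, rdi and xmm6-xmm15 as callee-saved, while the SysV ABI
   clobbers them across calls, so an ms_abi function that calls a sysv_abi
   function must save and restore all of them.  The out-of-line
   save/restore ("xlogue") stubs described below exist to keep that
   save/restore code small.  */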
397 enum xlogue_stub {
398 XLOGUE_STUB_SAVE,
399 XLOGUE_STUB_RESTORE,
400 XLOGUE_STUB_RESTORE_TAIL,
401 XLOGUE_STUB_SAVE_HFP,
402 XLOGUE_STUB_RESTORE_HFP,
403 XLOGUE_STUB_RESTORE_HFP_TAIL,
405 XLOGUE_STUB_COUNT
408 enum xlogue_stub_sets {
409 XLOGUE_SET_ALIGNED,
410 XLOGUE_SET_ALIGNED_PLUS_8,
411 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
412 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
414 XLOGUE_SET_COUNT
417 /* Register save/restore layout used by out-of-line stubs. */
418 class xlogue_layout {
419 public:
420 struct reginfo
422 unsigned regno;
423 HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or
424 rsi) to where each register is stored. */
427 unsigned get_nregs () const {return m_nregs;}
428 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
430 const reginfo &get_reginfo (unsigned reg) const
432 gcc_assert (reg < m_nregs);
433 return m_regs[reg];
436 static const char *get_stub_name (enum xlogue_stub stub,
437 unsigned n_extra_args);
439 /* Returns an rtx for the stub's symbol based upon
440 1.) the specified stub (save, restore or restore_ret) and
441 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
 442    3.) whether or not stack alignment is being performed.  */
443 static rtx get_stub_rtx (enum xlogue_stub stub);
445 /* Returns the amount of stack space (including padding) that the stub
446 needs to store registers based upon data in the machine_function. */
447 HOST_WIDE_INT get_stack_space_used () const
449 const struct machine_function *m = cfun->machine;
450 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
452 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
453 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
456 /* Returns the offset for the base pointer used by the stub. */
457 HOST_WIDE_INT get_stub_ptr_offset () const
459 return STUB_INDEX_OFFSET + m_stack_align_off_in;
462 static const struct xlogue_layout &get_instance ();
463 static unsigned count_stub_managed_regs ();
464 static bool is_stub_managed_reg (unsigned regno, unsigned count);
466 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
467 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
468 static const unsigned MAX_REGS = 18;
469 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
470 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
471 static const unsigned STUB_NAME_MAX_LEN = 20;
472 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
473 static const unsigned REG_ORDER[MAX_REGS];
474 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
476 private:
477 xlogue_layout ();
478 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
479 xlogue_layout (const xlogue_layout &);
481 /* True if hard frame pointer is used. */
482 bool m_hfp;
 484   /* Max number of registers this layout manages.  */
485 unsigned m_nregs;
487 /* Incoming offset from 16-byte alignment. */
488 HOST_WIDE_INT m_stack_align_off_in;
490 /* Register order and offsets. */
491 struct reginfo m_regs[MAX_REGS];
493 /* Lazy-inited cache of symbol names for stubs. */
494 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
495 [STUB_NAME_MAX_LEN];
497 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
500 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
501 "savms64",
502 "resms64",
503 "resms64x",
504 "savms64f",
505 "resms64f",
506 "resms64fx"
509 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
510 /* The below offset values are where each register is stored for the layout
511 relative to incoming stack pointer. The value of each m_regs[].offset will
512 be relative to the incoming base pointer (rax or rsi) used by the stub.
514 s_instances: 0 1 2 3
515 Offset: realigned or aligned + 8
516 Register aligned aligned + 8 aligned w/HFP w/HFP */
517 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
518 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
519 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
520 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
521 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
522 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
523 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
524 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
525 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
526 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
527 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
528 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
529 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
530 BP_REG, /* 0xc0 0xc8 N/A N/A */
531 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
532 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
533 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
534 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
537 /* Instantiate static const values. */
538 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
539 const unsigned xlogue_layout::MIN_REGS;
540 const unsigned xlogue_layout::MAX_REGS;
541 const unsigned xlogue_layout::MAX_EXTRA_REGS;
542 const unsigned xlogue_layout::VARIANT_COUNT;
543 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
545 /* Initialize xlogue_layout::s_stub_names to zero. */
546 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
547 [STUB_NAME_MAX_LEN];
549 /* Instantiates all xlogue_layout instances. */
550 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
551 xlogue_layout (0, false),
552 xlogue_layout (8, false),
553 xlogue_layout (0, true),
554 xlogue_layout (8, true)
557 /* Return an appropriate const instance of xlogue_layout based upon values
558 in cfun->machine and crtl. */
559 const struct xlogue_layout &
560 xlogue_layout::get_instance ()
562 enum xlogue_stub_sets stub_set;
563 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
565 if (stack_realign_fp)
566 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
567 else if (frame_pointer_needed)
568 stub_set = aligned_plus_8
569 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
570 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
571 else
572 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
574 return s_instances[stub_set];
577 /* Determine how many clobbered registers can be saved by the stub.
578 Returns the count of registers the stub will save and restore. */
579 unsigned
580 xlogue_layout::count_stub_managed_regs ()
582 bool hfp = frame_pointer_needed || stack_realign_fp;
583 unsigned i, count;
584 unsigned regno;
586 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
588 regno = REG_ORDER[i];
589 if (regno == BP_REG && hfp)
590 continue;
591 if (!ix86_save_reg (regno, false, false))
592 break;
593 ++count;
595 return count;
598 /* Determine if register REGNO is a stub managed register given the
599 total COUNT of stub managed registers. */
600 bool
601 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
603 bool hfp = frame_pointer_needed || stack_realign_fp;
604 unsigned i;
606 for (i = 0; i < count; ++i)
608 gcc_assert (i < MAX_REGS);
609 if (REG_ORDER[i] == BP_REG && hfp)
610 ++count;
611 else if (REG_ORDER[i] == regno)
612 return true;
614 return false;
617 /* Constructor for xlogue_layout. */
618 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
619 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
620 m_stack_align_off_in (stack_align_off_in)
622 HOST_WIDE_INT offset = stack_align_off_in;
623 unsigned i, j;
625 for (i = j = 0; i < MAX_REGS; ++i)
627 unsigned regno = REG_ORDER[i];
629 if (regno == BP_REG && hfp)
630 continue;
631 if (SSE_REGNO_P (regno))
633 offset += 16;
634 /* Verify that SSE regs are always aligned. */
635 gcc_assert (!((stack_align_off_in + offset) & 15));
637 else
638 offset += 8;
640 m_regs[j].regno = regno;
641 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
643 gcc_assert (j == m_nregs);
646 const char *
647 xlogue_layout::get_stub_name (enum xlogue_stub stub,
648 unsigned n_extra_regs)
650 const int have_avx = TARGET_AVX;
651 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
653 /* Lazy init */
654 if (!*name)
656 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
657 (have_avx ? "avx" : "sse"),
658 STUB_BASE_NAMES[stub],
659 MIN_REGS + n_extra_regs);
660 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
663 return name;
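/* Example (derived from the tables above): with AVX enabled and
   n_extra_regs == 0, XLOGUE_STUB_SAVE yields the stub name
   "__avx_savms64_12", since STUB_BASE_NAMES maps it to "savms64" and
   MIN_REGS is 12.  */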
666 /* Return rtx of a symbol ref for the entry point (based upon
667 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
669 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
671 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
672 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
673 gcc_assert (stub < XLOGUE_STUB_COUNT);
674 gcc_assert (crtl->stack_realign_finalized);
676 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
679 /* Define the structure for the machine field in struct function. */
681 struct GTY(()) stack_local_entry {
682 unsigned short mode;
683 unsigned short n;
684 rtx rtl;
685 struct stack_local_entry *next;
688 /* Which cpu are we scheduling for. */
689 enum attr_cpu ix86_schedule;
691 /* Which cpu are we optimizing for. */
692 enum processor_type ix86_tune;
694 /* Which instruction set architecture to use. */
695 enum processor_type ix86_arch;
697 /* True if processor has SSE prefetch instruction. */
698 unsigned char x86_prefetch_sse;
700 /* -mstackrealign option */
701 static const char ix86_force_align_arg_pointer_string[]
702 = "force_align_arg_pointer";
704 static rtx (*ix86_gen_leave) (void);
705 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
706 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
708 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
709 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
710 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
711 static rtx (*ix86_gen_clzero) (rtx);
712 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
713 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
714 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
715 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
716 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
717 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
719 /* Preferred alignment for stack boundary in bits. */
720 unsigned int ix86_preferred_stack_boundary;
722 /* Alignment for incoming stack boundary in bits specified at
723 command line. */
724 static unsigned int ix86_user_incoming_stack_boundary;
726 /* Default alignment for incoming stack boundary in bits. */
727 static unsigned int ix86_default_incoming_stack_boundary;
729 /* Alignment for incoming stack boundary in bits. */
730 unsigned int ix86_incoming_stack_boundary;
732 /* Calling abi specific va_list type nodes. */
733 static GTY(()) tree sysv_va_list_type_node;
734 static GTY(()) tree ms_va_list_type_node;
736 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
737 char internal_label_prefix[16];
738 int internal_label_prefix_len;
740 /* Fence to use after loop using movnt. */
741 tree x86_mfence;
 743 /* Register class used for passing a given 64bit part of the argument.
 744    These represent classes as documented by the PS ABI, with the exception
 745    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
 746    just uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
 748    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 749    whenever possible (the upper half does contain padding).  */
750 enum x86_64_reg_class
752 X86_64_NO_CLASS,
753 X86_64_INTEGER_CLASS,
754 X86_64_INTEGERSI_CLASS,
755 X86_64_SSE_CLASS,
756 X86_64_SSESF_CLASS,
757 X86_64_SSEDF_CLASS,
758 X86_64_SSEUP_CLASS,
759 X86_64_X87_CLASS,
760 X86_64_X87UP_CLASS,
761 X86_64_COMPLEX_X87_CLASS,
762 X86_64_MEMORY_CLASS
765 #define MAX_CLASSES 8
767 /* Table of constants used by fldpi, fldln2, etc.... */
768 static REAL_VALUE_TYPE ext_80387_constants_table [5];
769 static bool ext_80387_constants_init;
772 static struct machine_function * ix86_init_machine_status (void);
773 static rtx ix86_function_value (const_tree, const_tree, bool);
774 static bool ix86_function_value_regno_p (const unsigned int);
775 static unsigned int ix86_function_arg_boundary (machine_mode,
776 const_tree);
777 static rtx ix86_static_chain (const_tree, bool);
778 static int ix86_function_regparm (const_tree, const_tree);
779 static void ix86_compute_frame_layout (void);
780 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
781 rtx, rtx, int);
782 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
783 static tree ix86_canonical_va_list_type (tree);
784 static void predict_jump (int);
785 static unsigned int split_stack_prologue_scratch_regno (void);
786 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
788 enum ix86_function_specific_strings
790 IX86_FUNCTION_SPECIFIC_ARCH,
791 IX86_FUNCTION_SPECIFIC_TUNE,
792 IX86_FUNCTION_SPECIFIC_MAX
795 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
796 const char *, const char *, enum fpmath_unit,
797 bool);
798 static void ix86_function_specific_save (struct cl_target_option *,
799 struct gcc_options *opts);
800 static void ix86_function_specific_restore (struct gcc_options *opts,
801 struct cl_target_option *);
802 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
803 static void ix86_function_specific_print (FILE *, int,
804 struct cl_target_option *);
805 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
806 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
807 struct gcc_options *,
808 struct gcc_options *,
809 struct gcc_options *);
810 static bool ix86_can_inline_p (tree, tree);
811 static void ix86_set_current_function (tree);
812 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
814 static enum calling_abi ix86_function_abi (const_tree);
817 #ifndef SUBTARGET32_DEFAULT_CPU
818 #define SUBTARGET32_DEFAULT_CPU "i386"
819 #endif
821 /* Whether -mtune= or -march= were specified */
822 static int ix86_tune_defaulted;
823 static int ix86_arch_specified;
825 /* Vectorization library interface and handlers. */
826 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
828 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
829 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
831 /* Processor target table, indexed by processor number */
832 struct ptt
834 const char *const name; /* processor name */
835 const struct processor_costs *cost; /* Processor costs */
836 const int align_loop; /* Default alignments. */
837 const int align_loop_max_skip;
838 const int align_jump;
839 const int align_jump_max_skip;
840 const int align_func;
843 /* This table must be in sync with enum processor_type in i386.h. */
844 static const struct ptt processor_target_table[PROCESSOR_max] =
846 {"generic", &generic_cost, 16, 10, 16, 10, 16},
847 {"i386", &i386_cost, 4, 3, 4, 3, 4},
848 {"i486", &i486_cost, 16, 15, 16, 15, 16},
849 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
850 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
851 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
852 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
853 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
854 {"core2", &core_cost, 16, 10, 16, 10, 16},
855 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
856 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
857 {"haswell", &core_cost, 16, 10, 16, 10, 16},
858 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
859 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
860 {"knl", &slm_cost, 16, 15, 16, 7, 16},
861 {"knm", &slm_cost, 16, 15, 16, 7, 16},
862 {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
863 {"cannonlake", &skylake_cost, 16, 10, 16, 10, 16},
864 {"icelake-client", &skylake_cost, 16, 10, 16, 10, 16},
865 {"icelake-server", &skylake_cost, 16, 10, 16, 10, 16},
866 {"intel", &intel_cost, 16, 15, 16, 7, 16},
867 {"geode", &geode_cost, 0, 0, 0, 0, 0},
868 {"k6", &k6_cost, 32, 7, 32, 7, 32},
869 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
870 {"k8", &k8_cost, 16, 7, 16, 7, 16},
871 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
872 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
873 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
874 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
875 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
876 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
877 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
878 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
881 static unsigned int
882 rest_of_handle_insert_vzeroupper (void)
884 int i;
886 /* vzeroupper instructions are inserted immediately after reload to
887 account for possible spills from 256bit or 512bit registers. The pass
888 reuses mode switching infrastructure by re-running mode insertion
889 pass, so disable entities that have already been processed. */
890 for (i = 0; i < MAX_386_ENTITIES; i++)
891 ix86_optimize_mode_switching[i] = 0;
893 ix86_optimize_mode_switching[AVX_U128] = 1;
895 /* Call optimize_mode_switching. */
896 g->get_passes ()->execute_pass_mode_switching ();
897 return 0;
900 /* Return 1 if INSN uses or defines a hard register.
901 Hard register uses in a memory address are ignored.
902 Clobbers and flags definitions are ignored. */
904 static bool
905 has_non_address_hard_reg (rtx_insn *insn)
907 df_ref ref;
908 FOR_EACH_INSN_DEF (ref, insn)
909 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
910 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
911 && DF_REF_REGNO (ref) != FLAGS_REG)
912 return true;
914 FOR_EACH_INSN_USE (ref, insn)
915 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
916 return true;
918 return false;
921 /* Check if comparison INSN may be transformed
922 into vector comparison. Currently we transform
923 zero checks only which look like:
925 (set (reg:CCZ 17 flags)
926 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
927 (subreg:SI (reg:DI x) 0))
928 (const_int 0 [0]))) */
930 static bool
931 convertible_comparison_p (rtx_insn *insn)
933 if (!TARGET_SSE4_1)
934 return false;
936 rtx def_set = single_set (insn);
938 gcc_assert (def_set);
940 rtx src = SET_SRC (def_set);
941 rtx dst = SET_DEST (def_set);
943 gcc_assert (GET_CODE (src) == COMPARE);
945 if (GET_CODE (dst) != REG
946 || REGNO (dst) != FLAGS_REG
947 || GET_MODE (dst) != CCZmode)
948 return false;
950 rtx op1 = XEXP (src, 0);
951 rtx op2 = XEXP (src, 1);
953 if (op2 != CONST0_RTX (GET_MODE (op2)))
954 return false;
956 if (GET_CODE (op1) != IOR)
957 return false;
959 op2 = XEXP (op1, 1);
960 op1 = XEXP (op1, 0);
962 if (!SUBREG_P (op1)
963 || !SUBREG_P (op2)
964 || GET_MODE (op1) != SImode
965 || GET_MODE (op2) != SImode
966 || ((SUBREG_BYTE (op1) != 0
967 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
968 && (SUBREG_BYTE (op2) != 0
969 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
970 return false;
972 op1 = SUBREG_REG (op1);
973 op2 = SUBREG_REG (op2);
975 if (op1 != op2
976 || !REG_P (op1)
977 || GET_MODE (op1) != DImode)
978 return false;
980 return true;
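/* A minimal sketch of user code (hypothetical, for illustration only) that
   produces the zero-check pattern handled above when compiled for 32-bit
   x86: the 64-bit comparison against zero is expanded as an IOR of the two
   SImode halves of the value feeding a CCZmode compare.

       int is_zero (unsigned long long x) { return x == 0; }
*/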
983 /* The DImode version of scalar_to_vector_candidate_p. */
985 static bool
986 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
988 rtx def_set = single_set (insn);
990 if (!def_set)
991 return false;
993 if (has_non_address_hard_reg (insn))
994 return false;
996 rtx src = SET_SRC (def_set);
997 rtx dst = SET_DEST (def_set);
999 if (GET_CODE (src) == COMPARE)
1000 return convertible_comparison_p (insn);
1002 /* We are interested in DImode promotion only. */
1003 if ((GET_MODE (src) != DImode
1004 && !CONST_INT_P (src))
1005 || GET_MODE (dst) != DImode)
1006 return false;
1008 if (!REG_P (dst) && !MEM_P (dst))
1009 return false;
1011 switch (GET_CODE (src))
1013 case ASHIFTRT:
1014 if (!TARGET_AVX512VL)
1015 return false;
1016 /* FALLTHRU */
1018 case ASHIFT:
1019 case LSHIFTRT:
1020 if (!REG_P (XEXP (src, 1))
1021 && (!SUBREG_P (XEXP (src, 1))
1022 || SUBREG_BYTE (XEXP (src, 1)) != 0
1023 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1024 && (!CONST_INT_P (XEXP (src, 1))
1025 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1026 return false;
1028 if (GET_MODE (XEXP (src, 1)) != QImode
1029 && !CONST_INT_P (XEXP (src, 1)))
1030 return false;
1031 break;
1033 case PLUS:
1034 case MINUS:
1035 case IOR:
1036 case XOR:
1037 case AND:
1038 if (!REG_P (XEXP (src, 1))
1039 && !MEM_P (XEXP (src, 1))
1040 && !CONST_INT_P (XEXP (src, 1)))
1041 return false;
1043 if (GET_MODE (XEXP (src, 1)) != DImode
1044 && !CONST_INT_P (XEXP (src, 1)))
1045 return false;
1046 break;
1048 case NEG:
1049 case NOT:
1050 break;
1052 case REG:
1053 return true;
1055 case MEM:
1056 case CONST_INT:
1057 return REG_P (dst);
1059 default:
1060 return false;
1063 if (!REG_P (XEXP (src, 0))
1064 && !MEM_P (XEXP (src, 0))
1065 && !CONST_INT_P (XEXP (src, 0))
1066 /* Check for andnot case. */
1067 && (GET_CODE (src) != AND
1068 || GET_CODE (XEXP (src, 0)) != NOT
1069 || !REG_P (XEXP (XEXP (src, 0), 0))))
1070 return false;
1072 if (GET_MODE (XEXP (src, 0)) != DImode
1073 && !CONST_INT_P (XEXP (src, 0)))
1074 return false;
1076 return true;
1079 /* The TImode version of scalar_to_vector_candidate_p. */
1081 static bool
1082 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1084 rtx def_set = single_set (insn);
1086 if (!def_set)
1087 return false;
1089 if (has_non_address_hard_reg (insn))
1090 return false;
1092 rtx src = SET_SRC (def_set);
1093 rtx dst = SET_DEST (def_set);
1095 /* Only TImode load and store are allowed. */
1096 if (GET_MODE (dst) != TImode)
1097 return false;
1099 if (MEM_P (dst))
1101 /* Check for store. Memory must be aligned or unaligned store
1102 is optimal. Only support store from register, standard SSE
1103 constant or CONST_WIDE_INT generated from piecewise store.
1105 ??? Verify performance impact before enabling CONST_INT for
1106 __int128 store. */
1107 if (misaligned_operand (dst, TImode)
1108 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1109 return false;
1111 switch (GET_CODE (src))
1113 default:
1114 return false;
1116 case REG:
1117 case CONST_WIDE_INT:
1118 return true;
1120 case CONST_INT:
1121 return standard_sse_constant_p (src, TImode);
1124 else if (MEM_P (src))
1126 /* Check for load. Memory must be aligned or unaligned load is
1127 optimal. */
1128 return (REG_P (dst)
1129 && (!misaligned_operand (src, TImode)
1130 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1133 return false;
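/* Hypothetical example of code this targets: on x86-64, copying an aligned
   __int128 object, e.g.

       void copy128 (__int128 *d, const __int128 *s) { *d = *s; }

   is a TImode load followed by a TImode store, which the pass can rewrite
   as V1TImode SSE moves.  */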
1136 /* Return 1 if INSN may be converted into a vector
1137 instruction. */
1139 static bool
1140 scalar_to_vector_candidate_p (rtx_insn *insn)
1142 if (TARGET_64BIT)
1143 return timode_scalar_to_vector_candidate_p (insn);
1144 else
1145 return dimode_scalar_to_vector_candidate_p (insn);
1148 /* The DImode version of remove_non_convertible_regs. */
1150 static void
1151 dimode_remove_non_convertible_regs (bitmap candidates)
1153 bitmap_iterator bi;
1154 unsigned id;
1155 bitmap regs = BITMAP_ALLOC (NULL);
1157 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1159 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1160 rtx reg = SET_DEST (def_set);
1162 if (!REG_P (reg)
1163 || bitmap_bit_p (regs, REGNO (reg))
1164 || HARD_REGISTER_P (reg))
1165 continue;
1167 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1168 def;
1169 def = DF_REF_NEXT_REG (def))
1171 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1173 if (dump_file)
1174 fprintf (dump_file,
1175 "r%d has non convertible definition in insn %d\n",
1176 REGNO (reg), DF_REF_INSN_UID (def));
1178 bitmap_set_bit (regs, REGNO (reg));
1179 break;
1184 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1186 for (df_ref def = DF_REG_DEF_CHAIN (id);
1187 def;
1188 def = DF_REF_NEXT_REG (def))
1189 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1191 if (dump_file)
1192 fprintf (dump_file, "Removing insn %d from candidates list\n",
1193 DF_REF_INSN_UID (def));
1195 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1199 BITMAP_FREE (regs);
1202 /* For a register REGNO, scan instructions for its defs and uses.
1203 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1205 static void
1206 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1207 unsigned int regno)
1209 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1210 def;
1211 def = DF_REF_NEXT_REG (def))
1213 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1215 if (dump_file)
1216 fprintf (dump_file,
1217 "r%d has non convertible def in insn %d\n",
1218 regno, DF_REF_INSN_UID (def));
1220 bitmap_set_bit (regs, regno);
1221 break;
1225 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1226 ref;
1227 ref = DF_REF_NEXT_REG (ref))
1229 /* Debug instructions are skipped. */
1230 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1231 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1233 if (dump_file)
1234 fprintf (dump_file,
1235 "r%d has non convertible use in insn %d\n",
1236 regno, DF_REF_INSN_UID (ref));
1238 bitmap_set_bit (regs, regno);
1239 break;
1244 /* The TImode version of remove_non_convertible_regs. */
1246 static void
1247 timode_remove_non_convertible_regs (bitmap candidates)
1249 bitmap_iterator bi;
1250 unsigned id;
1251 bitmap regs = BITMAP_ALLOC (NULL);
1253 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1255 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1256 rtx dest = SET_DEST (def_set);
1257 rtx src = SET_SRC (def_set);
1259 if ((!REG_P (dest)
1260 || bitmap_bit_p (regs, REGNO (dest))
1261 || HARD_REGISTER_P (dest))
1262 && (!REG_P (src)
1263 || bitmap_bit_p (regs, REGNO (src))
1264 || HARD_REGISTER_P (src)))
1265 continue;
1267 if (REG_P (dest))
1268 timode_check_non_convertible_regs (candidates, regs,
1269 REGNO (dest));
1271 if (REG_P (src))
1272 timode_check_non_convertible_regs (candidates, regs,
1273 REGNO (src));
1276 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1278 for (df_ref def = DF_REG_DEF_CHAIN (id);
1279 def;
1280 def = DF_REF_NEXT_REG (def))
1281 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1283 if (dump_file)
1284 fprintf (dump_file, "Removing insn %d from candidates list\n",
1285 DF_REF_INSN_UID (def));
1287 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1290 for (df_ref ref = DF_REG_USE_CHAIN (id);
1291 ref;
1292 ref = DF_REF_NEXT_REG (ref))
1293 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1295 if (dump_file)
1296 fprintf (dump_file, "Removing insn %d from candidates list\n",
1297 DF_REF_INSN_UID (ref));
1299 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1303 BITMAP_FREE (regs);
1306 /* For a given bitmap of insn UIDs, scan all instructions and
1307    remove an insn from CANDIDATES in case it has both convertible
1308    and non-convertible definitions.
1310 All insns in a bitmap are conversion candidates according to
1311 scalar_to_vector_candidate_p. Currently it implies all insns
1312 are single_set. */
1314 static void
1315 remove_non_convertible_regs (bitmap candidates)
1317 if (TARGET_64BIT)
1318 timode_remove_non_convertible_regs (candidates);
1319 else
1320 dimode_remove_non_convertible_regs (candidates);
1323 class scalar_chain
1325 public:
1326 scalar_chain ();
1327 virtual ~scalar_chain ();
1329 static unsigned max_id;
1331 /* ID of a chain. */
1332 unsigned int chain_id;
1333 /* A queue of instructions to be included into a chain. */
1334 bitmap queue;
1335 /* Instructions included into a chain. */
1336 bitmap insns;
1337 /* All registers defined by a chain. */
1338 bitmap defs;
1339   /* Registers used in both vector and scalar modes.  */
1340 bitmap defs_conv;
1342 void build (bitmap candidates, unsigned insn_uid);
1343 virtual int compute_convert_gain () = 0;
1344 int convert ();
1346 protected:
1347 void add_to_queue (unsigned insn_uid);
1348 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1350 private:
1351 void add_insn (bitmap candidates, unsigned insn_uid);
1352 void analyze_register_chain (bitmap candidates, df_ref ref);
1353 virtual void mark_dual_mode_def (df_ref def) = 0;
1354 virtual void convert_insn (rtx_insn *insn) = 0;
1355 virtual void convert_registers () = 0;
1358 class dimode_scalar_chain : public scalar_chain
1360 public:
1361 int compute_convert_gain ();
1362 private:
1363 void mark_dual_mode_def (df_ref def);
1364 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1365 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1366 void convert_insn (rtx_insn *insn);
1367 void convert_op (rtx *op, rtx_insn *insn);
1368 void convert_reg (unsigned regno);
1369 void make_vector_copies (unsigned regno);
1370 void convert_registers ();
1371 int vector_const_cost (rtx exp);
1374 class timode_scalar_chain : public scalar_chain
1376 public:
1377   /* Converting from TImode to V1TImode is always faster.  */
1378 int compute_convert_gain () { return 1; }
1380 private:
1381 void mark_dual_mode_def (df_ref def);
1382 void fix_debug_reg_uses (rtx reg);
1383 void convert_insn (rtx_insn *insn);
1384   /* We don't convert registers to a different size.  */
1385 void convert_registers () {}
1388 unsigned scalar_chain::max_id = 0;
1390 /* Initialize new chain. */
1392 scalar_chain::scalar_chain ()
1394 chain_id = ++max_id;
1396 if (dump_file)
1397 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1399 bitmap_obstack_initialize (NULL);
1400 insns = BITMAP_ALLOC (NULL);
1401 defs = BITMAP_ALLOC (NULL);
1402 defs_conv = BITMAP_ALLOC (NULL);
1403 queue = NULL;
1406 /* Free chain's data. */
1408 scalar_chain::~scalar_chain ()
1410 BITMAP_FREE (insns);
1411 BITMAP_FREE (defs);
1412 BITMAP_FREE (defs_conv);
1413 bitmap_obstack_release (NULL);
1416 /* Add instruction into chains' queue. */
1418 void
1419 scalar_chain::add_to_queue (unsigned insn_uid)
1421 if (bitmap_bit_p (insns, insn_uid)
1422 || bitmap_bit_p (queue, insn_uid))
1423 return;
1425 if (dump_file)
1426 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1427 insn_uid, chain_id);
1428 bitmap_set_bit (queue, insn_uid);
1431 /* For DImode conversion, mark register defined by DEF as requiring
1432 conversion. */
1434 void
1435 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1437 gcc_assert (DF_REF_REG_DEF_P (def));
1439 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1440 return;
1442 if (dump_file)
1443 fprintf (dump_file,
1444 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1445 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1447 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1450 /* For TImode conversion, it is unused. */
1452 void
1453 timode_scalar_chain::mark_dual_mode_def (df_ref)
1455 gcc_unreachable ();
1458 /* Check REF's chain to add new insns into a queue
1459 and find registers requiring conversion. */
1461 void
1462 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1464 df_link *chain;
1466 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1467 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1468 add_to_queue (DF_REF_INSN_UID (ref));
1470 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1472 unsigned uid = DF_REF_INSN_UID (chain->ref);
1474 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1475 continue;
1477 if (!DF_REF_REG_MEM_P (chain->ref))
1479 if (bitmap_bit_p (insns, uid))
1480 continue;
1482 if (bitmap_bit_p (candidates, uid))
1484 add_to_queue (uid);
1485 continue;
1489 if (DF_REF_REG_DEF_P (chain->ref))
1491 if (dump_file)
1492 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1493 DF_REF_REGNO (chain->ref), uid);
1494 mark_dual_mode_def (chain->ref);
1496 else
1498 if (dump_file)
1499 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1500 DF_REF_REGNO (chain->ref), uid);
1501 mark_dual_mode_def (ref);
1506 /* Add instruction into a chain. */
1508 void
1509 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1511 if (bitmap_bit_p (insns, insn_uid))
1512 return;
1514 if (dump_file)
1515 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1517 bitmap_set_bit (insns, insn_uid);
1519 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1520 rtx def_set = single_set (insn);
1521 if (def_set && REG_P (SET_DEST (def_set))
1522 && !HARD_REGISTER_P (SET_DEST (def_set)))
1523 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1525 df_ref ref;
1526 df_ref def;
1527 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1528 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1529 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1530 def;
1531 def = DF_REF_NEXT_REG (def))
1532 analyze_register_chain (candidates, def);
1533 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1534 if (!DF_REF_REG_MEM_P (ref))
1535 analyze_register_chain (candidates, ref);
1538 /* Build new chain starting from insn INSN_UID recursively
1539 adding all dependent uses and definitions. */
1541 void
1542 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1544 queue = BITMAP_ALLOC (NULL);
1545 bitmap_set_bit (queue, insn_uid);
1547 if (dump_file)
1548 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1550 while (!bitmap_empty_p (queue))
1552 insn_uid = bitmap_first_set_bit (queue);
1553 bitmap_clear_bit (queue, insn_uid);
1554 bitmap_clear_bit (candidates, insn_uid);
1555 add_insn (candidates, insn_uid);
1558 if (dump_file)
1560 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1561 fprintf (dump_file, " insns: ");
1562 dump_bitmap (dump_file, insns);
1563 if (!bitmap_empty_p (defs_conv))
1565 bitmap_iterator bi;
1566 unsigned id;
1567 const char *comma = "";
1568 fprintf (dump_file, " defs to convert: ");
1569 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1571 fprintf (dump_file, "%sr%d", comma, id);
1572 comma = ", ";
1574 fprintf (dump_file, "\n");
1578 BITMAP_FREE (queue);
1581 /* Return the cost of building a vector constant
1582 instead of using a scalar one. */
1585 dimode_scalar_chain::vector_const_cost (rtx exp)
1587 gcc_assert (CONST_INT_P (exp));
1589 if (standard_sse_constant_p (exp, V2DImode))
1590 return COSTS_N_INSNS (1);
1591 return ix86_cost->sse_load[1];
1594 /* Compute a gain for chain conversion. */
1597 dimode_scalar_chain::compute_convert_gain ()
1599 bitmap_iterator bi;
1600 unsigned insn_uid;
1601 int gain = 0;
1602 int cost = 0;
1604 if (dump_file)
1605 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1607 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1609 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1610 rtx def_set = single_set (insn);
1611 rtx src = SET_SRC (def_set);
1612 rtx dst = SET_DEST (def_set);
1614 if (REG_P (src) && REG_P (dst))
1615 gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1616 else if (REG_P (src) && MEM_P (dst))
1617 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1618 else if (MEM_P (src) && REG_P (dst))
1619 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1620 else if (GET_CODE (src) == ASHIFT
1621 || GET_CODE (src) == ASHIFTRT
1622 || GET_CODE (src) == LSHIFTRT)
1624 if (CONST_INT_P (XEXP (src, 0)))
1625 gain -= vector_const_cost (XEXP (src, 0));
1626 if (CONST_INT_P (XEXP (src, 1)))
1628 gain += ix86_cost->shift_const;
1629 if (INTVAL (XEXP (src, 1)) >= 32)
1630 gain -= COSTS_N_INSNS (1);
1632 else
1633 /* Additional gain for omitting two CMOVs. */
1634 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1636 else if (GET_CODE (src) == PLUS
1637 || GET_CODE (src) == MINUS
1638 || GET_CODE (src) == IOR
1639 || GET_CODE (src) == XOR
1640 || GET_CODE (src) == AND)
1642 gain += ix86_cost->add;
1643 /* Additional gain for andnot for targets without BMI. */
1644 if (GET_CODE (XEXP (src, 0)) == NOT
1645 && !TARGET_BMI)
1646 gain += 2 * ix86_cost->add;
1648 if (CONST_INT_P (XEXP (src, 0)))
1649 gain -= vector_const_cost (XEXP (src, 0));
1650 if (CONST_INT_P (XEXP (src, 1)))
1651 gain -= vector_const_cost (XEXP (src, 1));
1653 else if (GET_CODE (src) == NEG
1654 || GET_CODE (src) == NOT)
1655 gain += ix86_cost->add - COSTS_N_INSNS (1);
1656 else if (GET_CODE (src) == COMPARE)
1658 /* Assume comparison cost is the same. */
1660 else if (CONST_INT_P (src))
1662 if (REG_P (dst))
1663 gain += COSTS_N_INSNS (2);
1664 else if (MEM_P (dst))
1665 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1666 gain -= vector_const_cost (src);
1668 else
1669 gcc_unreachable ();
1672 if (dump_file)
1673 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1675 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1676 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1678 if (dump_file)
1679 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1681 gain -= cost;
1683 if (dump_file)
1684 fprintf (dump_file, " Total gain: %d\n", gain);
1686 return gain;
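/* Worked example (illustrative): for a chain that is a single DImode
   register-to-register move, the loop above adds
   COSTS_N_INSNS (2) - ix86_cost->xmm_move, i.e. the two SImode integer
   moves the 32-bit target would otherwise need minus the single SSE move
   that replaces them; every register that also needs a scalar copy then
   charges DF_REG_DEF_COUNT * mmxsse_to_integer against the total.  */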
1689 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1692 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1694 if (x == reg)
1695 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1697 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1698 int i, j;
1699 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1701 if (fmt[i] == 'e')
1702 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1703 else if (fmt[i] == 'E')
1704 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1705 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1706 reg, new_reg);
1709 return x;
1712 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1714 void
1715 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1716 rtx reg, rtx new_reg)
1718 replace_with_subreg (single_set (insn), reg, new_reg);
1721 /* Insert generated conversion instruction sequence INSNS
1722 after instruction AFTER. New BB may be required in case
1723 instruction has EH region attached. */
1725 void
1726 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1728 if (!control_flow_insn_p (after))
1730 emit_insn_after (insns, after);
1731 return;
1734 basic_block bb = BLOCK_FOR_INSN (after);
1735 edge e = find_fallthru_edge (bb->succs);
1736 gcc_assert (e);
1738 basic_block new_bb = split_edge (e);
1739 emit_insn_after (insns, BB_HEAD (new_bb));
1742 /* Make vector copies for all register REGNO definitions
1743 and replace its uses in a chain. */
1745 void
1746 dimode_scalar_chain::make_vector_copies (unsigned regno)
1748 rtx reg = regno_reg_rtx[regno];
1749 rtx vreg = gen_reg_rtx (DImode);
1750 bool count_reg = false;
1751 df_ref ref;
1753 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1754 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1756 df_ref use;
1758 /* Detect the count register of a shift instruction. */
1759 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1760 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1762 rtx_insn *insn = DF_REF_INSN (use);
1763 rtx def_set = single_set (insn);
1765 gcc_assert (def_set);
1767 rtx src = SET_SRC (def_set);
1769 if ((GET_CODE (src) == ASHIFT
1770 || GET_CODE (src) == ASHIFTRT
1771 || GET_CODE (src) == LSHIFTRT)
1772 && !CONST_INT_P (XEXP (src, 1))
1773 && reg_or_subregno (XEXP (src, 1)) == regno)
1774 count_reg = true;
1777 start_sequence ();
1778 if (count_reg)
1780 rtx qreg = gen_lowpart (QImode, reg);
1781 rtx tmp = gen_reg_rtx (SImode);
1783 if (TARGET_ZERO_EXTEND_WITH_AND
1784 && optimize_function_for_speed_p (cfun))
1786 emit_move_insn (tmp, const0_rtx);
1787 emit_insn (gen_movstrictqi
1788 (gen_lowpart (QImode, tmp), qreg));
1790 else
1791 emit_insn (gen_rtx_SET
1792 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1794 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1796 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1797 emit_move_insn (slot, tmp);
1798 tmp = copy_rtx (slot);
1801 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1803 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1805 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1806 emit_move_insn (adjust_address (tmp, SImode, 0),
1807 gen_rtx_SUBREG (SImode, reg, 0));
1808 emit_move_insn (adjust_address (tmp, SImode, 4),
1809 gen_rtx_SUBREG (SImode, reg, 4));
1810 emit_move_insn (vreg, tmp);
1812 else if (TARGET_SSE4_1)
1814 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1815 CONST0_RTX (V4SImode),
1816 gen_rtx_SUBREG (SImode, reg, 0)));
1817 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1818 gen_rtx_SUBREG (V4SImode, vreg, 0),
1819 gen_rtx_SUBREG (SImode, reg, 4),
1820 GEN_INT (2)));
1822 else
1824 rtx tmp = gen_reg_rtx (DImode);
1825 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1826 CONST0_RTX (V4SImode),
1827 gen_rtx_SUBREG (SImode, reg, 0)));
1828 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1829 CONST0_RTX (V4SImode),
1830 gen_rtx_SUBREG (SImode, reg, 4)));
1831 emit_insn (gen_vec_interleave_lowv4si
1832 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1833 gen_rtx_SUBREG (V4SImode, vreg, 0),
1834 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1836 rtx_insn *seq = get_insns ();
1837 end_sequence ();
1838 rtx_insn *insn = DF_REF_INSN (ref);
1839 emit_conversion_insns (seq, insn);
1841 if (dump_file)
1842 fprintf (dump_file,
1843 " Copied r%d to a vector register r%d for insn %d\n",
1844 regno, REGNO (vreg), INSN_UID (insn));
1847 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1848 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1850 rtx_insn *insn = DF_REF_INSN (ref);
1851 if (count_reg)
1853 rtx def_set = single_set (insn);
1854 gcc_assert (def_set);
1856 rtx src = SET_SRC (def_set);
1858 if ((GET_CODE (src) == ASHIFT
1859 || GET_CODE (src) == ASHIFTRT
1860 || GET_CODE (src) == LSHIFTRT)
1861 && !CONST_INT_P (XEXP (src, 1))
1862 && reg_or_subregno (XEXP (src, 1)) == regno)
1863 XEXP (src, 1) = vreg;
1865 else
1866 replace_with_subreg_in_insn (insn, reg, vreg);
1868 if (dump_file)
1869 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1870 regno, REGNO (vreg), INSN_UID (insn));
1874 /* Convert all definitions of register REGNO
1875 and fix its uses. Scalar copies may be created
1876    in case the register is used in a non-convertible insn.  */
1878 void
1879 dimode_scalar_chain::convert_reg (unsigned regno)
1881 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1882 rtx reg = regno_reg_rtx[regno];
1883 rtx scopy = NULL_RTX;
1884 df_ref ref;
1885 bitmap conv;
1887 conv = BITMAP_ALLOC (NULL);
1888 bitmap_copy (conv, insns);
1890 if (scalar_copy)
1891 scopy = gen_reg_rtx (DImode);
1893 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1895 rtx_insn *insn = DF_REF_INSN (ref);
1896 rtx def_set = single_set (insn);
1897 rtx src = SET_SRC (def_set);
1898 rtx reg = DF_REF_REG (ref);
1900 if (!MEM_P (src))
1902 replace_with_subreg_in_insn (insn, reg, reg);
1903 bitmap_clear_bit (conv, INSN_UID (insn));
1906 if (scalar_copy)
1908 start_sequence ();
1909 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1911 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1912 emit_move_insn (tmp, reg);
1913 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1914 adjust_address (tmp, SImode, 0));
1915 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1916 adjust_address (tmp, SImode, 4));
1918 else if (TARGET_SSE4_1)
1920 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1921 emit_insn
1922 (gen_rtx_SET
1923 (gen_rtx_SUBREG (SImode, scopy, 0),
1924 gen_rtx_VEC_SELECT (SImode,
1925 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1927 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1928 emit_insn
1929 (gen_rtx_SET
1930 (gen_rtx_SUBREG (SImode, scopy, 4),
1931 gen_rtx_VEC_SELECT (SImode,
1932 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1934 else
1936 rtx vcopy = gen_reg_rtx (V2DImode);
1937 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1938 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1939 gen_rtx_SUBREG (SImode, vcopy, 0));
1940 emit_move_insn (vcopy,
1941 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1942 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1943 gen_rtx_SUBREG (SImode, vcopy, 0));
1945 rtx_insn *seq = get_insns ();
1946 end_sequence ();
1947 emit_conversion_insns (seq, insn);
1949 if (dump_file)
1950 fprintf (dump_file,
1951 " Copied r%d to a scalar register r%d for insn %d\n",
1952 regno, REGNO (scopy), INSN_UID (insn));
1956 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1957 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1959 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1961 rtx_insn *insn = DF_REF_INSN (ref);
1963 rtx def_set = single_set (insn);
1964 gcc_assert (def_set);
1966 rtx src = SET_SRC (def_set);
1967 rtx dst = SET_DEST (def_set);
1969 if ((GET_CODE (src) == ASHIFT
1970 || GET_CODE (src) == ASHIFTRT
1971 || GET_CODE (src) == LSHIFTRT)
1972 && !CONST_INT_P (XEXP (src, 1))
1973 && reg_or_subregno (XEXP (src, 1)) == regno)
1975 rtx tmp2 = gen_reg_rtx (V2DImode);
1977 start_sequence ();
1979 if (TARGET_SSE4_1)
1980 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1981 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1982 else
1984 rtx vec_cst
1985 = gen_rtx_CONST_VECTOR (V2DImode,
1986 gen_rtvec (2, GEN_INT (0xff),
1987 const0_rtx));
1988 vec_cst
1989 = validize_mem (force_const_mem (V2DImode, vec_cst));
1991 emit_insn (gen_rtx_SET
1992 (tmp2,
1993 gen_rtx_AND (V2DImode,
1994 gen_rtx_SUBREG (V2DImode, reg, 0),
1995 vec_cst)));
1997 rtx_insn *seq = get_insns ();
1998 end_sequence ();
2000 emit_insn_before (seq, insn);
2002 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
2004 else if (!MEM_P (dst) || !REG_P (src))
2005 replace_with_subreg_in_insn (insn, reg, reg);
2007 bitmap_clear_bit (conv, INSN_UID (insn));
2010 /* Skip debug insns and uninitialized uses. */
2011 else if (DF_REF_CHAIN (ref)
2012 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2014 gcc_assert (scopy);
2015 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2016 df_insn_rescan (DF_REF_INSN (ref));
2019 BITMAP_FREE (conv);
2022 /* Convert operand OP in INSN. We should handle
2023 memory operands and uninitialized registers.
2024 All other register uses are converted during
2025 register conversion. */
2027 void
2028 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2030 *op = copy_rtx_if_shared (*op);
2032 if (GET_CODE (*op) == NOT)
2034 convert_op (&XEXP (*op, 0), insn);
2035 PUT_MODE (*op, V2DImode);
2037 else if (MEM_P (*op))
2039 rtx tmp = gen_reg_rtx (DImode);
2041 emit_insn_before (gen_move_insn (tmp, *op), insn);
2042 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2044 if (dump_file)
2045 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2046 INSN_UID (insn), REGNO (tmp));
2048 else if (REG_P (*op))
2050 /* We may not have converted this register's uses if the
2051 register has no definition. Otherwise it should have
2052 been converted in convert_reg. */
2053 df_ref ref;
2054 FOR_EACH_INSN_USE (ref, insn)
2055 if (DF_REF_REGNO (ref) == REGNO (*op))
2057 gcc_assert (!DF_REF_CHAIN (ref));
2058 break;
2060 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2062 else if (CONST_INT_P (*op))
2064 rtx vec_cst;
2065 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2067 /* Prefer all ones vector in case of -1. */
2068 if (constm1_operand (*op, GET_MODE (*op)))
2069 vec_cst = CONSTM1_RTX (V2DImode);
2070 else
2071 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2072 gen_rtvec (2, *op, const0_rtx));
2074 if (!standard_sse_constant_p (vec_cst, V2DImode))
2076 start_sequence ();
2077 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2078 rtx_insn *seq = get_insns ();
2079 end_sequence ();
2080 emit_insn_before (seq, insn);
2083 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2084 *op = tmp;
2086 else
2088 gcc_assert (SUBREG_P (*op));
2089 gcc_assert (GET_MODE (*op) == V2DImode);
2093 /* Convert INSN to vector mode. */
2095 void
2096 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2098 rtx def_set = single_set (insn);
2099 rtx src = SET_SRC (def_set);
2100 rtx dst = SET_DEST (def_set);
2101 rtx subreg;
2103 if (MEM_P (dst) && !REG_P (src))
2105 /* There are no scalar integer instructions and therefore
2106 temporary register usage is required. */
2107 rtx tmp = gen_reg_rtx (DImode);
2108 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2109 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2112 switch (GET_CODE (src))
2114 case ASHIFT:
2115 case ASHIFTRT:
2116 case LSHIFTRT:
2117 convert_op (&XEXP (src, 0), insn);
2118 PUT_MODE (src, V2DImode);
2119 break;
2121 case PLUS:
2122 case MINUS:
2123 case IOR:
2124 case XOR:
2125 case AND:
2126 convert_op (&XEXP (src, 0), insn);
2127 convert_op (&XEXP (src, 1), insn);
2128 PUT_MODE (src, V2DImode);
2129 break;
2131 case NEG:
2132 src = XEXP (src, 0);
2133 convert_op (&src, insn);
2134 subreg = gen_reg_rtx (V2DImode);
2135 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2136 src = gen_rtx_MINUS (V2DImode, subreg, src);
2137 break;
2139 case NOT:
2140 src = XEXP (src, 0);
2141 convert_op (&src, insn);
2142 subreg = gen_reg_rtx (V2DImode);
2143 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2144 src = gen_rtx_XOR (V2DImode, src, subreg);
2145 break;
2147 case MEM:
2148 if (!REG_P (dst))
2149 convert_op (&src, insn);
2150 break;
2152 case REG:
2153 if (!MEM_P (dst))
2154 convert_op (&src, insn);
2155 break;
2157 case SUBREG:
2158 gcc_assert (GET_MODE (src) == V2DImode);
2159 break;
2161 case COMPARE:
2162 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2164 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2165 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2167 if (REG_P (src))
2168 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2169 else
2170 subreg = copy_rtx_if_shared (src);
2171 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2172 copy_rtx_if_shared (subreg),
2173 copy_rtx_if_shared (subreg)),
2174 insn);
2175 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2176 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2177 copy_rtx_if_shared (src)),
2178 UNSPEC_PTEST);
2179 break;
2181 case CONST_INT:
2182 convert_op (&src, insn);
2183 break;
2185 default:
2186 gcc_unreachable ();
2189 SET_SRC (def_set) = src;
2190 SET_DEST (def_set) = dst;
2192 /* Drop possible dead definitions. */
2193 PATTERN (insn) = def_set;
2195 INSN_CODE (insn) = -1;
2196 recog_memoized (insn);
2197 df_insn_rescan (insn);
2200 /* Fix uses of converted REG in debug insns. */
2202 void
2203 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2205 if (!flag_var_tracking)
2206 return;
2208 df_ref ref, next;
2209 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2211 rtx_insn *insn = DF_REF_INSN (ref);
2212 /* Make sure the next ref is for a different instruction,
2213 so that we're not affected by the rescan. */
2214 next = DF_REF_NEXT_REG (ref);
2215 while (next && DF_REF_INSN (next) == insn)
2216 next = DF_REF_NEXT_REG (next);
2218 if (DEBUG_INSN_P (insn))
2220 /* It may be a debug insn with a TImode variable in
2221 register. */
2222 bool changed = false;
2223 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2225 rtx *loc = DF_REF_LOC (ref);
2226 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2228 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2229 changed = true;
2232 if (changed)
2233 df_insn_rescan (insn);
2238 /* Convert INSN from TImode to V1TImode. */
2240 void
2241 timode_scalar_chain::convert_insn (rtx_insn *insn)
2243 rtx def_set = single_set (insn);
2244 rtx src = SET_SRC (def_set);
2245 rtx dst = SET_DEST (def_set);
2247 switch (GET_CODE (dst))
2249 case REG:
2251 rtx tmp = find_reg_equal_equiv_note (insn);
2252 if (tmp)
2253 PUT_MODE (XEXP (tmp, 0), V1TImode);
2254 PUT_MODE (dst, V1TImode);
2255 fix_debug_reg_uses (dst);
2257 break;
2258 case MEM:
2259 PUT_MODE (dst, V1TImode);
2260 break;
2262 default:
2263 gcc_unreachable ();
2266 switch (GET_CODE (src))
2268 case REG:
2269 PUT_MODE (src, V1TImode);
2270 /* Call fix_debug_reg_uses only if SRC is never defined. */
2271 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2272 fix_debug_reg_uses (src);
2273 break;
2275 case MEM:
2276 PUT_MODE (src, V1TImode);
2277 break;
2279 case CONST_WIDE_INT:
2280 if (NONDEBUG_INSN_P (insn))
2282 /* Since there are no instructions to store 128-bit constant,
2283 temporary register usage is required. */
2284 rtx tmp = gen_reg_rtx (V1TImode);
2285 start_sequence ();
2286 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2287 src = validize_mem (force_const_mem (V1TImode, src));
2288 rtx_insn *seq = get_insns ();
2289 end_sequence ();
2290 if (seq)
2291 emit_insn_before (seq, insn);
2292 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2293 dst = tmp;
2295 break;
2297 case CONST_INT:
2298 switch (standard_sse_constant_p (src, TImode))
2300 case 1:
2301 src = CONST0_RTX (GET_MODE (dst));
2302 break;
2303 case 2:
2304 src = CONSTM1_RTX (GET_MODE (dst));
2305 break;
2306 default:
2307 gcc_unreachable ();
2309 if (NONDEBUG_INSN_P (insn))
2311 rtx tmp = gen_reg_rtx (V1TImode);
2312 /* Since there are no instructions to store standard SSE
2313 constant, temporary register usage is required. */
2314 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2315 dst = tmp;
2317 break;
2319 default:
2320 gcc_unreachable ();
2323 SET_SRC (def_set) = src;
2324 SET_DEST (def_set) = dst;
2326 /* Drop possible dead definitions. */
2327 PATTERN (insn) = def_set;
2329 INSN_CODE (insn) = -1;
2330 recog_memoized (insn);
2331 df_insn_rescan (insn);
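/* Convert all registers in the chain: registers defined inside the chain
   are converted in place, and vector copies are made for registers that
   the chain uses but does not define.  */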
2334 void
2335 dimode_scalar_chain::convert_registers ()
2337 bitmap_iterator bi;
2338 unsigned id;
2340 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2341 convert_reg (id);
2343 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2344 make_vector_copies (id);
2347 /* Convert the whole chain, creating the required register
2348 conversions and copies. */
2350 int
2351 scalar_chain::convert ()
2353 bitmap_iterator bi;
2354 unsigned id;
2355 int converted_insns = 0;
2357 if (!dbg_cnt (stv_conversion))
2358 return 0;
2360 if (dump_file)
2361 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2363 convert_registers ();
2365 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2367 convert_insn (DF_INSN_UID_GET (id)->insn);
2368 converted_insns++;
2371 return converted_insns;
2374 /* Main STV pass function. Find and convert scalar
2375 instructions into vector mode when profitable. */
2377 static unsigned int
2378 convert_scalars_to_vector ()
2380 basic_block bb;
2381 bitmap candidates;
2382 int converted_insns = 0;
2384 bitmap_obstack_initialize (NULL);
2385 candidates = BITMAP_ALLOC (NULL);
2387 calculate_dominance_info (CDI_DOMINATORS);
2388 df_set_flags (DF_DEFER_INSN_RESCAN);
2389 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2390 df_md_add_problem ();
2391 df_analyze ();
2393 /* Find all instructions we want to convert into vector mode. */
2394 if (dump_file)
2395 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2397 FOR_EACH_BB_FN (bb, cfun)
2399 rtx_insn *insn;
2400 FOR_BB_INSNS (bb, insn)
2401 if (scalar_to_vector_candidate_p (insn))
2403 if (dump_file)
2404 fprintf (dump_file, " insn %d is marked as a candidate\n",
2405 INSN_UID (insn));
2407 bitmap_set_bit (candidates, INSN_UID (insn));
2411 remove_non_convertible_regs (candidates);
2413 if (bitmap_empty_p (candidates))
2414 if (dump_file)
2415 fprintf (dump_file, "There are no candidates for optimization.\n");
2417 while (!bitmap_empty_p (candidates))
2419 unsigned uid = bitmap_first_set_bit (candidates);
2420 scalar_chain *chain;
2422 if (TARGET_64BIT)
2423 chain = new timode_scalar_chain;
2424 else
2425 chain = new dimode_scalar_chain;
2427 /* Find the instruction chain we want to convert to vector mode.
2428 Check all uses and definitions to estimate all required
2429 conversions. */
2430 chain->build (candidates, uid);
2432 if (chain->compute_convert_gain () > 0)
2433 converted_insns += chain->convert ();
2434 else
2435 if (dump_file)
2436 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2437 chain->chain_id);
2439 delete chain;
2442 if (dump_file)
2443 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2445 BITMAP_FREE (candidates);
2446 bitmap_obstack_release (NULL);
2447 df_process_deferred_rescans ();
2449 /* Conversion means we may have 128-bit register spills/fills,
2450 which require an aligned stack. */
2451 if (converted_insns)
2453 if (crtl->stack_alignment_needed < 128)
2454 crtl->stack_alignment_needed = 128;
2455 if (crtl->stack_alignment_estimated < 128)
2456 crtl->stack_alignment_estimated = 128;
2457 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2458 if (TARGET_64BIT)
2459 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2460 parm; parm = DECL_CHAIN (parm))
2462 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2463 continue;
2464 if (DECL_RTL_SET_P (parm)
2465 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2467 rtx r = DECL_RTL (parm);
2468 if (REG_P (r))
2469 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2471 if (DECL_INCOMING_RTL (parm)
2472 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2474 rtx r = DECL_INCOMING_RTL (parm);
2475 if (REG_P (r))
2476 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2481 return 0;
2484 namespace {
2486 const pass_data pass_data_insert_vzeroupper =
2488 RTL_PASS, /* type */
2489 "vzeroupper", /* name */
2490 OPTGROUP_NONE, /* optinfo_flags */
2491 TV_MACH_DEP, /* tv_id */
2492 0, /* properties_required */
2493 0, /* properties_provided */
2494 0, /* properties_destroyed */
2495 0, /* todo_flags_start */
2496 TODO_df_finish, /* todo_flags_finish */
2499 class pass_insert_vzeroupper : public rtl_opt_pass
2501 public:
2502 pass_insert_vzeroupper(gcc::context *ctxt)
2503 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2506 /* opt_pass methods: */
2507 virtual bool gate (function *)
2509 return TARGET_AVX
2510 && TARGET_VZEROUPPER && flag_expensive_optimizations
2511 && !optimize_size;
2514 virtual unsigned int execute (function *)
2516 return rest_of_handle_insert_vzeroupper ();
2519 }; // class pass_insert_vzeroupper
2521 const pass_data pass_data_stv =
2523 RTL_PASS, /* type */
2524 "stv", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 TV_MACH_DEP, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2534 class pass_stv : public rtl_opt_pass
2536 public:
2537 pass_stv (gcc::context *ctxt)
2538 : rtl_opt_pass (pass_data_stv, ctxt),
2539 timode_p (false)
2542 /* opt_pass methods: */
2543 virtual bool gate (function *)
2545 return (timode_p == !!TARGET_64BIT
2546 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2549 virtual unsigned int execute (function *)
2551 return convert_scalars_to_vector ();
2554 opt_pass *clone ()
2556 return new pass_stv (m_ctxt);
2559 void set_pass_param (unsigned int n, bool param)
2561 gcc_assert (n == 0);
2562 timode_p = param;
2565 private:
2566 bool timode_p;
2567 }; // class pass_stv
2569 } // anon namespace
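/* Factory functions that instantiate the passes defined in the anonymous
   namespace above.  */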
2571 rtl_opt_pass *
2572 make_pass_insert_vzeroupper (gcc::context *ctxt)
2574 return new pass_insert_vzeroupper (ctxt);
2577 rtl_opt_pass *
2578 make_pass_stv (gcc::context *ctxt)
2580 return new pass_stv (ctxt);
2583 /* Inserting ENDBRANCH instructions. */
2585 static unsigned int
2586 rest_of_insert_endbranch (void)
2588 timevar_push (TV_MACH_DEP);
2590 rtx cet_eb;
2591 rtx_insn *insn;
2592 basic_block bb;
2594 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
2595 absent among the function attributes. Later an optimization will be
2596 introduced to analyze whether the address of a static function is
2597 taken. A static function whose address is not taken will get a
2598 nocf_check attribute. This will make it possible to reduce the number of EBs. */
2600 if (!lookup_attribute ("nocf_check",
2601 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2602 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2604 cet_eb = gen_nop_endbr ();
2606 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2607 insn = BB_HEAD (bb);
2608 emit_insn_before (cet_eb, insn);
2611 bb = 0;
2612 FOR_EACH_BB_FN (bb, cfun)
2614 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2615 insn = NEXT_INSN (insn))
2617 if (CALL_P (insn))
2619 if (find_reg_note (insn, REG_SETJMP, NULL) == NULL)
2620 continue;
2621 /* Generate ENDBRANCH after a CALL that can return more than
2622 once (setjmp-like functions). */
2624 cet_eb = gen_nop_endbr ();
2625 emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
2626 continue;
2629 if (JUMP_P (insn) && flag_cet_switch)
2631 rtx target = JUMP_LABEL (insn);
2632 if (target == NULL_RTX || ANY_RETURN_P (target))
2633 continue;
2635 /* Check that the jump is a tablejump (switch table). */
2636 rtx_insn *label = as_a<rtx_insn *> (target);
2637 rtx_insn *table = next_insn (label);
2638 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2639 continue;
2641 /* For the indirect jump, find all the places it jumps to and insert
2642 ENDBRANCH there. This is done under a special flag to
2643 control ENDBRANCH generation for switch statements. */
2644 edge_iterator ei;
2645 edge e;
2646 basic_block dest_blk;
2648 FOR_EACH_EDGE (e, ei, bb->succs)
2650 rtx_insn *insn;
2652 dest_blk = e->dest;
2653 insn = BB_HEAD (dest_blk);
2654 gcc_assert (LABEL_P (insn));
2655 cet_eb = gen_nop_endbr ();
2656 emit_insn_after (cet_eb, insn);
2658 continue;
2661 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2662 || (NOTE_P (insn)
2663 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2664 /* TODO. Check /s bit also. */
2666 cet_eb = gen_nop_endbr ();
2667 emit_insn_after (cet_eb, insn);
2668 continue;
2673 timevar_pop (TV_MACH_DEP);
2674 return 0;
2677 namespace {
2679 const pass_data pass_data_insert_endbranch =
2681 RTL_PASS, /* type. */
2682 "cet", /* name. */
2683 OPTGROUP_NONE, /* optinfo_flags. */
2684 TV_MACH_DEP, /* tv_id. */
2685 0, /* properties_required. */
2686 0, /* properties_provided. */
2687 0, /* properties_destroyed. */
2688 0, /* todo_flags_start. */
2689 0, /* todo_flags_finish. */
2692 class pass_insert_endbranch : public rtl_opt_pass
2694 public:
2695 pass_insert_endbranch (gcc::context *ctxt)
2696 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2699 /* opt_pass methods: */
2700 virtual bool gate (function *)
2702 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2705 virtual unsigned int execute (function *)
2707 return rest_of_insert_endbranch ();
2710 }; // class pass_insert_endbranch
2712 } // anon namespace
2714 rtl_opt_pass *
2715 make_pass_insert_endbranch (gcc::context *ctxt)
2717 return new pass_insert_endbranch (ctxt);
2720 /* Return true if a red-zone is in use. We can't use the red-zone when
2721 there are local indirect jumps, like "indirect_jump" or "tablejump",
2722 which jump to another place in the function, since the "call" in the
2723 indirect thunk pushes the return address onto the stack, destroying the
2724 red-zone.
2726 TODO: If we can reserve the first 2 WORDs of the red-zone, one for
2727 PUSH and another for CALL, we can allow local indirect jumps with
2728 the indirect thunk. */
2730 bool
2731 ix86_using_red_zone (void)
2733 return (TARGET_RED_ZONE
2734 && !TARGET_64BIT_MS_ABI
2735 && (!cfun->machine->has_local_indirect_jump
2736 || cfun->machine->indirect_branch_type == indirect_branch_keep));
2739 /* Return a string that documents the current -m options. The caller is
2740 responsible for freeing the string. */
2742 static char *
2743 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2744 int flags, int flags2,
2745 const char *arch, const char *tune,
2746 enum fpmath_unit fpmath, bool add_nl_p)
2748 struct ix86_target_opts
2750 const char *option; /* option string */
2751 HOST_WIDE_INT mask; /* isa mask options */
2754 /* This table is ordered so that options like -msse4.2 that imply other
2755 ISAs come first. Target string will be displayed in the same order. */
2756 static struct ix86_target_opts isa2_opts[] =
2758 { "-mcx16", OPTION_MASK_ISA_CX16 },
2759 { "-mmpx", OPTION_MASK_ISA_MPX },
2760 { "-mvaes", OPTION_MASK_ISA_VAES },
2761 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2762 { "-mpconfig", OPTION_MASK_ISA_PCONFIG },
2763 { "-mwbnoinvd", OPTION_MASK_ISA_WBNOINVD },
2764 { "-msgx", OPTION_MASK_ISA_SGX },
2765 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2766 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2767 { "-mibt", OPTION_MASK_ISA_IBT },
2768 { "-mhle", OPTION_MASK_ISA_HLE },
2769 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2770 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2771 { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
2773 static struct ix86_target_opts isa_opts[] =
2775 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2776 { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG },
2777 { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ },
2778 { "-mgfni", OPTION_MASK_ISA_GFNI },
2779 { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI },
2780 { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 },
2781 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2782 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2783 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2784 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2785 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2786 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2787 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2788 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2789 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2790 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2791 { "-mfma", OPTION_MASK_ISA_FMA },
2792 { "-mxop", OPTION_MASK_ISA_XOP },
2793 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2794 { "-mf16c", OPTION_MASK_ISA_F16C },
2795 { "-mavx", OPTION_MASK_ISA_AVX },
2796 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2797 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2798 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2799 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2800 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2801 { "-msse3", OPTION_MASK_ISA_SSE3 },
2802 { "-maes", OPTION_MASK_ISA_AES },
2803 { "-msha", OPTION_MASK_ISA_SHA },
2804 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2805 { "-msse2", OPTION_MASK_ISA_SSE2 },
2806 { "-msse", OPTION_MASK_ISA_SSE },
2807 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2808 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2809 { "-mmmx", OPTION_MASK_ISA_MMX },
2810 { "-mrtm", OPTION_MASK_ISA_RTM },
2811 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2812 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2813 { "-madx", OPTION_MASK_ISA_ADX },
2814 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2815 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2816 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2817 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2818 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2819 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2820 { "-mabm", OPTION_MASK_ISA_ABM },
2821 { "-mbmi", OPTION_MASK_ISA_BMI },
2822 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2823 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2824 { "-mtbm", OPTION_MASK_ISA_TBM },
2825 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2826 { "-msahf", OPTION_MASK_ISA_SAHF },
2827 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2828 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2829 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2830 { "-mpku", OPTION_MASK_ISA_PKU },
2831 { "-mlwp", OPTION_MASK_ISA_LWP },
2832 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2833 { "-mclwb", OPTION_MASK_ISA_CLWB },
2834 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2837 /* Flag options. */
2838 static struct ix86_target_opts flag_opts[] =
2840 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2841 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2842 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2843 { "-m80387", MASK_80387 },
2844 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2845 { "-malign-double", MASK_ALIGN_DOUBLE },
2846 { "-mcld", MASK_CLD },
2847 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2848 { "-mieee-fp", MASK_IEEE_FP },
2849 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2850 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2851 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2852 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2853 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2854 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2855 { "-mno-red-zone", MASK_NO_RED_ZONE },
2856 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2857 { "-mrecip", MASK_RECIP },
2858 { "-mrtd", MASK_RTD },
2859 { "-msseregparm", MASK_SSEREGPARM },
2860 { "-mstack-arg-probe", MASK_STACK_PROBE },
2861 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2862 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2863 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2864 { "-mvzeroupper", MASK_VZEROUPPER },
2865 { "-mstv", MASK_STV },
2866 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2867 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2868 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2871 /* Additional flag options. */
2872 static struct ix86_target_opts flag2_opts[] =
2874 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY }
2877 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2878 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2880 char isa_other[40];
2881 char isa2_other[40];
2882 char flags_other[40];
2883 char flags2_other[40];
2884 unsigned num = 0;
2885 unsigned i, j;
2886 char *ret;
2887 char *ptr;
2888 size_t len;
2889 size_t line_len;
2890 size_t sep_len;
2891 const char *abi;
2893 memset (opts, '\0', sizeof (opts));
2895 /* Add -march= option. */
2896 if (arch)
2898 opts[num][0] = "-march=";
2899 opts[num++][1] = arch;
2902 /* Add -mtune= option. */
2903 if (tune)
2905 opts[num][0] = "-mtune=";
2906 opts[num++][1] = tune;
2909 /* Add -m32/-m64/-mx32. */
2910 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2912 if ((isa & OPTION_MASK_ABI_64) != 0)
2913 abi = "-m64";
2914 else
2915 abi = "-mx32";
2916 isa &= ~ (OPTION_MASK_ISA_64BIT
2917 | OPTION_MASK_ABI_64
2918 | OPTION_MASK_ABI_X32);
2920 else
2921 abi = "-m32";
2922 opts[num++][0] = abi;
2924 /* Pick out the options in isa2 options. */
2925 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2927 if ((isa2 & isa2_opts[i].mask) != 0)
2929 opts[num++][0] = isa2_opts[i].option;
2930 isa2 &= ~ isa2_opts[i].mask;
2934 if (isa2 && add_nl_p)
2936 opts[num++][0] = isa2_other;
2937 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2940 /* Pick out the options in isa options. */
2941 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2943 if ((isa & isa_opts[i].mask) != 0)
2945 opts[num++][0] = isa_opts[i].option;
2946 isa &= ~ isa_opts[i].mask;
2950 if (isa && add_nl_p)
2952 opts[num++][0] = isa_other;
2953 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2956 /* Add flag options. */
2957 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2959 if ((flags & flag_opts[i].mask) != 0)
2961 opts[num++][0] = flag_opts[i].option;
2962 flags &= ~ flag_opts[i].mask;
2966 if (flags && add_nl_p)
2968 opts[num++][0] = flags_other;
2969 sprintf (flags_other, "(other flags: %#x)", flags);
2972 /* Add additional flag options. */
2973 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2975 if ((flags2 & flag2_opts[i].mask) != 0)
2977 opts[num++][0] = flag2_opts[i].option;
2978 flags2 &= ~ flag2_opts[i].mask;
2982 if (flags2 && add_nl_p)
2984 opts[num++][0] = flags2_other;
2985 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2988 /* Add -fpmath= option. */
2989 if (fpmath)
2991 opts[num][0] = "-mfpmath=";
2992 switch ((int) fpmath)
2994 case FPMATH_387:
2995 opts[num++][1] = "387";
2996 break;
2998 case FPMATH_SSE:
2999 opts[num++][1] = "sse";
3000 break;
3002 case FPMATH_387 | FPMATH_SSE:
3003 opts[num++][1] = "sse+387";
3004 break;
3006 default:
3007 gcc_unreachable ();
3011 /* Any options? */
3012 if (num == 0)
3013 return NULL;
3015 gcc_assert (num < ARRAY_SIZE (opts));
3017 /* Size the string. */
3018 len = 0;
3019 sep_len = (add_nl_p) ? 3 : 1;
3020 for (i = 0; i < num; i++)
3022 len += sep_len;
3023 for (j = 0; j < 2; j++)
3024 if (opts[i][j])
3025 len += strlen (opts[i][j]);
3028 /* Build the string. */
3029 ret = ptr = (char *) xmalloc (len);
3030 line_len = 0;
3032 for (i = 0; i < num; i++)
3034 size_t len2[2];
3036 for (j = 0; j < 2; j++)
3037 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3039 if (i != 0)
3041 *ptr++ = ' ';
3042 line_len++;
3044 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3046 *ptr++ = '\\';
3047 *ptr++ = '\n';
3048 line_len = 0;
3052 for (j = 0; j < 2; j++)
3053 if (opts[i][j])
3055 memcpy (ptr, opts[i][j], len2[j]);
3056 ptr += len2[j];
3057 line_len += len2[j];
3061 *ptr = '\0';
3062 gcc_assert (ret + len >= ptr);
3064 return ret;
3067 /* Return true if profiling code should be emitted before the
3068 prologue, false otherwise.
3069 Note: for x86 with "hotfix" it is sorried, i.e. not supported. */
3070 static bool
3071 ix86_profile_before_prologue (void)
3073 return flag_fentry != 0;
3076 /* Function that is callable from the debugger to print the current
3077 options. */
3078 void ATTRIBUTE_UNUSED
3079 ix86_debug_options (void)
3081 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3082 target_flags, ix86_target_flags,
3083 ix86_arch_string,ix86_tune_string,
3084 ix86_fpmath, true);
3086 if (opts)
3088 fprintf (stderr, "%s\n\n", opts);
3089 free (opts);
3091 else
3092 fputs ("<no options>\n\n", stderr);
3094 return;
3097 /* Return true if T is one of the bytes we should avoid with
3098 -mmitigate-rop. */
3100 static bool
3101 ix86_rop_should_change_byte_p (int t)
3103 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3106 static const char *stringop_alg_names[] = {
3107 #define DEF_ENUM
3108 #define DEF_ALG(alg, name) #name,
3109 #include "stringop.def"
3110 #undef DEF_ENUM
3111 #undef DEF_ALG
3114 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3115 The string has the following form (or a comma-separated list of such entries):
3117 strategy_alg:max_size:[align|noalign]
3119 where the full size range for the strategy is either [0, max_size] or
3120 [min_size, max_size], in which min_size is the max_size + 1 of the
3121 preceding range. The last size range must have max_size == -1.
3123 Examples:
3126 -mmemcpy-strategy=libcall:-1:noalign
3128 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3132 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3134 This is to tell the compiler to use the following strategy for memset
3135 1) when the expected size is between [1, 16], use rep_8byte strategy;
3136 2) when the size is between [17, 2048], use vector_loop;
3137 3) when the size is > 2048, use libcall. */
3139 struct stringop_size_range
3141 int max;
3142 stringop_alg alg;
3143 bool noalign;
3146 static void
3147 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3149 const struct stringop_algs *default_algs;
3150 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3151 char *curr_range_str, *next_range_str;
3152 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3153 int i = 0, n = 0;
3155 if (is_memset)
3156 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3157 else
3158 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3160 curr_range_str = strategy_str;
3164 int maxs;
3165 char alg_name[128];
3166 char align[16];
3167 next_range_str = strchr (curr_range_str, ',');
3168 if (next_range_str)
3169 *next_range_str++ = '\0';
3171 if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
3172 align) != 3)
3174 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3175 return;
3178 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3180 error ("size ranges of option %qs should be increasing", opt);
3181 return;
3184 for (i = 0; i < last_alg; i++)
3185 if (!strcmp (alg_name, stringop_alg_names[i]))
3186 break;
3188 if (i == last_alg)
3190 error ("wrong strategy name %qs specified for option %qs",
3191 alg_name, opt);
3193 auto_vec <const char *> candidates;
3194 for (i = 0; i < last_alg; i++)
3195 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3196 candidates.safe_push (stringop_alg_names[i]);
3198 char *s;
3199 const char *hint
3200 = candidates_list_and_hint (alg_name, s, candidates);
3201 if (hint)
3202 inform (input_location,
3203 "valid arguments to %qs are: %s; did you mean %qs?",
3204 opt, s, hint);
3205 else
3206 inform (input_location, "valid arguments to %qs are: %s",
3207 opt, s);
3208 XDELETEVEC (s);
3209 return;
3212 if ((stringop_alg) i == rep_prefix_8_byte
3213 && !TARGET_64BIT)
3215 /* rep; movq isn't available in 32-bit code. */
3216 error ("strategy name %qs specified for option %qs "
3217 "not supported for 32-bit code", alg_name, opt);
3218 return;
3221 input_ranges[n].max = maxs;
3222 input_ranges[n].alg = (stringop_alg) i;
3223 if (!strcmp (align, "align"))
3224 input_ranges[n].noalign = false;
3225 else if (!strcmp (align, "noalign"))
3226 input_ranges[n].noalign = true;
3227 else
3229 error ("unknown alignment %qs specified for option %qs", align, opt);
3230 return;
3232 n++;
3233 curr_range_str = next_range_str;
3235 while (curr_range_str);
3237 if (input_ranges[n - 1].max != -1)
3239 error ("the max value for the last size range should be -1"
3240 " for option %qs", opt);
3241 return;
3244 if (n > MAX_STRINGOP_ALGS)
3246 error ("too many size ranges specified in option %qs", opt);
3247 return;
3250 /* Now override the default algs array. */
3251 for (i = 0; i < n; i++)
3253 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3254 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3255 = input_ranges[i].alg;
3256 *const_cast<int *>(&default_algs->size[i].noalign)
3257 = input_ranges[i].noalign;
3262 /* Parse the -mtune-ctrl= option. When DUMP is true,
3263 print the features that are explicitly set. */
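/* For example (illustrative only; the valid feature names are those listed
   in ix86_tune_feature_names[]):
     -mtune-ctrl=use_incdec,^use_leave
   sets the first feature and, because of the '^' prefix, clears the second.  */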
3265 static void
3266 parse_mtune_ctrl_str (bool dump)
3268 if (!ix86_tune_ctrl_string)
3269 return;
3271 char *next_feature_string = NULL;
3272 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3273 char *orig = curr_feature_string;
3274 int i;
3277 bool clear = false;
3279 next_feature_string = strchr (curr_feature_string, ',');
3280 if (next_feature_string)
3281 *next_feature_string++ = '\0';
3282 if (*curr_feature_string == '^')
3284 curr_feature_string++;
3285 clear = true;
3287 for (i = 0; i < X86_TUNE_LAST; i++)
3289 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3291 ix86_tune_features[i] = !clear;
3292 if (dump)
3293 fprintf (stderr, "Explicitly %s feature %s\n",
3294 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3295 break;
3298 if (i == X86_TUNE_LAST)
3299 error ("unknown parameter to option -mtune-ctrl: %s",
3300 clear ? curr_feature_string - 1 : curr_feature_string);
3301 curr_feature_string = next_feature_string;
3303 while (curr_feature_string);
3304 free (orig);
3307 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3308 processor type. */
3310 static void
3311 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3313 unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune;
3314 int i;
3316 for (i = 0; i < X86_TUNE_LAST; ++i)
3318 if (ix86_tune_no_default)
3319 ix86_tune_features[i] = 0;
3320 else
3321 ix86_tune_features[i]
3322 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3325 if (dump)
3327 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3328 for (i = 0; i < X86_TUNE_LAST; i++)
3329 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3330 ix86_tune_features[i] ? "on" : "off");
3333 parse_mtune_ctrl_str (dump);
3337 /* Default align_* from the processor table. */
3339 static void
3340 ix86_default_align (struct gcc_options *opts)
3342 if (opts->x_align_loops == 0)
3344 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3345 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3347 if (opts->x_align_jumps == 0)
3349 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3350 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3352 if (opts->x_align_functions == 0)
3354 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3358 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3360 static void
3361 ix86_override_options_after_change (void)
3363 ix86_default_align (&global_options);
3366 /* Override various settings based on options. If MAIN_ARGS_P, the
3367 options are from the command line, otherwise they are from
3368 attributes. Return true if there's an error related to the -march
3369 option. */
3371 static bool
3372 ix86_option_override_internal (bool main_args_p,
3373 struct gcc_options *opts,
3374 struct gcc_options *opts_set)
3376 int i;
3377 unsigned HOST_WIDE_INT ix86_arch_mask;
3378 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3380 const wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0);
3381 const wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1);
3382 const wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2);
3383 const wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3);
3384 const wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4);
3385 const wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5);
3386 const wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6);
3387 const wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7);
3388 const wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8);
3389 const wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9);
3390 const wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10);
3391 const wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11);
3392 const wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12);
3393 const wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13);
3394 const wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14);
3395 const wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15);
3396 const wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16);
3397 const wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17);
3398 const wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18);
3399 const wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19);
3400 const wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20);
3401 const wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21);
3402 const wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22);
3403 const wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23);
3404 const wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24);
3405 const wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25);
3406 const wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26);
3407 const wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27);
3408 const wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28);
3409 const wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29);
3410 const wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30);
3411 const wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31);
3412 const wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32);
3413 const wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33);
3414 const wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34);
3415 const wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35);
3416 const wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36);
3417 const wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37);
3418 const wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38);
3419 const wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39);
3420 const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
3421 const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
3422 const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
3423 const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
3424 const wide_int_bitmask PTA_MPX (HOST_WIDE_INT_1U << 44);
3425 const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
3426 const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
3427 const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
3428 const wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48);
3429 const wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49);
3430 const wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50);
3431 const wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51);
3432 const wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52);
3433 const wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53);
3434 const wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54);
3435 const wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55);
3436 const wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56);
3437 const wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57);
3438 const wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58);
3439 const wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59);
3440 const wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60);
3441 const wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61);
3442 const wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62);
3443 const wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63);
3444 const wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U);
3445 const wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1);
3446 const wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2);
3447 const wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3);
3448 const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
3449 const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
3450 const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
3451 const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
3452 const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
3454 const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
3455 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
3456 const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
3457 | PTA_POPCNT;
3458 const wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_AES | PTA_PCLMUL;
3459 const wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
3460 | PTA_XSAVEOPT;
3461 const wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE
3462 | PTA_RDRND | PTA_F16C;
3463 const wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI
3464 | PTA_BMI2 | PTA_LZCNT | PTA_FMA | PTA_MOVBE | PTA_HLE;
3465 const wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_PRFCHW
3466 | PTA_RDSEED;
3467 const wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_CLFLUSHOPT
3468 | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
3469 const wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
3470 | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3471 | PTA_CLWB;
3472 const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
3473 | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3474 | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
3475 const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
3476 | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
3477 | PTA_RDPID | PTA_CLWB;
3478 const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG
3479 | PTA_WBNOINVD;
3480 const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
3481 | PTA_AVX512F | PTA_AVX512CD;
3482 const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
3483 const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND;
3484 const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
3485 | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
3487 static struct pta
3489 const char *const name; /* processor name or nickname. */
3490 const enum processor_type processor;
3491 const enum attr_cpu schedule;
3492 const wide_int_bitmask flags;
3494 const processor_alias_table[] =
3496 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3497 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3498 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3499 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3500 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3501 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3502 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3503 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3504 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3505 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3506 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3507 PTA_MMX | PTA_SSE | PTA_FXSR},
3508 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3509 PTA_MMX | PTA_SSE | PTA_FXSR},
3510 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3511 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3512 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3513 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3514 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3515 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3516 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3517 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3518 PTA_MMX | PTA_SSE | PTA_FXSR},
3519 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3520 PTA_MMX | PTA_SSE | PTA_FXSR},
3521 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3522 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3523 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3524 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3525 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3526 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3527 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3528 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3529 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3530 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3531 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3532 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3533 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3534 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3535 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3536 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3537 PTA_SANDYBRIDGE},
3538 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3539 PTA_SANDYBRIDGE},
3540 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3541 PTA_IVYBRIDGE},
3542 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3543 PTA_IVYBRIDGE},
3544 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3545 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3546 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3547 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3548 {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
3549 PTA_SKYLAKE_AVX512},
3550 {"cannonlake", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL, PTA_CANNONLAKE},
3551 {"icelake-client", PROCESSOR_ICELAKE_CLIENT, CPU_HASWELL,
3552 PTA_ICELAKE_CLIENT},
3553 {"icelake-server", PROCESSOR_ICELAKE_SERVER, CPU_HASWELL,
3554 PTA_ICELAKE_SERVER},
3555 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3556 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3557 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3558 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3559 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3560 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3561 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3562 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3563 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3564 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3565 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3566 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3567 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3568 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3569 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3570 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3571 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3572 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3573 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3574 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3575 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3576 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3577 {"x86-64", PROCESSOR_K8, CPU_K8,
3578 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3579 {"eden-x2", PROCESSOR_K8, CPU_K8,
3580 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3581 {"nano", PROCESSOR_K8, CPU_K8,
3582 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3583 | PTA_SSSE3 | PTA_FXSR},
3584 {"nano-1000", PROCESSOR_K8, CPU_K8,
3585 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3586 | PTA_SSSE3 | PTA_FXSR},
3587 {"nano-2000", PROCESSOR_K8, CPU_K8,
3588 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3589 | PTA_SSSE3 | PTA_FXSR},
3590 {"nano-3000", PROCESSOR_K8, CPU_K8,
3591 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3592 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3593 {"nano-x2", PROCESSOR_K8, CPU_K8,
3594 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3595 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3596 {"eden-x4", PROCESSOR_K8, CPU_K8,
3597 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3598 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3599 {"nano-x4", PROCESSOR_K8, CPU_K8,
3600 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3601 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3602 {"k8", PROCESSOR_K8, CPU_K8,
3603 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3604 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3605 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3606 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3607 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3608 {"opteron", PROCESSOR_K8, CPU_K8,
3609 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3610 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3611 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3612 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3613 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3614 {"athlon64", PROCESSOR_K8, CPU_K8,
3615 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3616 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3617 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3618 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3619 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3620 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3621 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3622 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3623 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3624 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3625 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3626 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3627 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3628 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3629 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3630 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3631 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3632 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3633 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3634 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3636 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3637 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3638 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3639 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3640 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3642 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3643 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3644 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3645 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3646 | PTA_XSAVEOPT | PTA_FSGSBASE},
3647 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3648 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3649 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3650 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3651 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3652 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3653 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3654 | PTA_MOVBE | PTA_MWAITX},
3655 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3656 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3657 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3658 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3659 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3660 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3661 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3662 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3663 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3664 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3665 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3666 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3667 | PTA_FXSR | PTA_XSAVE},
3668 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3669 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3670 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3671 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3672 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3673 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3675 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3676 PTA_64BIT
3677 | PTA_HLE /* flags are only used for -march switch. */ },
3680 /* -mrecip options. */
3681 static struct
3683 const char *string; /* option name */
3684 unsigned int mask; /* mask bits to set */
3686 const recip_options[] =
3688 { "all", RECIP_MASK_ALL },
3689 { "none", RECIP_MASK_NONE },
3690 { "div", RECIP_MASK_DIV },
3691 { "sqrt", RECIP_MASK_SQRT },
3692 { "vec-div", RECIP_MASK_VEC_DIV },
3693 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3696 int const pta_size = ARRAY_SIZE (processor_alias_table);
3698 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3699 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3700 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3701 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3702 #ifdef TARGET_BI_ARCH
3703 else
3705 #if TARGET_BI_ARCH == 1
3706 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3707 is on and OPTION_MASK_ABI_X32 is off. We turn off
3708 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3709 -mx32. */
3710 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3711 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3712 #else
3713 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3714 on and OPTION_MASK_ABI_64 is off. We turn off
3715 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3716 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3717 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3718 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3719 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3720 #endif
3721 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3722 && TARGET_IAMCU_P (opts->x_target_flags))
3723 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3724 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3726 #endif
3728 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3730 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3731 OPTION_MASK_ABI_64 for TARGET_X32. */
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3733 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3735 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3736 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3737 | OPTION_MASK_ABI_X32
3738 | OPTION_MASK_ABI_64);
3739 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3741 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3742 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3744 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3747 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3748 SUBTARGET_OVERRIDE_OPTIONS;
3749 #endif
3751 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3752 SUBSUBTARGET_OVERRIDE_OPTIONS;
3753 #endif
3755 /* -fPIC is the default for x86_64. */
3756 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3757 opts->x_flag_pic = 2;
3759 /* Need to check -mtune=generic first. */
3760 if (opts->x_ix86_tune_string)
3762 /* As special support for cross compilers we read -mtune=native
3763 as -mtune=generic. With native compilers we won't see the
3764 -mtune=native, as it was changed by the driver. */
3765 if (!strcmp (opts->x_ix86_tune_string, "native"))
3767 opts->x_ix86_tune_string = "generic";
3769 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3770 warning (OPT_Wdeprecated,
3771 main_args_p
3772 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3773 "or %<-mtune=generic%> instead as appropriate")
3774 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3775 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3776 " instead as appropriate"));
3778 else
3780 if (opts->x_ix86_arch_string)
3781 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3782 if (!opts->x_ix86_tune_string)
3784 opts->x_ix86_tune_string
3785 = processor_target_table[TARGET_CPU_DEFAULT].name;
3786 ix86_tune_defaulted = 1;
3789 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3790 or defaulted. We need to use a sensible tune option. */
3791 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3793 opts->x_ix86_tune_string = "generic";
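/* Illustrative example (host CPU name is only an assumption): with a native
   compiler the driver rewrites -mtune=native into the detected CPU before
   cc1 ever sees it, so an invocation such as

       gcc -O2 -mtune=native foo.c        (on, say, a Skylake host)

   reaches this code as -mtune=skylake, while a cross compiler that cannot
   probe the host falls back to -mtune=generic as described above.  */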
3797 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3798 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3800 /* rep; movq isn't available in 32-bit code. */
3801 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3802 opts->x_ix86_stringop_alg = no_stringop;
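/* For instance, a (hypothetical) 32-bit compilation that asks for the
   8-byte rep-prefix expansion,

       gcc -m32 -mstringop-strategy=rep_8byte memcpy_test.c

   is rejected here, since "rep movsq" only exists in 64-bit mode; the
   strategy is reset to no_stringop so later code does not try to use it.  */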
3805 if (!opts->x_ix86_arch_string)
3806 opts->x_ix86_arch_string
3807 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3808 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3809 else
3810 ix86_arch_specified = 1;
3812 if (opts_set->x_ix86_pmode)
3814 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3815 && opts->x_ix86_pmode == PMODE_SI)
3816 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3817 && opts->x_ix86_pmode == PMODE_DI))
3818 error ("address mode %qs not supported in the %s bit mode",
3819 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3820 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3822 else
3823 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3824 ? PMODE_DI : PMODE_SI;
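/* A sketch of how -maddress-mode= interacts with the ABI selection above
   (file names are illustrative):

       gcc -mx32 foo.c                         Pmode defaults to SImode
       gcc -mx32 -maddress-mode=long foo.c     DImode pointers in an ILP32 ABI
       gcc -m64 -maddress-mode=short foo.c     rejected by the error above

   i.e. only combinations the selected ABI can actually express are kept.  */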
3826 if (!opts_set->x_ix86_abi)
3827 opts->x_ix86_abi = DEFAULT_ABI;
3829 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3830 error ("-mabi=ms not supported with X32 ABI");
3831 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3833 /* For targets using the ms ABI enable ms-extensions, if not
3834 explicitly turned off. For the non-ms ABI we turn off this
3835 option. */
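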
3836 if (!opts_set->x_flag_ms_extensions)
3837 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3839 if (opts_set->x_ix86_cmodel)
3841 switch (opts->x_ix86_cmodel)
3843 case CM_SMALL:
3844 case CM_SMALL_PIC:
3845 if (opts->x_flag_pic)
3846 opts->x_ix86_cmodel = CM_SMALL_PIC;
3847 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3848 error ("code model %qs not supported in the %s bit mode",
3849 "small", "32");
3850 break;
3852 case CM_MEDIUM:
3853 case CM_MEDIUM_PIC:
3854 if (opts->x_flag_pic)
3855 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3856 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3857 error ("code model %qs not supported in the %s bit mode",
3858 "medium", "32");
3859 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3860 error ("code model %qs not supported in x32 mode",
3861 "medium");
3862 break;
3864 case CM_LARGE:
3865 case CM_LARGE_PIC:
3866 if (opts->x_flag_pic)
3867 opts->x_ix86_cmodel = CM_LARGE_PIC;
3868 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3869 error ("code model %qs not supported in the %s bit mode",
3870 "large", "32");
3871 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3872 error ("code model %qs not supported in x32 mode",
3873 "large");
3874 break;
3876 case CM_32:
3877 if (opts->x_flag_pic)
3878 error ("code model %s does not support PIC mode", "32");
3879 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3880 error ("code model %qs not supported in the %s bit mode",
3881 "32", "64");
3882 break;
3884 case CM_KERNEL:
3885 if (opts->x_flag_pic)
3887 error ("code model %s does not support PIC mode", "kernel");
3888 opts->x_ix86_cmodel = CM_32;
3890 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3891 error ("code model %qs not supported in the %s bit mode",
3892 "kernel", "32");
3893 break;
3895 default:
3896 gcc_unreachable ();
3899 else
3901 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3902 use of rip-relative addressing. This eliminates fixups that
3903 would otherwise be needed if this object is to be placed in a
3904 DLL, and is essentially just as efficient as direct addressing. */
3905 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3906 && (TARGET_RDOS || TARGET_PECOFF))
3907 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3908 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3909 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3910 else
3911 opts->x_ix86_cmodel = CM_32;
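/* The effect of the defaults above, roughly (target triplets are only
   examples): a plain x86_64-linux-gnu compile gets the small code model
   (small PIC under -fpic), a 64-bit PE-COFF target such as
   x86_64-w64-mingw32 gets medium PIC so that addressing stays rip-relative,
   and any 32-bit compile falls back to the single 32-bit code model.  */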
3913 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3915 error ("-masm=intel not supported in this configuration");
3916 opts->x_ix86_asm_dialect = ASM_ATT;
3918 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3919 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3920 sorry ("%i-bit mode not compiled in",
3921 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3923 for (i = 0; i < pta_size; i++)
3924 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3926 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3928 error (main_args_p
3929 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3930 "switch")
3931 : G_("%<generic%> CPU can be used only for "
3932 "%<target(\"tune=\")%> attribute"));
3933 return false;
3935 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3937 error (main_args_p
3938 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3939 "switch")
3940 : G_("%<intel%> CPU can be used only for "
3941 "%<target(\"tune=\")%> attribute"));
3942 return false;
3945 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3946 && !((processor_alias_table[i].flags & PTA_64BIT) != 0))
3948 error ("CPU you selected does not support x86-64 "
3949 "instruction set");
3950 return false;
3953 ix86_schedule = processor_alias_table[i].schedule;
3954 ix86_arch = processor_alias_table[i].processor;
3955 /* Default cpu tuning to the architecture. */
3956 ix86_tune = ix86_arch;
3958 if (((processor_alias_table[i].flags & PTA_MMX) != 0)
3959 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3960 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3961 if (((processor_alias_table[i].flags & PTA_3DNOW) != 0)
3962 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3963 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3964 if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0)
3965 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3966 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3967 if (((processor_alias_table[i].flags & PTA_SSE) != 0)
3968 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3969 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3970 if (((processor_alias_table[i].flags & PTA_SSE2) != 0)
3971 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3972 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3973 if (((processor_alias_table[i].flags & PTA_SSE3) != 0)
3974 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3975 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3976 if (((processor_alias_table[i].flags & PTA_SSSE3) != 0)
3977 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3978 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3979 if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0)
3980 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3981 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3982 if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0)
3983 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3984 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3985 if (((processor_alias_table[i].flags & PTA_AVX) != 0)
3986 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3987 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3988 if (((processor_alias_table[i].flags & PTA_AVX2) != 0)
3989 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3990 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3991 if (((processor_alias_table[i].flags & PTA_FMA) != 0)
3992 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3993 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3994 if (((processor_alias_table[i].flags & PTA_SSE4A) != 0)
3995 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3996 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3997 if (((processor_alias_table[i].flags & PTA_FMA4) != 0)
3998 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3999 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4000 if (((processor_alias_table[i].flags & PTA_XOP) != 0)
4001 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4002 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4003 if (((processor_alias_table[i].flags & PTA_LWP) != 0)
4004 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4005 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4006 if (((processor_alias_table[i].flags & PTA_ABM) != 0)
4007 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4008 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4009 if (((processor_alias_table[i].flags & PTA_BMI) != 0)
4010 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4011 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4012 if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0)
4013 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4014 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4015 if (((processor_alias_table[i].flags & PTA_TBM) != 0)
4016 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4017 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4018 if (((processor_alias_table[i].flags & PTA_BMI2) != 0)
4019 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4020 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4021 if (((processor_alias_table[i].flags & PTA_CX16) != 0)
4022 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
4023 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
4024 if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0)
4025 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4026 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4027 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4028 && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0))
4029 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4030 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4031 if (((processor_alias_table[i].flags & PTA_MOVBE) != 0)
4032 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE))
4033 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE;
4034 if (((processor_alias_table[i].flags & PTA_AES) != 0)
4035 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4036 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4037 if (((processor_alias_table[i].flags & PTA_SHA) != 0)
4038 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4039 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4040 if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0)
4041 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4042 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4043 if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0)
4044 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4045 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4046 if (((processor_alias_table[i].flags & PTA_RDRND) != 0)
4047 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4048 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4049 if (((processor_alias_table[i].flags & PTA_F16C) != 0)
4050 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4051 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4052 if (((processor_alias_table[i].flags & PTA_RTM) != 0)
4053 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4054 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4055 if (((processor_alias_table[i].flags & PTA_HLE) != 0)
4056 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE))
4057 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE;
4058 if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0)
4059 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4060 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4061 if (((processor_alias_table[i].flags & PTA_RDSEED) != 0)
4062 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4063 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4064 if (((processor_alias_table[i].flags & PTA_ADX) != 0)
4065 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4066 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4067 if (((processor_alias_table[i].flags & PTA_FXSR) != 0)
4068 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4069 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4070 if (((processor_alias_table[i].flags & PTA_XSAVE) != 0)
4071 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4072 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4073 if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0)
4074 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4075 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4076 if (((processor_alias_table[i].flags & PTA_AVX512F) != 0)
4077 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4078 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4079 if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0)
4080 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4081 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4082 if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0)
4083 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4084 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4085 if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0)
4086 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4087 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4088 if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0)
4089 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4090 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4091 if (((processor_alias_table[i].flags & PTA_CLWB) != 0)
4092 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4093 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4094 if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0)
4095 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4096 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4097 if (((processor_alias_table[i].flags & PTA_CLZERO) != 0)
4098 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO))
4099 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO;
4100 if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0)
4101 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4102 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4103 if (((processor_alias_table[i].flags & PTA_XSAVES) != 0)
4104 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4105 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4106 if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0)
4107 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4108 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4109 if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0)
4110 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4111 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4112 if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0)
4113 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4114 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4115 if (((processor_alias_table[i].flags & PTA_MPX) != 0)
4116 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
4117 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
4118 if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0)
4119 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4120 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4121 if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0)
4122 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4123 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4124 if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0)
4125 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI))
4126 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI;
4127 if (((processor_alias_table[i].flags & PTA_GFNI) != 0)
4128 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI))
4129 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI;
4130 if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0)
4131 && !(opts->x_ix86_isa_flags_explicit
4132 & OPTION_MASK_ISA_AVX512VBMI2))
4133 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2;
4134 if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0)
4135 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ))
4136 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ;
4137 if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0)
4138 && !(opts->x_ix86_isa_flags_explicit
4139 & OPTION_MASK_ISA_AVX512BITALG))
4140 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG;
4142 if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0)
4143 && !(opts->x_ix86_isa_flags2_explicit
4144 & OPTION_MASK_ISA_AVX5124VNNIW))
4145 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4146 if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0)
4147 && !(opts->x_ix86_isa_flags2_explicit
4148 & OPTION_MASK_ISA_AVX5124FMAPS))
4149 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4150 if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0)
4151 && !(opts->x_ix86_isa_flags_explicit
4152 & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4153 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4154 if (((processor_alias_table[i].flags & PTA_SGX) != 0)
4155 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4156 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4157 if (((processor_alias_table[i].flags & PTA_VAES) != 0)
4158 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES))
4159 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES;
4160 if (((processor_alias_table[i].flags & PTA_RDPID) != 0)
4161 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID))
4162 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID;
4163 if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0)
4164 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG))
4165 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG;
4166 if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0)
4167 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD))
4168 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD;
4170 if ((processor_alias_table[i].flags
4171 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
4172 x86_prefetch_sse = true;
4173 if (((processor_alias_table[i].flags & PTA_MWAITX) != 0)
4174 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX))
4175 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX;
4176 if (((processor_alias_table[i].flags & PTA_PKU) != 0)
4177 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4178 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4180 /* Don't enable x87 instructions if only
4181 general registers are allowed. */
4182 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4183 && !(opts_set->x_target_flags & MASK_80387))
4185 if (((processor_alias_table[i].flags & PTA_NO_80387) != 0))
4186 opts->x_target_flags &= ~MASK_80387;
4187 else
4188 opts->x_target_flags |= MASK_80387;
4190 break;
4193 if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4194 error ("Intel MPX does not support x32");
4196 if (TARGET_X32 && (ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4197 error ("Intel MPX does not support x32");
4199 if (i == pta_size)
4201 error (main_args_p
4202 ? G_("bad value (%qs) for %<-march=%> switch")
4203 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4204 opts->x_ix86_arch_string);
4206 auto_vec <const char *> candidates;
4207 for (i = 0; i < pta_size; i++)
4208 if (strcmp (processor_alias_table[i].name, "generic")
4209 && strcmp (processor_alias_table[i].name, "intel")
4210 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4211 || ((processor_alias_table[i].flags & PTA_64BIT) != 0)))
4212 candidates.safe_push (processor_alias_table[i].name);
4214 #ifdef HAVE_LOCAL_CPU_DETECT
4215 /* Add also "native" as possible value. */
4216 candidates.safe_push ("native");
4217 #endif
4219 char *s;
4220 const char *hint
4221 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4222 if (hint)
4223 inform (input_location,
4224 main_args_p
4225 ? G_("valid arguments to %<-march=%> switch are: "
4226 "%s; did you mean %qs?")
4227 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4228 "%s; did you mean %qs?"), s, hint);
4229 else
4230 inform (input_location,
4231 main_args_p
4232 ? G_("valid arguments to %<-march=%> switch are: %s")
4233 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4234 "are: %s"), s);
4235 XDELETEVEC (s);
4238 ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
4239 for (i = 0; i < X86_ARCH_LAST; ++i)
4240 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4242 for (i = 0; i < pta_size; i++)
4243 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4245 ix86_schedule = processor_alias_table[i].schedule;
4246 ix86_tune = processor_alias_table[i].processor;
4247 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4249 if (!((processor_alias_table[i].flags & PTA_64BIT) != 0))
4251 if (ix86_tune_defaulted)
4253 opts->x_ix86_tune_string = "x86-64";
4254 for (i = 0; i < pta_size; i++)
4255 if (! strcmp (opts->x_ix86_tune_string,
4256 processor_alias_table[i].name))
4257 break;
4258 ix86_schedule = processor_alias_table[i].schedule;
4259 ix86_tune = processor_alias_table[i].processor;
4261 else
4262 error ("CPU you selected does not support x86-64 "
4263 "instruction set");
4266 /* Intel CPUs have always interpreted SSE prefetch instructions as
4267 NOPs; so, we can enable SSE prefetch instructions even when
4268 -mtune (rather than -march) points us to a processor that has them.
4269 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4270 higher processors. */
4271 if (TARGET_CMOV
4272 && ((processor_alias_table[i].flags
4273 & (PTA_PREFETCH_SSE | PTA_SSE)) != 0))
4274 x86_prefetch_sse = true;
4275 break;
4278 if (ix86_tune_specified && i == pta_size)
4280 error (main_args_p
4281 ? G_("bad value (%qs) for %<-mtune=%> switch")
4282 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4283 opts->x_ix86_tune_string);
4285 auto_vec <const char *> candidates;
4286 for (i = 0; i < pta_size; i++)
4287 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4288 || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
4289 candidates.safe_push (processor_alias_table[i].name);
4291 #ifdef HAVE_LOCAL_CPU_DETECT
4292 /* Add also "native" as possible value. */
4293 candidates.safe_push ("native");
4294 #endif
4296 char *s;
4297 const char *hint
4298 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4299 if (hint)
4300 inform (input_location,
4301 main_args_p
4302 ? G_("valid arguments to %<-mtune=%> switch are: "
4303 "%s; did you mean %qs?")
4304 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4305 "%s; did you mean %qs?"), s, hint);
4306 else
4307 inform (input_location,
4308 main_args_p
4309 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4310 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4311 "are: %s"), s);
4312 XDELETEVEC (s);
4315 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4317 #ifndef USE_IX86_FRAME_POINTER
4318 #define USE_IX86_FRAME_POINTER 0
4319 #endif
4321 #ifndef USE_X86_64_FRAME_POINTER
4322 #define USE_X86_64_FRAME_POINTER 0
4323 #endif
4325 /* Set the default values for switches whose default depends on TARGET_64BIT
4326 in case they weren't overwritten by command line options. */
4327 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4329 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4330 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4331 if (opts->x_flag_asynchronous_unwind_tables
4332 && !opts_set->x_flag_unwind_tables
4333 && TARGET_64BIT_MS_ABI)
4334 opts->x_flag_unwind_tables = 1;
4335 if (opts->x_flag_asynchronous_unwind_tables == 2)
4336 opts->x_flag_unwind_tables
4337 = opts->x_flag_asynchronous_unwind_tables = 1;
4338 if (opts->x_flag_pcc_struct_return == 2)
4339 opts->x_flag_pcc_struct_return = 0;
4341 else
4343 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4344 opts->x_flag_omit_frame_pointer
4345 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4346 if (opts->x_flag_asynchronous_unwind_tables == 2)
4347 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4348 if (opts->x_flag_pcc_struct_return == 2)
4350 /* Intel MCU psABI specifies that -freg-struct-return should
4351 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4352 we check -miamcu so that -freg-struct-return is always
4353 turned on if -miamcu is used. */
4354 if (TARGET_IAMCU_P (opts->x_target_flags))
4355 opts->x_flag_pcc_struct_return = 0;
4356 else
4357 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4361 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4362 /* TODO: ix86_cost should be chosen at instruction or function granularity
4363 so that for cold code we use size_cost even in !optimize_size compilation. */
4364 if (opts->x_optimize_size)
4365 ix86_cost = &ix86_size_cost;
4366 else
4367 ix86_cost = ix86_tune_cost;
4369 /* Arrange to set up i386_stack_locals for all functions. */
4370 init_machine_status = ix86_init_machine_status;
4372 /* Validate -mregparm= value. */
4373 if (opts_set->x_ix86_regparm)
4375 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4376 warning (0, "-mregparm is ignored in 64-bit mode");
4377 else if (TARGET_IAMCU_P (opts->x_target_flags))
4378 warning (0, "-mregparm is ignored for Intel MCU psABI");
4379 if (opts->x_ix86_regparm > REGPARM_MAX)
4381 error ("-mregparm=%d is not between 0 and %d",
4382 opts->x_ix86_regparm, REGPARM_MAX);
4383 opts->x_ix86_regparm = 0;
4386 if (TARGET_IAMCU_P (opts->x_target_flags)
4387 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4388 opts->x_ix86_regparm = REGPARM_MAX;
4390 /* Default align_* from the processor table. */
4391 ix86_default_align (opts);
4393 /* Provide default for -mbranch-cost= value. */
4394 if (!opts_set->x_ix86_branch_cost)
4395 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4397 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4399 opts->x_target_flags
4400 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4402 /* Enable by default the SSE and MMX builtins. Do allow the user to
4403 explicitly disable any of these. In particular, disabling SSE and
4404 MMX for kernel code is extremely useful. */
4405 if (!ix86_arch_specified)
4406 opts->x_ix86_isa_flags
4407 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4408 | TARGET_SUBTARGET64_ISA_DEFAULT)
4409 & ~opts->x_ix86_isa_flags_explicit);
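/* For example, a freestanding kernel built for x86-64 typically overrides
   these defaults explicitly (flags are illustrative, in the spirit of the
   comment above):

       gcc -m64 -ffreestanding -mno-sse -mno-mmx -mno-3dnow -mno-avx kernel.c

   which keeps the compiler from going through vector registers that the
   kernel does not save on entry.  */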
4411 if (TARGET_RTD_P (opts->x_target_flags))
4412 warning (0,
4413 main_args_p
4414 ? G_("%<-mrtd%> is ignored in 64bit mode")
4415 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4417 else
4419 opts->x_target_flags
4420 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4422 if (!ix86_arch_specified)
4423 opts->x_ix86_isa_flags
4424 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4426 /* The i386 ABI does not specify a red zone. It still makes sense to use one
4427 when the programmer takes care to keep the stack from being destroyed. */
4428 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4429 opts->x_target_flags |= MASK_NO_RED_ZONE;
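/* In other words: 32-bit code gets MASK_NO_RED_ZONE by default because the
   i386 ABI never defined a red zone, whereas the x86-64 psABI grants a
   128-byte red zone below %rsp, and 64-bit code that cannot tolerate it
   (interrupt handlers, kernels) must request -mno-red-zone itself, e.g.

       gcc -m64 -mno-red-zone -mgeneral-regs-only isr.c     (illustrative)  */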
4432 /* Keep nonleaf frame pointers. */
4433 if (opts->x_flag_omit_frame_pointer)
4434 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4435 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4436 opts->x_flag_omit_frame_pointer = 1;
4438 /* If we're doing fast math, we don't care about comparison order
4439 wrt NaNs. This lets us use a shorter comparison sequence. */
4440 if (opts->x_flag_finite_math_only)
4441 opts->x_target_flags &= ~MASK_IEEE_FP;
4443 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4444 since the insns won't need emulation. */
4445 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4446 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4448 /* Likewise, if the target doesn't have a 387, or we've specified
4449 software floating point, don't use 387 inline intrinsics. */
4450 if (!TARGET_80387_P (opts->x_target_flags))
4451 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4453 /* Turn on MMX builtins for -msse. */
4454 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4455 opts->x_ix86_isa_flags
4456 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4458 /* Enable SSE prefetch. */
4459 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4460 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4461 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4462 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4463 x86_prefetch_sse = true;
4465 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4466 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4467 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4468 opts->x_ix86_isa_flags
4469 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4471 /* Enable lzcnt instruction for -mabm. */
4472 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4473 opts->x_ix86_isa_flags
4474 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4476 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4477 if (TARGET_16BIT_P(opts->x_ix86_isa_flags))
4478 opts->x_ix86_isa_flags
4479 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4480 & ~opts->x_ix86_isa_flags_explicit);
4482 /* Validate -mpreferred-stack-boundary= value or default it to
4483 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4484 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4485 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4487 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2;
4488 int max = TARGET_SEH ? 4 : 12;
4490 if (opts->x_ix86_preferred_stack_boundary_arg < min
4491 || opts->x_ix86_preferred_stack_boundary_arg > max)
4493 if (min == max)
4494 error ("-mpreferred-stack-boundary is not supported "
4495 "for this target");
4496 else
4497 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4498 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4500 else
4501 ix86_preferred_stack_boundary
4502 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4505 /* Set the default value for -mstackrealign. */
4506 if (!opts_set->x_ix86_force_align_arg_pointer)
4507 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4509 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4511 /* Validate -mincoming-stack-boundary= value or default it to
4512 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4513 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4514 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4516 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4518 if (opts->x_ix86_incoming_stack_boundary_arg < min
4519 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4520 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4521 opts->x_ix86_incoming_stack_boundary_arg, min);
4522 else
4524 ix86_user_incoming_stack_boundary
4525 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4526 ix86_incoming_stack_boundary
4527 = ix86_user_incoming_stack_boundary;
4531 #ifndef NO_PROFILE_COUNTERS
4532 if (flag_nop_mcount)
4533 error ("-mnop-mcount is not compatible with this target");
4534 #endif
4535 if (flag_nop_mcount && flag_pic)
4536 error ("-mnop-mcount is not implemented for -fPIC");
4538 /* Accept -msseregparm only if at least SSE support is enabled. */
4539 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4540 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4541 error (main_args_p
4542 ? G_("%<-msseregparm%> used without SSE enabled")
4543 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4545 if (opts_set->x_ix86_fpmath)
4547 if (opts->x_ix86_fpmath & FPMATH_SSE)
4549 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4551 if (TARGET_80387_P (opts->x_target_flags))
4553 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4554 opts->x_ix86_fpmath = FPMATH_387;
4557 else if ((opts->x_ix86_fpmath & FPMATH_387)
4558 && !TARGET_80387_P (opts->x_target_flags))
4560 warning (0, "387 instruction set disabled, using SSE arithmetics");
4561 opts->x_ix86_fpmath = FPMATH_SSE;
4565 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4566 fpmath=387. The latter is however the default on many targets since the
4567 extra 80-bit precision of temporaries is considered to be part of the ABI.
4568 Overwrite the default at least for -ffast-math.
4569 TODO: -mfpmath=both seems to produce similarly performing code with
4570 slightly smaller binaries. It is however not clear if register allocation
4571 is ready for this setting.
4572 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4573 codegen. We may switch to 387 with -ffast-math for size-optimized
4574 functions. */
4575 else if (fast_math_flags_set_p (&global_options)
4576 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4577 opts->x_ix86_fpmath = FPMATH_SSE;
4578 else
4579 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
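/* A small usage sketch of the defaulting above (file name illustrative):

       gcc -O2 -m32 scalar.c                       default fpmath, often 387
       gcc -O2 -m32 -msse2 -mfpmath=sse scalar.c   explicit SSE scalar FP
       gcc -O2 -m32 -msse2 -ffast-math scalar.c    fpmath switched to SSE here

   On 64-bit targets SSE math is already the default, so the -ffast-math
   override above mainly matters for 32-bit compilations.  */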
4581 /* Use external vectorized library in vectorizing intrinsics. */
4582 if (opts_set->x_ix86_veclibabi_type)
4583 switch (opts->x_ix86_veclibabi_type)
4585 case ix86_veclibabi_type_svml:
4586 ix86_veclib_handler = ix86_veclibabi_svml;
4587 break;
4589 case ix86_veclibabi_type_acml:
4590 ix86_veclib_handler = ix86_veclibabi_acml;
4591 break;
4593 default:
4594 gcc_unreachable ();
4597 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4598 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4599 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4601 /* If stack probes are required, the space used for large function
4602 arguments on the stack must also be probed, so enable
4603 -maccumulate-outgoing-args so this happens in the prologue. */
4604 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4605 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4607 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4608 warning (0,
4609 main_args_p
4610 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4611 "for correctness")
4612 : G_("stack probing requires "
4613 "%<target(\"accumulate-outgoing-args\")%> for "
4614 "correctness"));
4615 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4618 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4619 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4620 if (fixed_regs[BP_REG]
4621 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4623 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4624 warning (0,
4625 main_args_p
4626 ? G_("fixed ebp register requires "
4627 "%<-maccumulate-outgoing-args%>")
4628 : G_("fixed ebp register requires "
4629 "%<target(\"accumulate-outgoing-args\")%>"));
4630 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4633 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4635 char *p;
4636 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4637 p = strchr (internal_label_prefix, 'X');
4638 internal_label_prefix_len = p - internal_label_prefix;
4639 *p = '\0';
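/* As an example of what the snippet above computes: on a typical ELF
   configuration ASM_GENERATE_INTERNAL_LABEL fills the buffer with something
   like "*.LX0", so after chopping at the 'X' the prefix is "*.L" with a
   length of 3; output code later uses this to recognize internal labels in
   operands.  (The exact spelling depends on the object format.)  */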
4642 /* When a scheduling description is not available, disable the scheduler pass
4643 so it won't slow down the compilation or make x87 code slower. */
4644 if (!TARGET_SCHEDULE)
4645 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4647 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4648 ix86_tune_cost->simultaneous_prefetches,
4649 opts->x_param_values,
4650 opts_set->x_param_values);
4651 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4652 ix86_tune_cost->prefetch_block,
4653 opts->x_param_values,
4654 opts_set->x_param_values);
4655 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4656 ix86_tune_cost->l1_cache_size,
4657 opts->x_param_values,
4658 opts_set->x_param_values);
4659 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4660 ix86_tune_cost->l2_cache_size,
4661 opts->x_param_values,
4662 opts_set->x_param_values);
4664 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4665 if (opts->x_flag_prefetch_loop_arrays < 0
4666 && HAVE_prefetch
4667 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4668 && !opts->x_optimize_size
4669 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4670 opts->x_flag_prefetch_loop_arrays = 1;
4672 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4673 can be optimized to ap = __builtin_next_arg (0). */
4674 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4675 targetm.expand_builtin_va_start = NULL;
4677 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4679 ix86_gen_leave = gen_leave_rex64;
4680 if (Pmode == DImode)
4682 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4683 ix86_gen_tls_local_dynamic_base_64
4684 = gen_tls_local_dynamic_base_64_di;
4686 else
4688 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4689 ix86_gen_tls_local_dynamic_base_64
4690 = gen_tls_local_dynamic_base_64_si;
4693 else
4694 ix86_gen_leave = gen_leave;
4696 if (Pmode == DImode)
4698 ix86_gen_add3 = gen_adddi3;
4699 ix86_gen_sub3 = gen_subdi3;
4700 ix86_gen_sub3_carry = gen_subdi3_carry;
4701 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4702 ix86_gen_andsp = gen_anddi3;
4703 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4704 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4705 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4706 ix86_gen_monitor = gen_sse3_monitor_di;
4707 ix86_gen_monitorx = gen_monitorx_di;
4708 ix86_gen_clzero = gen_clzero_di;
4710 else
4712 ix86_gen_add3 = gen_addsi3;
4713 ix86_gen_sub3 = gen_subsi3;
4714 ix86_gen_sub3_carry = gen_subsi3_carry;
4715 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4716 ix86_gen_andsp = gen_andsi3;
4717 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4718 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4719 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4720 ix86_gen_monitor = gen_sse3_monitor_si;
4721 ix86_gen_monitorx = gen_monitorx_si;
4722 ix86_gen_clzero = gen_clzero_si;
4725 #ifdef USE_IX86_CLD
4726 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4727 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4728 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4729 #endif
4731 /* Set the default value for -mfentry. */
4732 if (!opts_set->x_flag_fentry)
4733 opts->x_flag_fentry = TARGET_SEH;
4734 else
4736 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4737 && opts->x_flag_fentry)
4738 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4739 "with -fpic");
4740 else if (TARGET_SEH && !opts->x_flag_fentry)
4741 sorry ("-mno-fentry isn%'t compatible with SEH");
4744 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4745 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4747 if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
4748 && TARGET_EMIT_VZEROUPPER)
4749 opts->x_target_flags |= MASK_VZEROUPPER;
4750 if (!(opts_set->x_target_flags & MASK_STV))
4751 opts->x_target_flags |= MASK_STV;
4752 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4753 -mincoming-stack-boundary={2,3} or -mstackrealign - the needed
4754 stack realignment would be extra cost the pass doesn't take into
4755 account, and the pass can't realign the stack. */
4756 if (ix86_preferred_stack_boundary < 128
4757 || ix86_incoming_stack_boundary < 128
4758 || opts->x_ix86_force_align_arg_pointer)
4759 opts->x_target_flags &= ~MASK_STV;
4760 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4761 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4762 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4763 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4764 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4765 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4767 /* Enable 128-bit AVX instruction generation
4768 for the auto-vectorizer. */
4769 if (TARGET_AVX128_OPTIMAL
4770 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4771 opts->x_prefer_vector_width_type = PVW_AVX128;
4773 /* Use 256-bit AVX instruction generation
4774 in the auto-vectorizer. */
4775 if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
4776 && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4777 opts->x_prefer_vector_width_type = PVW_AVX256;
4779 if (opts->x_ix86_recip_name)
4781 char *p = ASTRDUP (opts->x_ix86_recip_name);
4782 char *q;
4783 unsigned int mask, i;
4784 bool invert;
4786 while ((q = strtok (p, ",")) != NULL)
4788 p = NULL;
4789 if (*q == '!')
4791 invert = true;
4792 q++;
4794 else
4795 invert = false;
4797 if (!strcmp (q, "default"))
4798 mask = RECIP_MASK_ALL;
4799 else
4801 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4802 if (!strcmp (q, recip_options[i].string))
4804 mask = recip_options[i].mask;
4805 break;
4808 if (i == ARRAY_SIZE (recip_options))
4810 error ("unknown option for -mrecip=%s", q);
4811 invert = false;
4812 mask = RECIP_MASK_NONE;
4816 opts->x_recip_mask_explicit |= mask;
4817 if (invert)
4818 opts->x_recip_mask &= ~mask;
4819 else
4820 opts->x_recip_mask |= mask;
4824 if (TARGET_RECIP_P (opts->x_target_flags))
4825 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4826 else if (opts_set->x_target_flags & MASK_RECIP)
4827 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
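/* Example of the -mrecip string syntax parsed above (option names come
   straight from recip_options; file name illustrative):

       gcc -O2 -mavx -mrecip=all,!sqrt,!vec-sqrt loops.c

   enables the reciprocal approximations for divisions but keeps real sqrt
   instructions; a bare -mrecip behaves like -mrecip=all, and a leading '!'
   removes an option from whatever has been enabled so far.  */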
4829 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4830 for 64-bit Bionic. Also default long double to 64-bit for Intel
4831 MCU psABI. */
4832 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4833 && !(opts_set->x_target_flags
4834 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4835 opts->x_target_flags |= (TARGET_64BIT
4836 ? MASK_LONG_DOUBLE_128
4837 : MASK_LONG_DOUBLE_64);
4839 /* Only one of them can be active. */
4840 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4841 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4843 /* Handle stack protector */
4844 if (!opts_set->x_ix86_stack_protector_guard)
4845 opts->x_ix86_stack_protector_guard
4846 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4848 #ifdef TARGET_THREAD_SSP_OFFSET
4849 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4850 #endif
4852 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4854 char *endp;
4855 const char *str = ix86_stack_protector_guard_offset_str;
4857 errno = 0;
4858 int64_t offset;
4860 #if defined(INT64_T_IS_LONG)
4861 offset = strtol (str, &endp, 0);
4862 #else
4863 offset = strtoll (str, &endp, 0);
4864 #endif
4866 if (!*str || *endp || errno)
4867 error ("%qs is not a valid number "
4868 "in -mstack-protector-guard-offset=", str);
4870 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4871 HOST_WIDE_INT_C (0x7fffffff)))
4872 error ("%qs is not a valid offset "
4873 "in -mstack-protector-guard-offset=", str);
4875 ix86_stack_protector_guard_offset = offset;
4878 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4880 /* The kernel uses a different segment register for performance
4881 reasons; that way a system call does not have to trash the userspace
4882 segment register, which would be expensive. */
4883 if (ix86_cmodel == CM_KERNEL)
4884 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4886 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4888 const char *str = ix86_stack_protector_guard_reg_str;
4889 addr_space_t seg = ADDR_SPACE_GENERIC;
4891 /* Discard optional register prefix. */
4892 if (str[0] == '%')
4893 str++;
4895 if (strlen (str) == 2 && str[1] == 's')
4897 if (str[0] == 'f')
4898 seg = ADDR_SPACE_SEG_FS;
4899 else if (str[0] == 'g')
4900 seg = ADDR_SPACE_SEG_GS;
4903 if (seg == ADDR_SPACE_GENERIC)
4904 error ("%qs is not a valid base register "
4905 "in -mstack-protector-guard-reg=",
4906 ix86_stack_protector_guard_reg_str);
4908 ix86_stack_protector_guard_reg = seg;
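/* Combined usage example for the three stack-protector knobs handled above
   (the offset value is illustrative only):

       gcc -fstack-protector-strong \
           -mstack-protector-guard=tls \
           -mstack-protector-guard-reg=gs \
           -mstack-protector-guard-offset=0x28 percpu.c

   is the kind of command line a kernel keeping its canary at a fixed offset
   from a segment base would use; ordinary userland code normally relies on
   the defaults chosen above.  */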
4911 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4912 if (opts->x_ix86_tune_memcpy_strategy)
4914 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4915 ix86_parse_stringop_strategy_string (str, false);
4916 free (str);
4919 if (opts->x_ix86_tune_memset_strategy)
4921 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4922 ix86_parse_stringop_strategy_string (str, true);
4923 free (str);
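/* The strategy strings parsed above are comma-separated
   "algorithm:max_size:alignment" triplets, the last one using -1 as a
   catch-all size; a plausible (purely illustrative) example would be

       gcc -O2 -mmemcpy-strategy=rep_byte:128:noalign,libcall:-1:align \
           -mmemset-strategy=libcall:-1:align stringops.c

   i.e. tiny copies via "rep movsb", everything else via the library call.  */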
4926 /* Save the initial options in case the user uses function-specific
4927 options. */
4928 if (main_args_p)
4929 target_option_default_node = target_option_current_node
4930 = build_target_option_node (opts);
4932 /* Do not support control flow instrumentation if CET is not enabled. */
4933 cf_protection_level cf_protection
4934 = (cf_protection_level) (opts->x_flag_cf_protection & ~CF_SET);
4935 if (cf_protection != CF_NONE)
4937 switch (cf_protection)
4939 case CF_BRANCH:
4940 if (! TARGET_IBT_P (opts->x_ix86_isa_flags2))
4942 error ("%<-fcf-protection=branch%> requires Intel CET "
4943 "support. Use -mcet or -mibt option to enable CET");
4944 flag_cf_protection = CF_NONE;
4945 return false;
4947 break;
4948 case CF_RETURN:
4949 if (! TARGET_SHSTK_P (opts->x_ix86_isa_flags))
4951 error ("%<-fcf-protection=return%> requires Intel CET "
4952 "support. Use -mcet or -mshstk option to enable CET");
4953 flag_cf_protection = CF_NONE;
4954 return false;
4956 break;
4957 case CF_FULL:
4958 if ( ! TARGET_IBT_P (opts->x_ix86_isa_flags2)
4959 || ! TARGET_SHSTK_P (opts->x_ix86_isa_flags))
4961 error ("%<-fcf-protection=full%> requires Intel CET "
4962 "support. Use -mcet or both of -mibt and "
4963 "-mshstk options to enable CET");
4964 flag_cf_protection = CF_NONE;
4965 return false;
4967 break;
4968 default:
4969 gcc_unreachable ();
4972 opts->x_flag_cf_protection =
4973 (cf_protection_level) (cf_protection | CF_SET);
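/* With the checks above, control-flow protection has to be paired with the
   matching ISA bits, e.g. (illustrative invocations):

       gcc -fcf-protection=full -mcet prog.c       both IBT and SHSTK enabled
       gcc -fcf-protection=branch -mibt prog.c     endbr insertion only
       gcc -fcf-protection=return prog.c           rejected: needs -mshstk

   Once accepted, CF_SET is folded back into flag_cf_protection so later
   passes can tell the option has been validated.  */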
4976 if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS])
4977 maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128,
4978 opts->x_param_values,
4979 opts_set->x_param_values);
4981 return true;
4984 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4986 static void
4987 ix86_option_override (void)
4989 ix86_option_override_internal (true, &global_options, &global_options_set);
4992 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4993 static char *
4994 ix86_offload_options (void)
4996 if (TARGET_LP64)
4997 return xstrdup ("-foffload-abi=lp64");
4998 return xstrdup ("-foffload-abi=ilp32");
5001 /* Update register usage after having seen the compiler flags. */
5003 static void
5004 ix86_conditional_register_usage (void)
5006 int i, c_mask;
5008 /* If there are no caller-saved registers, preserve all registers,
5009 except fixed_regs and registers used for the function return value,
5010 since aggregate_value_p checks call_used_regs[regno] on the return
5011 value. */
5012 if (cfun && cfun->machine->no_caller_saved_registers)
5013 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5014 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
5015 call_used_regs[i] = 0;
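/* This matters for functions such as the (hypothetical) handler below,
   where every register the function touches must be preserved because the
   interrupted context never agreed to any calling convention:

       __attribute__ ((no_caller_saved_registers))
       void isr_stub (void) { handle_irq (); }

   Fixed registers and the return-value registers are left alone, exactly
   as the comment above explains.  */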
5017 /* For 32-bit targets, squash the REX registers. */
5018 if (! TARGET_64BIT)
5020 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5021 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5022 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5023 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5024 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5025 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5028 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5029 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5031 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5033 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5035 /* Set/reset conditionally defined registers from
5036 CALL_USED_REGISTERS initializer. */
5037 if (call_used_regs[i] > 1)
5038 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5040 /* Calculate registers of CLOBBERED_REGS register set
5041 as call used registers from GENERAL_REGS register set. */
5042 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5043 && call_used_regs[i])
5044 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5047 /* If MMX is disabled, squash the registers. */
5048 if (! TARGET_MMX)
5049 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5050 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5051 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5053 /* If SSE is disabled, squash the registers. */
5054 if (! TARGET_SSE)
5055 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5056 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5057 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5059 /* If the FPU is disabled, squash the registers. */
5060 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5061 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5062 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5063 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5065 /* If AVX512F is disabled, squash the registers. */
5066 if (! TARGET_AVX512F)
5068 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5069 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5071 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5072 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5075 /* If MPX is disabled, squash the registers. */
5076 if (! TARGET_MPX)
5077 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5078 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5081 /* Canonicalize a comparison from one we don't have to one we do have. */
5083 static void
5084 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5085 bool op0_preserve_value)
5087 /* The order of operands in an x87 ficom compare is forced by combine in
5088 the simplify_comparison () function. The float operator is treated as RTX_OBJ
5089 with precedence over other operators and is always placed first.
5090 Swap the condition and operands to match the ficom instruction. */
5091 if (!op0_preserve_value
5092 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
5094 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
5096 /* We are called only for compares that are split to SAHF instruction.
5097 Ensure that we have setcc/jcc insn for the swapped condition. */
5098 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
5100 std::swap (*op0, *op1);
5101 *code = (int) scode;
5106 /* Save the current options */
5108 static void
5109 ix86_function_specific_save (struct cl_target_option *ptr,
5110 struct gcc_options *opts)
5112 ptr->arch = ix86_arch;
5113 ptr->schedule = ix86_schedule;
5114 ptr->prefetch_sse = x86_prefetch_sse;
5115 ptr->tune = ix86_tune;
5116 ptr->branch_cost = ix86_branch_cost;
5117 ptr->tune_defaulted = ix86_tune_defaulted;
5118 ptr->arch_specified = ix86_arch_specified;
5119 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5120 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5121 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5122 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5123 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5124 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5125 ptr->x_ix86_abi = opts->x_ix86_abi;
5126 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5127 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5128 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5129 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5130 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5131 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5132 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5133 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5134 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5135 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5136 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5137 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5138 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5139 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5140 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5141 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5142 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5143 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5144 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5145 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5147 /* The fields are char but the variables are not; make sure the
5148 values fit in the fields. */
5149 gcc_assert (ptr->arch == ix86_arch);
5150 gcc_assert (ptr->schedule == ix86_schedule);
5151 gcc_assert (ptr->tune == ix86_tune);
5152 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5155 /* Restore the current options */
5157 static void
5158 ix86_function_specific_restore (struct gcc_options *opts,
5159 struct cl_target_option *ptr)
5161 enum processor_type old_tune = ix86_tune;
5162 enum processor_type old_arch = ix86_arch;
5163 unsigned HOST_WIDE_INT ix86_arch_mask;
5164 int i;
5166 /* We don't change -fPIC. */
5167 opts->x_flag_pic = flag_pic;
5169 ix86_arch = (enum processor_type) ptr->arch;
5170 ix86_schedule = (enum attr_cpu) ptr->schedule;
5171 ix86_tune = (enum processor_type) ptr->tune;
5172 x86_prefetch_sse = ptr->prefetch_sse;
5173 opts->x_ix86_branch_cost = ptr->branch_cost;
5174 ix86_tune_defaulted = ptr->tune_defaulted;
5175 ix86_arch_specified = ptr->arch_specified;
5176 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5177 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5178 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5179 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5180 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5181 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5182 opts->x_ix86_abi = ptr->x_ix86_abi;
5183 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5184 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5185 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5186 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5187 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5188 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5189 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5190 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5191 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5192 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5193 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5194 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5195 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5196 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5197 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5198 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5199 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5200 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5201 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5202 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5203 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5204 /* TODO: ix86_cost should be chosen at instruction or function granularity
5205 so for cold code we use size_cost even in !optimize_size compilation. */
5206 if (opts->x_optimize_size)
5207 ix86_cost = &ix86_size_cost;
5208 else
5209 ix86_cost = ix86_tune_cost;
5211 /* Recreate the arch feature tests if the arch changed */
5212 if (old_arch != ix86_arch)
5214 ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
5215 for (i = 0; i < X86_ARCH_LAST; ++i)
5216 ix86_arch_features[i]
5217 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5220 /* Recreate the tune optimization tests */
5221 if (old_tune != ix86_tune)
5222 set_ix86_tune_features (ix86_tune, false);
5225 /* Adjust target options after streaming them in. This is mainly about
5226 reconciling them with global options. */
5228 static void
5229 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5231 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5232 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5233 for PIC, or error out. */
5234 if (flag_pic)
5235 switch (ptr->x_ix86_cmodel)
5237 case CM_SMALL:
5238 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5239 break;
5241 case CM_MEDIUM:
5242 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5243 break;
5245 case CM_LARGE:
5246 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5247 break;
5249 case CM_KERNEL:
5250 error ("code model %s does not support PIC mode", "kernel");
5251 break;
5253 default:
5254 break;
5256 else
5257 switch (ptr->x_ix86_cmodel)
5259 case CM_SMALL_PIC:
5260 ptr->x_ix86_cmodel = CM_SMALL;
5261 break;
5263 case CM_MEDIUM_PIC:
5264 ptr->x_ix86_cmodel = CM_MEDIUM;
5265 break;
5267 case CM_LARGE_PIC:
5268 ptr->x_ix86_cmodel = CM_LARGE;
5269 break;
5271 default:
5272 break;
5276 /* Print the current options */
5278 static void
5279 ix86_function_specific_print (FILE *file, int indent,
5280 struct cl_target_option *ptr)
5282 char *target_string
5283 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5284 ptr->x_target_flags, ptr->x_ix86_target_flags,
5285 NULL, NULL, ptr->x_ix86_fpmath, false);
5287 gcc_assert (ptr->arch < PROCESSOR_max);
5288 fprintf (file, "%*sarch = %d (%s)\n",
5289 indent, "",
5290 ptr->arch, processor_target_table[ptr->arch].name);
5292 gcc_assert (ptr->tune < PROCESSOR_max);
5293 fprintf (file, "%*stune = %d (%s)\n",
5294 indent, "",
5295 ptr->tune, processor_target_table[ptr->tune].name);
5297 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5299 if (target_string)
5301 fprintf (file, "%*s%s\n", indent, "", target_string);
5302 free (target_string);
5307 /* Inner function to process the attribute((target(...))), take an argument and
5308 set the current options from the argument. If we have a list, recursively go
5309 over the list. */
5311 static bool
5312 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5313 struct gcc_options *opts,
5314 struct gcc_options *opts_set,
5315 struct gcc_options *enum_opts_set)
5317 char *next_optstr;
5318 bool ret = true;
5320 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5321 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5322 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5323 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5324 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5326 enum ix86_opt_type
5328 ix86_opt_unknown,
5329 ix86_opt_yes,
5330 ix86_opt_no,
5331 ix86_opt_str,
5332 ix86_opt_enum,
5333 ix86_opt_isa
5336 static const struct
5338 const char *string;
5339 size_t len;
5340 enum ix86_opt_type type;
5341 int opt;
5342 int mask;
5343 } attrs[] = {
5344 /* isa options */
5345 IX86_ATTR_ISA ("pconfig", OPT_mpconfig),
5346 IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd),
5347 IX86_ATTR_ISA ("sgx", OPT_msgx),
5348 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5349 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5350 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5351 IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
5352 IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
5353 IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
5355 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5356 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5357 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5358 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5359 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5360 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5361 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5362 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5363 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5364 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5365 IX86_ATTR_ISA ("fma", OPT_mfma),
5366 IX86_ATTR_ISA ("xop", OPT_mxop),
5367 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5368 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5369 IX86_ATTR_ISA ("avx", OPT_mavx),
5370 IX86_ATTR_ISA ("sse4", OPT_msse4),
5371 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5372 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5373 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5374 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5375 IX86_ATTR_ISA ("sse3", OPT_msse3),
5376 IX86_ATTR_ISA ("aes", OPT_maes),
5377 IX86_ATTR_ISA ("sha", OPT_msha),
5378 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5379 IX86_ATTR_ISA ("sse2", OPT_msse2),
5380 IX86_ATTR_ISA ("sse", OPT_msse),
5381 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5382 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5383 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5384 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5385 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5386 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5387 IX86_ATTR_ISA ("adx", OPT_madx),
5388 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5389 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5390 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5391 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5392 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5393 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5394 IX86_ATTR_ISA ("abm", OPT_mabm),
5395 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5396 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5397 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5398 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5399 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5400 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5401 IX86_ATTR_ISA ("sahf", OPT_msahf),
5402 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5403 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5404 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5405 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5406 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5407 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5408 IX86_ATTR_ISA ("pku", OPT_mpku),
5409 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5410 IX86_ATTR_ISA ("hle", OPT_mhle),
5411 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5412 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5413 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5414 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5415 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5416 IX86_ATTR_ISA ("ibt", OPT_mibt),
5417 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5418 IX86_ATTR_ISA ("vaes", OPT_mvaes),
5419 IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
5421 /* enum options */
5422 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5424 /* string options */
5425 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5426 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5428 /* flag options */
5429 IX86_ATTR_YES ("cld",
5430 OPT_mcld,
5431 MASK_CLD),
5433 IX86_ATTR_NO ("fancy-math-387",
5434 OPT_mfancy_math_387,
5435 MASK_NO_FANCY_MATH_387),
5437 IX86_ATTR_YES ("ieee-fp",
5438 OPT_mieee_fp,
5439 MASK_IEEE_FP),
5441 IX86_ATTR_YES ("inline-all-stringops",
5442 OPT_minline_all_stringops,
5443 MASK_INLINE_ALL_STRINGOPS),
5445 IX86_ATTR_YES ("inline-stringops-dynamically",
5446 OPT_minline_stringops_dynamically,
5447 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5449 IX86_ATTR_NO ("align-stringops",
5450 OPT_mno_align_stringops,
5451 MASK_NO_ALIGN_STRINGOPS),
5453 IX86_ATTR_YES ("recip",
5454 OPT_mrecip,
5455 MASK_RECIP),
5459 /* If this is a list, recurse to get the options. */
5460 if (TREE_CODE (args) == TREE_LIST)
5462 bool ret = true;
5464 for (; args; args = TREE_CHAIN (args))
5465 if (TREE_VALUE (args)
5466 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5467 p_strings, opts, opts_set,
5468 enum_opts_set))
5469 ret = false;
5471 return ret;
5474 else if (TREE_CODE (args) != STRING_CST)
5476 error ("attribute %<target%> argument not a string");
5477 return false;
5480 /* Handle multiple arguments separated by commas. */
5481 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5483 while (next_optstr && *next_optstr != '\0')
5485 char *p = next_optstr;
5486 char *orig_p = p;
5487 char *comma = strchr (next_optstr, ',');
5488 const char *opt_string;
5489 size_t len, opt_len;
5490 int opt;
5491 bool opt_set_p;
5492 char ch;
5493 unsigned i;
5494 enum ix86_opt_type type = ix86_opt_unknown;
5495 int mask = 0;
5497 if (comma)
5499 *comma = '\0';
5500 len = comma - next_optstr;
5501 next_optstr = comma + 1;
5503 else
5505 len = strlen (p);
5506 next_optstr = NULL;
5509 /* Recognize no-xxx. */
5510 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5512 opt_set_p = false;
5513 p += 3;
5514 len -= 3;
5516 else
5517 opt_set_p = true;
5519 /* Find the option. */
5520 ch = *p;
5521 opt = N_OPTS;
5522 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5524 type = attrs[i].type;
5525 opt_len = attrs[i].len;
5526 if (ch == attrs[i].string[0]
5527 && ((type != ix86_opt_str && type != ix86_opt_enum)
5528 ? len == opt_len
5529 : len > opt_len)
5530 && memcmp (p, attrs[i].string, opt_len) == 0)
5532 opt = attrs[i].opt;
5533 mask = attrs[i].mask;
5534 opt_string = attrs[i].string;
5535 break;
5539 /* Process the option. */
5540 if (opt == N_OPTS)
5542 error ("attribute(target(\"%s\")) is unknown", orig_p);
5543 ret = false;
5546 else if (type == ix86_opt_isa)
5548 struct cl_decoded_option decoded;
5550 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5551 ix86_handle_option (opts, opts_set,
5552 &decoded, input_location);
5555 else if (type == ix86_opt_yes || type == ix86_opt_no)
5557 if (type == ix86_opt_no)
5558 opt_set_p = !opt_set_p;
5560 if (opt_set_p)
5561 opts->x_target_flags |= mask;
5562 else
5563 opts->x_target_flags &= ~mask;
5566 else if (type == ix86_opt_str)
5568 if (p_strings[opt])
5570 error ("option(\"%s\") was already specified", opt_string);
5571 ret = false;
5573 else
5574 p_strings[opt] = xstrdup (p + opt_len);
5577 else if (type == ix86_opt_enum)
5579 bool arg_ok;
5580 int value;
5582 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5583 if (arg_ok)
5584 set_option (opts, enum_opts_set, opt, value,
5585 p + opt_len, DK_UNSPECIFIED, input_location,
5586 global_dc);
5587 else
5589 error ("attribute(target(\"%s\")) is unknown", orig_p);
5590 ret = false;
5594 else
5595 gcc_unreachable ();
5598 return ret;
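/* Illustrative example of the attribute string this parser accepts; the
   function name is hypothetical and not part of GCC:

     __attribute__((target ("avx2,no-sse4a,fpmath=sse")))
     int
     sum_ints (const int *a, int n)
     {
       int s = 0;
       for (int i = 0; i < n; i++)
         s += a[i];
       return s;
     }

   Each comma-separated item is looked up in attrs[] above: "avx2" is an ISA
   option, the "no-" prefix turns an option off, "fpmath=" is an enum option,
   and "arch=" / "tune=" are string options collected in p_strings.  */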
5601 /* Release allocated strings. */
5602 static void
5603 release_options_strings (char **option_strings)
5605 /* Free up memory allocated to hold the strings */
5606 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5607 free (option_strings[i]);
5610 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5612 tree
5613 ix86_valid_target_attribute_tree (tree args,
5614 struct gcc_options *opts,
5615 struct gcc_options *opts_set)
5617 const char *orig_arch_string = opts->x_ix86_arch_string;
5618 const char *orig_tune_string = opts->x_ix86_tune_string;
5619 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5620 int orig_tune_defaulted = ix86_tune_defaulted;
5621 int orig_arch_specified = ix86_arch_specified;
5622 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5623 tree t = NULL_TREE;
5624 struct cl_target_option *def
5625 = TREE_TARGET_OPTION (target_option_default_node);
5626 struct gcc_options enum_opts_set;
5628 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5630 /* Process each of the options on the chain. */
5631 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5632 opts_set, &enum_opts_set))
5633 return error_mark_node;
5635 /* If the changed options are different from the default, rerun
5636 ix86_option_override_internal, and then save the options away.
5637 The string options are attribute options, and will be undone
5638 when we copy the save structure. */
5639 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5640 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5641 || opts->x_target_flags != def->x_target_flags
5642 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5643 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5644 || enum_opts_set.x_ix86_fpmath)
5646 /* If we are using the default tune= or arch=, undo the string assigned,
5647 and use the default. */
5648 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5650 opts->x_ix86_arch_string
5651 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5653 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5654 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5655 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5656 | OPTION_MASK_ABI_64
5657 | OPTION_MASK_ABI_X32
5658 | OPTION_MASK_CODE16);
5659 opts->x_ix86_isa_flags2 = 0;
5661 else if (!orig_arch_specified)
5662 opts->x_ix86_arch_string = NULL;
5664 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5665 opts->x_ix86_tune_string
5666 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5667 else if (orig_tune_defaulted)
5668 opts->x_ix86_tune_string = NULL;
5670 /* If fpmath= was given in the attribute, mark it as explicitly set so the option override below keeps the choice. */
5671 if (enum_opts_set.x_ix86_fpmath)
5672 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5674 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5675 bool r = ix86_option_override_internal (false, opts, opts_set);
5676 if (!r)
5678 release_options_strings (option_strings);
5679 return error_mark_node;
5682 /* Add any builtin functions with the new isa if any. */
5683 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5685 /* Save the current options unless we are validating options for
5686 #pragma. */
5687 t = build_target_option_node (opts);
5689 opts->x_ix86_arch_string = orig_arch_string;
5690 opts->x_ix86_tune_string = orig_tune_string;
5691 opts_set->x_ix86_fpmath = orig_fpmath_set;
5693 release_options_strings (option_strings);
5696 return t;
5699 /* Hook to validate attribute((target("string"))). */
5701 static bool
5702 ix86_valid_target_attribute_p (tree fndecl,
5703 tree ARG_UNUSED (name),
5704 tree args,
5705 int ARG_UNUSED (flags))
5707 struct gcc_options func_options;
5708 tree new_target, new_optimize;
5709 bool ret = true;
5711 /* attribute((target("default"))) does nothing, beyond
5712 affecting multi-versioning. */
5713 if (TREE_VALUE (args)
5714 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5715 && TREE_CHAIN (args) == NULL_TREE
5716 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5717 return true;
5719 tree old_optimize = build_optimization_node (&global_options);
5721 /* Get the optimization options of the current function. */
5722 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5724 if (!func_optimize)
5725 func_optimize = old_optimize;
5727 /* Init func_options. */
5728 memset (&func_options, 0, sizeof (func_options));
5729 init_options_struct (&func_options, NULL);
5730 lang_hooks.init_options_struct (&func_options);
5732 cl_optimization_restore (&func_options,
5733 TREE_OPTIMIZATION (func_optimize));
5735 /* Initialize func_options to the default before its target options can
5736 be set. */
5737 cl_target_option_restore (&func_options,
5738 TREE_TARGET_OPTION (target_option_default_node));
5740 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5741 &global_options_set);
5743 new_optimize = build_optimization_node (&func_options);
5745 if (new_target == error_mark_node)
5746 ret = false;
5748 else if (fndecl && new_target)
5750 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5752 if (old_optimize != new_optimize)
5753 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5756 finalize_options_struct (&func_options);
5758 return ret;
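/* Illustrative uses of the attribute validated above (hypothetical
   declarations, not part of GCC):

     __attribute__((target ("arch=skylake,tune=generic")))
     void hot_path (float *dst, const float *src, int n);

     __attribute__((target ("default")))
     void dispatched (void);

   The plain "default" form is accepted and otherwise ignored here; it only
   matters for function multi-versioning handled in the front ends.  */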
5762 /* Hook to determine if one function can safely inline another. */
5764 static bool
5765 ix86_can_inline_p (tree caller, tree callee)
5767 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5768 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5769 if (!callee_tree)
5770 callee_tree = target_option_default_node;
5771 if (!caller_tree)
5772 caller_tree = target_option_default_node;
5773 if (callee_tree == caller_tree)
5774 return true;
5776 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5777 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5778 bool ret = false;
5780 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5781 function can inline an SSE2 function but an SSE2 function can't inline
5782 an SSE4 function. */
5783 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5784 != callee_opts->x_ix86_isa_flags)
5785 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5786 != callee_opts->x_ix86_isa_flags2))
5787 ret = false;
5789 /* See if we have the same non-isa options. */
5790 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5791 ret = false;
5793 /* See if arch, tune, etc. are the same. */
5794 else if (caller_opts->arch != callee_opts->arch)
5795 ret = false;
5797 else if (caller_opts->tune != callee_opts->tune)
5798 ret = false;
5800 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5801 /* If the callee doesn't use FP expressions, differences in
5802 ix86_fpmath can be ignored. We are called from FEs
5803 for multi-versioning call optimization, so beware of
5804 ipa_fn_summaries not being available. */
5805 && (! ipa_fn_summaries
5806 || ipa_fn_summaries->get
5807 (cgraph_node::get (callee))->fp_expressions))
5808 ret = false;
5810 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5811 ret = false;
5813 else
5814 ret = true;
5816 return ret;
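/* A minimal sketch of what the check above allows and rejects, assuming
   hypothetical user functions compiled for x86-64:

     static inline __attribute__((always_inline, target ("sse2")))
     int callee_sse2 (int x) { return x + 1; }

     __attribute__((target ("avx2")))
     int caller_avx2 (int x) { return callee_sse2 (x); }

   Inlining is allowed because the avx2 caller's ISA flags are a superset of
   the sse2 callee's.  Swapping the two attributes makes the callee's flags
   the larger set, so the always_inline request is refused.  */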
5820 /* Remember the last target of ix86_set_current_function. */
5821 static GTY(()) tree ix86_previous_fndecl;
5823 /* Set targets globals to the default (or current #pragma GCC target
5824 if active). Invalidate ix86_previous_fndecl cache. */
5826 void
5827 ix86_reset_previous_fndecl (void)
5829 tree new_tree = target_option_current_node;
5830 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5831 if (TREE_TARGET_GLOBALS (new_tree))
5832 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5833 else if (new_tree == target_option_default_node)
5834 restore_target_globals (&default_target_globals);
5835 else
5836 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5837 ix86_previous_fndecl = NULL_TREE;
5840 /* Set the func_type field from the function FNDECL. */
5842 static void
5843 ix86_set_func_type (tree fndecl)
5845 if (cfun->machine->func_type == TYPE_UNKNOWN)
5847 if (lookup_attribute ("interrupt",
5848 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5850 if (ix86_function_naked (fndecl))
5851 error_at (DECL_SOURCE_LOCATION (fndecl),
5852 "interrupt and naked attributes are not compatible");
5854 int nargs = 0;
5855 for (tree arg = DECL_ARGUMENTS (fndecl);
5856 arg;
5857 arg = TREE_CHAIN (arg))
5858 nargs++;
5859 cfun->machine->no_caller_saved_registers = true;
5860 cfun->machine->func_type
5861 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5863 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5865 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5866 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5867 sorry ("Only DWARF debug format is supported for interrupt "
5868 "service routine.");
5870 else
5872 cfun->machine->func_type = TYPE_NORMAL;
5873 if (lookup_attribute ("no_caller_saved_registers",
5874 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5875 cfun->machine->no_caller_saved_registers = true;
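/* Illustrative handlers using the attribute handled above; the names and the
   uword_t typedef exist only for the example:

     struct interrupt_frame;
     typedef unsigned long long uword_t;

     void __attribute__ ((interrupt))
     timer_isr (struct interrupt_frame *frame)
     {
     }

     void __attribute__ ((interrupt))
     fault_handler (struct interrupt_frame *frame, uword_t error_code)
     {
     }

   A one-argument handler becomes TYPE_INTERRUPT, a two-argument handler
   (frame plus error code) becomes TYPE_EXCEPTION, and combining "interrupt"
   with "naked" is rejected above.  */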
5880 /* Set the indirect_branch_type field from the function FNDECL. */
5882 static void
5883 ix86_set_indirect_branch_type (tree fndecl)
5885 if (cfun->machine->indirect_branch_type == indirect_branch_unset)
5887 tree attr = lookup_attribute ("indirect_branch",
5888 DECL_ATTRIBUTES (fndecl));
5889 if (attr != NULL)
5891 tree args = TREE_VALUE (attr);
5892 if (args == NULL)
5893 gcc_unreachable ();
5894 tree cst = TREE_VALUE (args);
5895 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5896 cfun->machine->indirect_branch_type = indirect_branch_keep;
5897 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5898 cfun->machine->indirect_branch_type = indirect_branch_thunk;
5899 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5900 cfun->machine->indirect_branch_type = indirect_branch_thunk_inline;
5901 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5902 cfun->machine->indirect_branch_type = indirect_branch_thunk_extern;
5903 else
5904 gcc_unreachable ();
5906 else
5907 cfun->machine->indirect_branch_type = ix86_indirect_branch;
5909 /* -mcmodel=large is not compatible with -mindirect-branch=thunk
5910 nor -mindirect-branch=thunk-extern. */
5911 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5912 && ((cfun->machine->indirect_branch_type
5913 == indirect_branch_thunk_extern)
5914 || (cfun->machine->indirect_branch_type
5915 == indirect_branch_thunk)))
5916 error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not "
5917 "compatible",
5918 ((cfun->machine->indirect_branch_type
5919 == indirect_branch_thunk_extern)
5920 ? "thunk-extern" : "thunk"));
5922 /* -mindirect-branch=thunk-extern, -fcf-protection=branch and
5923 -fcheck-pointer-bounds are not compatible. */
5924 if ((cfun->machine->indirect_branch_type
5925 == indirect_branch_thunk_extern)
5926 && flag_check_pointer_bounds
5927 && (flag_cf_protection & CF_BRANCH) != 0)
5928 error ("%<-mindirect-branch=thunk-extern%>, "
5929 "%<-fcf-protection=branch%> and "
5930 "%<-fcheck-pointer-bounds%> are not compatible");
5933 if (cfun->machine->function_return_type == indirect_branch_unset)
5935 tree attr = lookup_attribute ("function_return",
5936 DECL_ATTRIBUTES (fndecl));
5937 if (attr != NULL)
5939 tree args = TREE_VALUE (attr);
5940 if (args == NULL)
5941 gcc_unreachable ();
5942 tree cst = TREE_VALUE (args);
5943 if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5944 cfun->machine->function_return_type = indirect_branch_keep;
5945 else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5946 cfun->machine->function_return_type = indirect_branch_thunk;
5947 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5948 cfun->machine->function_return_type = indirect_branch_thunk_inline;
5949 else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5950 cfun->machine->function_return_type = indirect_branch_thunk_extern;
5951 else
5952 gcc_unreachable ();
5954 else
5955 cfun->machine->function_return_type = ix86_function_return;
5957 /* -mcmodel=large is not compatible with -mfunction-return=thunk
5958 nor -mfunction-return=thunk-extern. */
5959 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5960 && ((cfun->machine->function_return_type
5961 == indirect_branch_thunk_extern)
5962 || (cfun->machine->function_return_type
5963 == indirect_branch_thunk)))
5964 error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not "
5965 "compatible",
5966 ((cfun->machine->function_return_type
5967 == indirect_branch_thunk_extern)
5968 ? "thunk-extern" : "thunk"));
5972 /* Establish appropriate back-end context for processing the function
5973 FNDECL. The argument might be NULL to indicate processing at top
5974 level, outside of any function scope. */
5975 static void
5976 ix86_set_current_function (tree fndecl)
5978 /* Only change the context if the function changes. This hook is called
5979 several times in the course of compiling a function, and we don't want to
5980 slow things down too much or call target_reinit when it isn't safe. */
5981 if (fndecl == ix86_previous_fndecl)
5983 /* There may be 2 function bodies for the same function FNDECL,
5984 one is extern inline and one isn't. Call ix86_set_func_type
5985 to set the func_type field. */
5986 if (fndecl != NULL_TREE)
5988 ix86_set_func_type (fndecl);
5989 ix86_set_indirect_branch_type (fndecl);
5991 return;
5994 tree old_tree;
5995 if (ix86_previous_fndecl == NULL_TREE)
5996 old_tree = target_option_current_node;
5997 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5998 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5999 else
6000 old_tree = target_option_default_node;
6002 if (fndecl == NULL_TREE)
6004 if (old_tree != target_option_current_node)
6005 ix86_reset_previous_fndecl ();
6006 return;
6009 ix86_set_func_type (fndecl);
6010 ix86_set_indirect_branch_type (fndecl);
6012 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6013 if (new_tree == NULL_TREE)
6014 new_tree = target_option_default_node;
6016 if (old_tree != new_tree)
6018 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6019 if (TREE_TARGET_GLOBALS (new_tree))
6020 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6021 else if (new_tree == target_option_default_node)
6022 restore_target_globals (&default_target_globals);
6023 else
6024 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6026 ix86_previous_fndecl = fndecl;
6028 static bool prev_no_caller_saved_registers;
6030 /* 64-bit MS and SYSV ABI have different sets of call-used registers.
6031 Avoid expensive re-initialization of init_regs each time we switch
6032 function context. */
6033 if (TARGET_64BIT
6034 && (call_used_regs[SI_REG]
6035 == (cfun->machine->call_abi == MS_ABI)))
6036 reinit_regs ();
6037 /* Need to re-initialize init_regs if caller-saved registers are
6038 changed. */
6039 else if (prev_no_caller_saved_registers
6040 != cfun->machine->no_caller_saved_registers)
6041 reinit_regs ();
6043 if (cfun->machine->func_type != TYPE_NORMAL
6044 || cfun->machine->no_caller_saved_registers)
6046 /* Don't allow MPX, SSE, MMX or x87 instructions since they
6047 may change processor state. */
6048 const char *isa;
6049 if (TARGET_MPX)
6050 isa = "MPX";
6051 else if (TARGET_SSE)
6052 isa = "SSE";
6053 else if (TARGET_MMX)
6054 isa = "MMX/3Dnow";
6055 else if (TARGET_80387)
6056 isa = "80387";
6057 else
6058 isa = NULL;
6059 if (isa != NULL)
6061 if (cfun->machine->func_type != TYPE_NORMAL)
6062 sorry ("%s instructions aren't allowed in %s service routine",
6063 isa, (cfun->machine->func_type == TYPE_EXCEPTION
6064 ? "exception" : "interrupt"));
6065 else
6066 sorry ("%s instructions aren't allowed in function with "
6067 "no_caller_saved_registers attribute", isa);
6068 /* Don't issue the same error twice. */
6069 cfun->machine->func_type = TYPE_NORMAL;
6070 cfun->machine->no_caller_saved_registers = false;
6074 prev_no_caller_saved_registers
6075 = cfun->machine->no_caller_saved_registers;
6079 /* Return true if this goes in large data/bss. */
6081 static bool
6082 ix86_in_large_data_p (tree exp)
6084 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6085 return false;
6087 if (exp == NULL_TREE)
6088 return false;
6090 /* Functions are never large data. */
6091 if (TREE_CODE (exp) == FUNCTION_DECL)
6092 return false;
6094 /* Automatic variables are never large data. */
6095 if (VAR_P (exp) && !is_global_var (exp))
6096 return false;
6098 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
6100 const char *section = DECL_SECTION_NAME (exp);
6101 if (strcmp (section, ".ldata") == 0
6102 || strcmp (section, ".lbss") == 0)
6103 return true;
6104 return false;
6106 else
6108 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6110 /* If this is an incomplete type with size 0, then we can't put it
6111 in data because it might be too big when completed. Also,
6112 int_size_in_bytes returns -1 if size can vary or is larger than
6113 an integer, in which case it is also safer to assume that it goes in
6114 large data. */
6115 if (size <= 0 || size > ix86_section_threshold)
6116 return true;
6119 return false;
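/* Sketch of the effect of the predicate above, assuming a unit compiled
   with -mcmodel=medium and the default -mlarge-data-threshold=65536 (the
   option behind ix86_section_threshold); the variable names are made up:

     static char scratch[1 << 20];              goes to .lbss
     static const int table[1 << 18] = { 1 };   goes to .lrodata
     int counter;                               small, stays in ordinary .bss

   Functions and automatic variables are never treated as large data.  */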
6122 /* i386-specific section flag to mark large sections. */
6123 #define SECTION_LARGE SECTION_MACH_DEP
6125 /* Switch to the appropriate section for output of DECL.
6126 DECL is either a `VAR_DECL' node or a constant of some sort.
6127 RELOC indicates whether forming the initial value of DECL requires
6128 link-time relocations. */
6130 ATTRIBUTE_UNUSED static section *
6131 x86_64_elf_select_section (tree decl, int reloc,
6132 unsigned HOST_WIDE_INT align)
6134 if (ix86_in_large_data_p (decl))
6136 const char *sname = NULL;
6137 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
6138 switch (categorize_decl_for_section (decl, reloc))
6140 case SECCAT_DATA:
6141 sname = ".ldata";
6142 break;
6143 case SECCAT_DATA_REL:
6144 sname = ".ldata.rel";
6145 break;
6146 case SECCAT_DATA_REL_LOCAL:
6147 sname = ".ldata.rel.local";
6148 break;
6149 case SECCAT_DATA_REL_RO:
6150 sname = ".ldata.rel.ro";
6151 break;
6152 case SECCAT_DATA_REL_RO_LOCAL:
6153 sname = ".ldata.rel.ro.local";
6154 break;
6155 case SECCAT_BSS:
6156 sname = ".lbss";
6157 flags |= SECTION_BSS;
6158 break;
6159 case SECCAT_RODATA:
6160 case SECCAT_RODATA_MERGE_STR:
6161 case SECCAT_RODATA_MERGE_STR_INIT:
6162 case SECCAT_RODATA_MERGE_CONST:
6163 sname = ".lrodata";
6164 flags &= ~SECTION_WRITE;
6165 break;
6166 case SECCAT_SRODATA:
6167 case SECCAT_SDATA:
6168 case SECCAT_SBSS:
6169 gcc_unreachable ();
6170 case SECCAT_TEXT:
6171 case SECCAT_TDATA:
6172 case SECCAT_TBSS:
6173 /* We don't split these for the medium model. Place them into
6174 default sections and hope for the best. */
6175 break;
6177 if (sname)
6179 /* We might get called with string constants, but get_named_section
6180 doesn't like them as they are not DECLs. Also, we need to set
6181 flags in that case. */
6182 if (!DECL_P (decl))
6183 return get_section (sname, flags, NULL);
6184 return get_named_section (decl, sname, reloc);
6187 return default_elf_select_section (decl, reloc, align);
6190 /* Select a set of attributes for section NAME based on the properties
6191 of DECL and whether or not RELOC indicates that DECL's initializer
6192 might contain runtime relocations. */
6194 static unsigned int ATTRIBUTE_UNUSED
6195 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6197 unsigned int flags = default_section_type_flags (decl, name, reloc);
6199 if (ix86_in_large_data_p (decl))
6200 flags |= SECTION_LARGE;
6202 if (decl == NULL_TREE
6203 && (strcmp (name, ".ldata.rel.ro") == 0
6204 || strcmp (name, ".ldata.rel.ro.local") == 0))
6205 flags |= SECTION_RELRO;
6207 if (strcmp (name, ".lbss") == 0
6208 || strncmp (name, ".lbss.", 5) == 0
6209 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6210 flags |= SECTION_BSS;
6212 return flags;
6215 /* Build up a unique section name, expressed as a
6216 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6217 RELOC indicates whether the initial value of EXP requires
6218 link-time relocations. */
6220 static void ATTRIBUTE_UNUSED
6221 x86_64_elf_unique_section (tree decl, int reloc)
6223 if (ix86_in_large_data_p (decl))
6225 const char *prefix = NULL;
6226 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6227 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6229 switch (categorize_decl_for_section (decl, reloc))
6231 case SECCAT_DATA:
6232 case SECCAT_DATA_REL:
6233 case SECCAT_DATA_REL_LOCAL:
6234 case SECCAT_DATA_REL_RO:
6235 case SECCAT_DATA_REL_RO_LOCAL:
6236 prefix = one_only ? ".ld" : ".ldata";
6237 break;
6238 case SECCAT_BSS:
6239 prefix = one_only ? ".lb" : ".lbss";
6240 break;
6241 case SECCAT_RODATA:
6242 case SECCAT_RODATA_MERGE_STR:
6243 case SECCAT_RODATA_MERGE_STR_INIT:
6244 case SECCAT_RODATA_MERGE_CONST:
6245 prefix = one_only ? ".lr" : ".lrodata";
6246 break;
6247 case SECCAT_SRODATA:
6248 case SECCAT_SDATA:
6249 case SECCAT_SBSS:
6250 gcc_unreachable ();
6251 case SECCAT_TEXT:
6252 case SECCAT_TDATA:
6253 case SECCAT_TBSS:
6254 /* We don't split these for the medium model. Place them into
6255 default sections and hope for the best. */
6256 break;
6258 if (prefix)
6260 const char *name, *linkonce;
6261 char *string;
6263 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6264 name = targetm.strip_name_encoding (name);
6266 /* If we're using one_only, then there needs to be a .gnu.linkonce
6267 prefix to the section name. */
6268 linkonce = one_only ? ".gnu.linkonce" : "";
6270 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6272 set_decl_section_name (decl, string);
6273 return;
6276 default_unique_section (decl, reloc);
6279 #ifdef COMMON_ASM_OP
6281 #ifndef LARGECOMM_SECTION_ASM_OP
6282 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6283 #endif
6285 /* This says how to output assembler code to declare an
6286 uninitialized external linkage data object.
6288 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
6289 directive for large objects. */
6290 void
6291 x86_elf_aligned_decl_common (FILE *file, tree decl,
6292 const char *name, unsigned HOST_WIDE_INT size,
6293 int align)
6295 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6296 && size > (unsigned int)ix86_section_threshold)
6298 switch_to_section (get_named_section (decl, ".lbss", 0));
6299 fputs (LARGECOMM_SECTION_ASM_OP, file);
6301 else
6302 fputs (COMMON_ASM_OP, file);
6303 assemble_name (file, name);
6304 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6305 size, align / BITS_PER_UNIT);
6307 #endif
6309 /* Utility function for targets to use in implementing
6310 ASM_OUTPUT_ALIGNED_BSS. */
6312 void
6313 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6314 unsigned HOST_WIDE_INT size, int align)
6316 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6317 && size > (unsigned int)ix86_section_threshold)
6318 switch_to_section (get_named_section (decl, ".lbss", 0));
6319 else
6320 switch_to_section (bss_section);
6321 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6322 #ifdef ASM_DECLARE_OBJECT_NAME
6323 last_assemble_variable_decl = decl;
6324 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6325 #else
6326 /* Standard thing is just output label for the object. */
6327 ASM_OUTPUT_LABEL (file, name);
6328 #endif /* ASM_DECLARE_OBJECT_NAME */
6329 ASM_OUTPUT_SKIP (file, size ? size : 1);
6332 /* Decide whether we must probe the stack before any space allocation
6333 on this target. It's essentially TARGET_STACK_PROBE except when
6334 -fstack-check causes the stack to be already probed differently. */
6336 bool
6337 ix86_target_stack_probe (void)
6339 /* Do not probe the stack twice if static stack checking is enabled. */
6340 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6341 return false;
6343 return TARGET_STACK_PROBE;
6346 /* Decide whether we can make a sibling call to a function. DECL is the
6347 declaration of the function being targeted by the call and EXP is the
6348 CALL_EXPR representing the call. */
6350 static bool
6351 ix86_function_ok_for_sibcall (tree decl, tree exp)
6353 tree type, decl_or_type;
6354 rtx a, b;
6355 bool bind_global = decl && !targetm.binds_local_p (decl);
6357 if (ix86_function_naked (current_function_decl))
6358 return false;
6360 /* Sibling call isn't OK if there are no caller-saved registers
6361 since all registers must be preserved before return. */
6362 if (cfun->machine->no_caller_saved_registers)
6363 return false;
6365 /* If we are generating position-independent code, we cannot sibcall
6366 optimize direct calls to global functions, as the PLT requires
6367 %ebx be live. (Darwin does not have a PLT.) */
6368 if (!TARGET_MACHO
6369 && !TARGET_64BIT
6370 && flag_pic
6371 && flag_plt
6372 && bind_global)
6373 return false;
6375 /* If we need to align the outgoing stack, then sibcalling would
6376 unalign the stack, which may break the called function. */
6377 if (ix86_minimum_incoming_stack_boundary (true)
6378 < PREFERRED_STACK_BOUNDARY)
6379 return false;
6381 if (decl)
6383 decl_or_type = decl;
6384 type = TREE_TYPE (decl);
6386 else
6388 /* We're looking at the CALL_EXPR, we need the type of the function. */
6389 type = CALL_EXPR_FN (exp); /* pointer expression */
6390 type = TREE_TYPE (type); /* pointer type */
6391 type = TREE_TYPE (type); /* function type */
6392 decl_or_type = type;
6395 /* Check that the return value locations are the same. Like
6396 if we are returning floats on the 80387 register stack, we cannot
6397 make a sibcall from a function that doesn't return a float to a
6398 function that does or, conversely, from a function that does return
6399 a float to a function that doesn't; the necessary stack adjustment
6400 would not be executed. This is also the place we notice
6401 differences in the return value ABI. Note that it is ok for one
6402 of the functions to have void return type as long as the return
6403 value of the other is passed in a register. */
6404 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6405 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6406 cfun->decl, false);
6407 if (STACK_REG_P (a) || STACK_REG_P (b))
6409 if (!rtx_equal_p (a, b))
6410 return false;
6412 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6414 else if (!rtx_equal_p (a, b))
6415 return false;
6417 if (TARGET_64BIT)
6419 /* The SYSV ABI has more call-clobbered registers;
6420 disallow sibcalls from MS to SYSV. */
6421 if (cfun->machine->call_abi == MS_ABI
6422 && ix86_function_type_abi (type) == SYSV_ABI)
6423 return false;
6425 else
6427 /* If this call is indirect, we'll need to be able to use a
6428 call-clobbered register for the address of the target function.
6429 Make sure that all such registers are not used for passing
6430 parameters. Note that DLLIMPORT functions and call to global
6431 function via GOT slot are indirect. */
6432 if (!decl
6433 || (bind_global && flag_pic && !flag_plt)
6434 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
6435 || flag_force_indirect_call)
6437 /* Check if regparm >= 3 since arg_reg_available is set to
6438 false if regparm == 0. If regparm is 1 or 2, there is
6439 always a call-clobbered register available.
6441 ??? The symbol indirect call doesn't need a call-clobbered
6442 register. But we don't know here whether this is a symbol
6443 indirect call or not. */
6444 if (ix86_function_regparm (type, decl) >= 3
6445 && !cfun->machine->arg_reg_available)
6446 return false;
6450 /* Otherwise okay. That also includes certain types of indirect calls. */
6451 return true;
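/* A minimal sketch of the calls this predicate is asked about (hypothetical
   functions):

     extern int worker (int);

     int wrapper (int x)
     {
       return worker (x);
     }

   The call is in tail position, so it may be emitted as a plain jump.  With
   -m32 -fpic and the default -fplt, a global worker binds through the PLT,
   which needs %ebx live, so the test above refuses the sibcall.  */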
6454 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6455 and "sseregparm" calling convention attributes;
6456 arguments as in struct attribute_spec.handler. */
6458 static tree
6459 ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
6460 bool *no_add_attrs)
6462 if (TREE_CODE (*node) != FUNCTION_TYPE
6463 && TREE_CODE (*node) != METHOD_TYPE
6464 && TREE_CODE (*node) != FIELD_DECL
6465 && TREE_CODE (*node) != TYPE_DECL)
6467 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6468 name);
6469 *no_add_attrs = true;
6470 return NULL_TREE;
6473 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6474 if (is_attribute_p ("regparm", name))
6476 tree cst;
6478 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6480 error ("fastcall and regparm attributes are not compatible");
6483 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6485 error ("regparam and thiscall attributes are not compatible");
6488 cst = TREE_VALUE (args);
6489 if (TREE_CODE (cst) != INTEGER_CST)
6491 warning (OPT_Wattributes,
6492 "%qE attribute requires an integer constant argument",
6493 name);
6494 *no_add_attrs = true;
6496 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6498 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6499 name, REGPARM_MAX);
6500 *no_add_attrs = true;
6503 return NULL_TREE;
6506 if (TARGET_64BIT)
6508 /* Do not warn when emulating the MS ABI. */
6509 if ((TREE_CODE (*node) != FUNCTION_TYPE
6510 && TREE_CODE (*node) != METHOD_TYPE)
6511 || ix86_function_type_abi (*node) != MS_ABI)
6512 warning (OPT_Wattributes, "%qE attribute ignored",
6513 name);
6514 *no_add_attrs = true;
6515 return NULL_TREE;
6518 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6519 if (is_attribute_p ("fastcall", name))
6521 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6523 error ("fastcall and cdecl attributes are not compatible");
6525 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6527 error ("fastcall and stdcall attributes are not compatible");
6529 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6531 error ("fastcall and regparm attributes are not compatible");
6533 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6535 error ("fastcall and thiscall attributes are not compatible");
6539 /* Can combine stdcall with fastcall (redundant), regparm and
6540 sseregparm. */
6541 else if (is_attribute_p ("stdcall", name))
6543 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6545 error ("stdcall and cdecl attributes are not compatible");
6547 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6549 error ("stdcall and fastcall attributes are not compatible");
6551 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6553 error ("stdcall and thiscall attributes are not compatible");
6557 /* Can combine cdecl with regparm and sseregparm. */
6558 else if (is_attribute_p ("cdecl", name))
6560 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6562 error ("stdcall and cdecl attributes are not compatible");
6564 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6566 error ("fastcall and cdecl attributes are not compatible");
6568 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6570 error ("cdecl and thiscall attributes are not compatible");
6573 else if (is_attribute_p ("thiscall", name))
6575 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6576 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6577 name);
6578 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6580 error ("stdcall and thiscall attributes are not compatible");
6582 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6584 error ("fastcall and thiscall attributes are not compatible");
6586 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6588 error ("cdecl and thiscall attributes are not compatible");
6592 /* Can combine sseregparm with all attributes. */
6594 return NULL_TREE;
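/* Illustrative 32-bit declarations using the attributes handled above
   (hypothetical names):

     int __attribute__ ((fastcall))    f_fast (int a, int b);
     int __attribute__ ((stdcall))     f_std  (int a, int b);
     int __attribute__ ((regparm (3))) f_reg  (int a, int b, int c);

   fastcall passes a in %ecx and b in %edx, stdcall makes the callee pop its
   8 bytes of arguments, and regparm (3) uses %eax, %edx and %ecx.  Combining
   fastcall with regparm is rejected above, and on 64-bit targets these
   attributes are ignored, with a warning unless the MS ABI is being
   emulated.  */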
6597 /* The transactional memory builtins are implicitly regparm or fastcall
6598 depending on the ABI. Override the generic do-nothing attribute that
6599 these builtins were declared with, and replace it with one of the two
6600 attributes that we expect elsewhere. */
6602 static tree
6603 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6604 int flags, bool *no_add_attrs)
6606 tree alt;
6608 /* In no case do we want to add the placeholder attribute. */
6609 *no_add_attrs = true;
6611 /* The 64-bit ABI is unchanged for transactional memory. */
6612 if (TARGET_64BIT)
6613 return NULL_TREE;
6615 /* ??? Is there a better way to validate 32-bit windows? We have
6616 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6617 if (CHECK_STACK_LIMIT > 0)
6618 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6619 else
6621 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6622 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6624 decl_attributes (node, alt, flags);
6626 return NULL_TREE;
6629 /* This function determines from TYPE the calling-convention. */
6631 unsigned int
6632 ix86_get_callcvt (const_tree type)
6634 unsigned int ret = 0;
6635 bool is_stdarg;
6636 tree attrs;
6638 if (TARGET_64BIT)
6639 return IX86_CALLCVT_CDECL;
6641 attrs = TYPE_ATTRIBUTES (type);
6642 if (attrs != NULL_TREE)
6644 if (lookup_attribute ("cdecl", attrs))
6645 ret |= IX86_CALLCVT_CDECL;
6646 else if (lookup_attribute ("stdcall", attrs))
6647 ret |= IX86_CALLCVT_STDCALL;
6648 else if (lookup_attribute ("fastcall", attrs))
6649 ret |= IX86_CALLCVT_FASTCALL;
6650 else if (lookup_attribute ("thiscall", attrs))
6651 ret |= IX86_CALLCVT_THISCALL;
6653 /* Regparm isn't allowed for thiscall and fastcall. */
6654 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6656 if (lookup_attribute ("regparm", attrs))
6657 ret |= IX86_CALLCVT_REGPARM;
6658 if (lookup_attribute ("sseregparm", attrs))
6659 ret |= IX86_CALLCVT_SSEREGPARM;
6662 if (IX86_BASE_CALLCVT(ret) != 0)
6663 return ret;
6666 is_stdarg = stdarg_p (type);
6667 if (TARGET_RTD && !is_stdarg)
6668 return IX86_CALLCVT_STDCALL | ret;
6670 if (ret != 0
6671 || is_stdarg
6672 || TREE_CODE (type) != METHOD_TYPE
6673 || ix86_function_type_abi (type) != MS_ABI)
6674 return IX86_CALLCVT_CDECL | ret;
6676 return IX86_CALLCVT_THISCALL;
6679 /* Return 0 if the attributes for two types are incompatible, 1 if they
6680 are compatible, and 2 if they are nearly compatible (which causes a
6681 warning to be generated). */
6683 static int
6684 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6686 unsigned int ccvt1, ccvt2;
6688 if (TREE_CODE (type1) != FUNCTION_TYPE
6689 && TREE_CODE (type1) != METHOD_TYPE)
6690 return 1;
6692 ccvt1 = ix86_get_callcvt (type1);
6693 ccvt2 = ix86_get_callcvt (type2);
6694 if (ccvt1 != ccvt2)
6695 return 0;
6696 if (ix86_function_regparm (type1, NULL)
6697 != ix86_function_regparm (type2, NULL))
6698 return 0;
6700 return 1;
6703 /* Return the regparm value for a function with the indicated TYPE and DECL.
6704 DECL may be NULL when calling function indirectly
6705 or considering a libcall. */
6707 static int
6708 ix86_function_regparm (const_tree type, const_tree decl)
6710 tree attr;
6711 int regparm;
6712 unsigned int ccvt;
6714 if (TARGET_64BIT)
6715 return (ix86_function_type_abi (type) == SYSV_ABI
6716 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6717 ccvt = ix86_get_callcvt (type);
6718 regparm = ix86_regparm;
6720 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6722 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6723 if (attr)
6725 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6726 return regparm;
6729 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6730 return 2;
6731 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6732 return 1;
6734 /* Use register calling convention for local functions when possible. */
6735 if (decl
6736 && TREE_CODE (decl) == FUNCTION_DECL)
6738 cgraph_node *target = cgraph_node::get (decl);
6739 if (target)
6740 target = target->function_symbol ();
6742 /* Caller and callee must agree on the calling convention, so
6743 checking just the global `optimize' flag here would mean that with
6744 __attribute__((optimize (...))) the caller could use the regparm
6745 convention and the callee not, or vice versa. Instead look at whether
6746 the callee itself is optimized or not. */
6747 if (target && opt_for_fn (target->decl, optimize)
6748 && !(profile_flag && !flag_fentry))
6750 cgraph_local_info *i = &target->local;
6751 if (i && i->local && i->can_change_signature)
6753 int local_regparm, globals = 0, regno;
6755 /* Make sure no regparm register is taken by a
6756 fixed register variable. */
6757 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6758 local_regparm++)
6759 if (fixed_regs[local_regparm])
6760 break;
6762 /* We don't want to use regparm(3) for nested functions as
6763 these use a static chain pointer in the third argument. */
6764 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6765 local_regparm = 2;
6767 /* Save a register for the split stack. */
6768 if (flag_split_stack)
6770 if (local_regparm == 3)
6771 local_regparm = 2;
6772 else if (local_regparm == 2
6773 && DECL_STATIC_CHAIN (target->decl))
6774 local_regparm = 1;
6777 /* Each fixed register usage increases register pressure,
6778 so fewer registers should be used for argument passing.
6779 This functionality can be overridden by an explicit
6780 regparm value. */
6781 for (regno = AX_REG; regno <= DI_REG; regno++)
6782 if (fixed_regs[regno])
6783 globals++;
6785 local_regparm
6786 = globals < local_regparm ? local_regparm - globals : 0;
6788 if (local_regparm > regparm)
6789 regparm = local_regparm;
6794 return regparm;
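/* Sketch of the local-function case above: with -m32 -O2, a function that
   stays local to its translation unit and whose signature may be changed
   can get register argument passing without any attribute, e.g. the
   hypothetical

     static int combine (int a, int b) { return a * 31 + b; }

   may have both arguments passed in registers, minus any registers lost to
   fixed-register variables, the static chain or -fsplit-stack.  */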
6797 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6798 DFmode (2) arguments in SSE registers for a function with the
6799 indicated TYPE and DECL. DECL may be NULL when calling function
6800 indirectly or considering a libcall. Return -1 if any FP parameter
6801 should be rejected by error. This is used in situations where we imply
6802 the SSE calling convention but the function is called from another
6803 function with SSE disabled. Otherwise return 0. */
6805 static int
6806 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6808 gcc_assert (!TARGET_64BIT);
6810 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6811 by the sseregparm attribute. */
6812 if (TARGET_SSEREGPARM
6813 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6815 if (!TARGET_SSE)
6817 if (warn)
6819 if (decl)
6820 error ("calling %qD with attribute sseregparm without "
6821 "SSE/SSE2 enabled", decl);
6822 else
6823 error ("calling %qT with attribute sseregparm without "
6824 "SSE/SSE2 enabled", type);
6826 return 0;
6829 return 2;
6832 if (!decl)
6833 return 0;
6835 cgraph_node *target = cgraph_node::get (decl);
6836 if (target)
6837 target = target->function_symbol ();
6839 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6840 (and DFmode for SSE2) arguments in SSE registers. */
6841 if (target
6842 /* TARGET_SSE_MATH */
6843 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6844 && opt_for_fn (target->decl, optimize)
6845 && !(profile_flag && !flag_fentry))
6847 cgraph_local_info *i = &target->local;
6848 if (i && i->local && i->can_change_signature)
6850 /* Refuse to produce wrong code when local function with SSE enabled
6851 is called from SSE disabled function.
6852 FIXME: We need a way to detect these cases cross-ltrans partition
6853 and avoid using SSE calling conventions on local functions called
6854 from function with SSE disabled. For now at least delay the
6855 warning until we know we are going to produce wrong code.
6856 See PR66047 */
6857 if (!TARGET_SSE && warn)
6858 return -1;
6859 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6860 ->x_ix86_isa_flags) ? 2 : 1;
6864 return 0;
6867 /* Return true if EAX is live at the start of the function. Used by
6868 ix86_expand_prologue to determine if we need special help before
6869 calling allocate_stack_worker. */
6871 static bool
6872 ix86_eax_live_at_start_p (void)
6874 /* Cheat. Don't bother working forward from ix86_function_regparm
6875 to the function type to whether an actual argument is located in
6876 eax. Instead just look at cfg info, which is still close enough
6877 to correct at this point. This gives false positives for broken
6878 functions that might use uninitialized data that happens to be
6879 allocated in eax, but who cares? */
6880 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6883 static bool
6884 ix86_keep_aggregate_return_pointer (tree fntype)
6886 tree attr;
6888 if (!TARGET_64BIT)
6890 attr = lookup_attribute ("callee_pop_aggregate_return",
6891 TYPE_ATTRIBUTES (fntype));
6892 if (attr)
6893 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6895 /* For the 32-bit MS ABI the default is to keep the aggregate
6896 return pointer. */
6897 if (ix86_function_type_abi (fntype) == MS_ABI)
6898 return true;
6900 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6903 /* Value is the number of bytes of arguments automatically
6904 popped when returning from a subroutine call.
6905 FUNDECL is the declaration node of the function (as a tree),
6906 FUNTYPE is the data type of the function (as a tree),
6907 or for a library call it is an identifier node for the subroutine name.
6908 SIZE is the number of bytes of arguments passed on the stack.
6910 On the 80386, the RTD insn may be used to pop them if the number
6911 of args is fixed, but if the number is variable then the caller
6912 must pop them all. RTD can't be used for library calls now
6913 because the library is compiled with the Unix compiler.
6914 Use of RTD is a selectable option, since it is incompatible with
6915 standard Unix calling sequences. If the option is not selected,
6916 the caller must always pop the args.
6918 The attribute stdcall is equivalent to RTD on a per module basis. */
6920 static poly_int64
6921 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
6923 unsigned int ccvt;
6925 /* None of the 64-bit ABIs pop arguments. */
6926 if (TARGET_64BIT)
6927 return 0;
6929 ccvt = ix86_get_callcvt (funtype);
6931 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6932 | IX86_CALLCVT_THISCALL)) != 0
6933 && ! stdarg_p (funtype))
6934 return size;
6936 /* Lose any fake structure return argument if it is passed on the stack. */
6937 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6938 && !ix86_keep_aggregate_return_pointer (funtype))
6940 int nregs = ix86_function_regparm (funtype, fundecl);
6941 if (nregs == 0)
6942 return GET_MODE_SIZE (Pmode);
6945 return 0;
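/* Worked example for the 32-bit case above (hypothetical declaration):

     int __attribute__ ((stdcall)) sum2 (int a, int b);

   For a call to sum2 the hook returns 8, so the callee is expected to end
   with "ret $8" and pop its own arguments; a cdecl or variadic callee makes
   the hook return 0 and the caller adjusts %esp itself.  */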
6948 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6950 static bool
6951 ix86_legitimate_combined_insn (rtx_insn *insn)
6953 int i;
6955 /* Check operand constraints in case hard registers were propagated
6956 into insn pattern. This check prevents combine pass from
6957 generating insn patterns with invalid hard register operands.
6958 These invalid insns can eventually confuse reload to error out
6959 with a spill failure. See also PRs 46829 and 46843. */
6961 gcc_assert (INSN_CODE (insn) >= 0);
6963 extract_insn (insn);
6964 preprocess_constraints (insn);
6966 int n_operands = recog_data.n_operands;
6967 int n_alternatives = recog_data.n_alternatives;
6968 for (i = 0; i < n_operands; i++)
6970 rtx op = recog_data.operand[i];
6971 machine_mode mode = GET_MODE (op);
6972 const operand_alternative *op_alt;
6973 int offset = 0;
6974 bool win;
6975 int j;
6977 /* A unary operator may be accepted by the predicate, but it
6978 is irrelevant for matching constraints. */
6979 if (UNARY_P (op))
6980 op = XEXP (op, 0);
6982 if (SUBREG_P (op))
6984 if (REG_P (SUBREG_REG (op))
6985 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6986 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6987 GET_MODE (SUBREG_REG (op)),
6988 SUBREG_BYTE (op),
6989 GET_MODE (op));
6990 op = SUBREG_REG (op);
6993 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6994 continue;
6996 op_alt = recog_op_alt;
6998 /* Operand has no constraints, anything is OK. */
6999 win = !n_alternatives;
7001 alternative_mask preferred = get_preferred_alternatives (insn);
7002 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7004 if (!TEST_BIT (preferred, j))
7005 continue;
7006 if (op_alt[i].anything_ok
7007 || (op_alt[i].matches != -1
7008 && operands_match_p
7009 (recog_data.operand[i],
7010 recog_data.operand[op_alt[i].matches]))
7011 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7013 win = true;
7014 break;
7018 if (!win)
7019 return false;
7022 return true;
7025 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7027 static unsigned HOST_WIDE_INT
7028 ix86_asan_shadow_offset (void)
7030 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7031 : HOST_WIDE_INT_C (0x7fff8000))
7032 : (HOST_WIDE_INT_1 << 29);
7035 /* Argument support functions. */
7037 /* Return true when register may be used to pass function parameters. */
7038 bool
7039 ix86_function_arg_regno_p (int regno)
7041 int i;
7042 enum calling_abi call_abi;
7043 const int *parm_regs;
7045 if (TARGET_MPX && BND_REGNO_P (regno))
7046 return true;
7048 if (!TARGET_64BIT)
7050 if (TARGET_MACHO)
7051 return (regno < REGPARM_MAX
7052 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7053 else
7054 return (regno < REGPARM_MAX
7055 || (TARGET_MMX && MMX_REGNO_P (regno)
7056 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7057 || (TARGET_SSE && SSE_REGNO_P (regno)
7058 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7061 if (TARGET_SSE && SSE_REGNO_P (regno)
7062 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7063 return true;
7065 /* TODO: The function should depend on current function ABI but
7066 builtins.c would need updating then. Therefore we use the
7067 default ABI. */
7068 call_abi = ix86_cfun_abi ();
7070 /* RAX is used as hidden argument to va_arg functions. */
7071 if (call_abi == SYSV_ABI && regno == AX_REG)
7072 return true;
7074 if (call_abi == MS_ABI)
7075 parm_regs = x86_64_ms_abi_int_parameter_registers;
7076 else
7077 parm_regs = x86_64_int_parameter_registers;
7079 for (i = 0; i < (call_abi == MS_ABI
7080 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7081 if (regno == parm_regs[i])
7082 return true;
7083 return false;
7086 /* Return true if we do not know how to pass TYPE solely in registers. */
7088 static bool
7089 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7091 if (must_pass_in_stack_var_size_or_pad (mode, type))
7092 return true;
7094 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7095 The layout_type routine is crafty and tries to trick us into passing
7096 currently unsupported vector types on the stack by using TImode. */
7097 return (!TARGET_64BIT && mode == TImode
7098 && type && TREE_CODE (type) != VECTOR_TYPE);
7101 /* Return the size, in bytes, of the area reserved for arguments passed
7102 in registers for the function represented by FNDECL, depending on the
7103 ABI used. */
7105 ix86_reg_parm_stack_space (const_tree fndecl)
7107 enum calling_abi call_abi = SYSV_ABI;
7108 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7109 call_abi = ix86_function_abi (fndecl);
7110 else
7111 call_abi = ix86_function_type_abi (fndecl);
7112 if (TARGET_64BIT && call_abi == MS_ABI)
7113 return 32;
7114 return 0;
7117 /* We add this as a workaround in order to use libc_has_function
7118 hook in i386.md. */
7119 bool
7120 ix86_libc_has_function (enum function_class fn_class)
7122 return targetm.libc_has_function (fn_class);
7125 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
7126 specifying the call ABI used. */
7127 enum calling_abi
7128 ix86_function_type_abi (const_tree fntype)
7130 enum calling_abi abi = ix86_abi;
7132 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
7133 return abi;
7135 if (abi == SYSV_ABI
7136 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7138 static int warned;
7139 if (TARGET_X32 && !warned)
7141 error ("X32 does not support ms_abi attribute");
7142 warned = 1;
7145 abi = MS_ABI;
7147 else if (abi == MS_ABI
7148 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7149 abi = SYSV_ABI;
7151 return abi;
7154 static enum calling_abi
7155 ix86_function_abi (const_tree fndecl)
7157 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7160 /* Return SYSV_ABI or MS_ABI, depending on cfun,
7161 specifying the call ABI used. */
7162 enum calling_abi
7163 ix86_cfun_abi (void)
7165 return cfun ? cfun->machine->call_abi : ix86_abi;
7168 static bool
7169 ix86_function_ms_hook_prologue (const_tree fn)
7171 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7173 if (decl_function_context (fn) != NULL_TREE)
7174 error_at (DECL_SOURCE_LOCATION (fn),
7175 "ms_hook_prologue is not compatible with nested function");
7176 else
7177 return true;
7179 return false;
7182 static bool
7183 ix86_function_naked (const_tree fn)
7185 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
7186 return true;
7188 return false;
7191 /* Write the extra assembler code needed to declare a function properly. */
7193 void
7194 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7195 tree decl)
7197 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7199 if (is_ms_hook)
7201 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7202 unsigned int filler_cc = 0xcccccccc;
7204 for (i = 0; i < filler_count; i += 4)
7205 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7208 #ifdef SUBTARGET_ASM_UNWIND_INIT
7209 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7210 #endif
7212 ASM_OUTPUT_LABEL (asm_out_file, fname);
7214 /* Output magic byte marker, if hot-patch attribute is set. */
7215 if (is_ms_hook)
7217 if (TARGET_64BIT)
7219 /* leaq [%rsp + 0], %rsp */
7220 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7221 asm_out_file);
7223 else
7225 /* movl.s %edi, %edi
7226 push %ebp
7227 movl.s %esp, %ebp */
7228 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
7233 /* Implementation of the call ABI switching target hook. The call
7234 register sets specific to FNDECL are selected. See also
7235 ix86_conditional_register_usage for more details. */
7236 void
7237 ix86_call_abi_override (const_tree fndecl)
7239 cfun->machine->call_abi = ix86_function_abi (fndecl);
7242 /* Return true if a pseudo register should be created and used to hold
7243 the GOT address for PIC code. */
7244 bool
7245 ix86_use_pseudo_pic_reg (void)
7247 if ((TARGET_64BIT
7248 && (ix86_cmodel == CM_SMALL_PIC
7249 || TARGET_PECOFF))
7250 || !flag_pic)
7251 return false;
7252 return true;
7255 /* Initialize large model PIC register. */
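/* A rough sketch of the sequence emitted below (illustrative only):

     .LN:  lea    .LN(%rip), %pic
           movabs $_GLOBAL_OFFSET_TABLE_-.LN, %tmp
           add    %tmp, %pic

   which leaves the runtime GOT address in the PIC pseudo register.  */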
7257 static void
7258 ix86_init_large_pic_reg (unsigned int tmp_regno)
7260 rtx_code_label *label;
7261 rtx tmp_reg;
7263 gcc_assert (Pmode == DImode);
7264 label = gen_label_rtx ();
7265 emit_label (label);
7266 LABEL_PRESERVE_P (label) = 1;
7267 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7268 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7269 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7270 label));
7271 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7272 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7273 pic_offset_table_rtx, tmp_reg));
7274 const char *name = LABEL_NAME (label);
7275 PUT_CODE (label, NOTE);
7276 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7277 NOTE_DELETED_LABEL_NAME (label) = name;
7280 /* Create and initialize PIC register if required. */
7281 static void
7282 ix86_init_pic_reg (void)
7284 edge entry_edge;
7285 rtx_insn *seq;
7287 if (!ix86_use_pseudo_pic_reg ())
7288 return;
7290 start_sequence ();
7292 if (TARGET_64BIT)
7294 if (ix86_cmodel == CM_LARGE_PIC)
7295 ix86_init_large_pic_reg (R11_REG);
7296 else
7297 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7299 else
7301 /* If there is a future mcount call in the function, it is more profitable
7302 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7303 rtx reg = crtl->profile
7304 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7305 : pic_offset_table_rtx;
7306 rtx_insn *insn = emit_insn (gen_set_got (reg));
7307 RTX_FRAME_RELATED_P (insn) = 1;
7308 if (crtl->profile)
7309 emit_move_insn (pic_offset_table_rtx, reg);
7310 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7313 seq = get_insns ();
7314 end_sequence ();
7316 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7317 insert_insn_on_edge (seq, entry_edge);
7318 commit_one_edge_insertion (entry_edge);
7321 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7322 for a call to a function whose data type is FNTYPE.
7323 For a library call, FNTYPE is 0. */
7325 void
7326 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7327 tree fntype, /* tree ptr for function decl */
7328 rtx libname, /* SYMBOL_REF of library name or 0 */
7329 tree fndecl,
7330 int caller)
7332 struct cgraph_local_info *i = NULL;
7333 struct cgraph_node *target = NULL;
7335 memset (cum, 0, sizeof (*cum));
7337 if (fndecl)
7339 target = cgraph_node::get (fndecl);
7340 if (target)
7342 target = target->function_symbol ();
7343 i = cgraph_node::local_info (target->decl);
7344 cum->call_abi = ix86_function_abi (target->decl);
7346 else
7347 cum->call_abi = ix86_function_abi (fndecl);
7349 else
7350 cum->call_abi = ix86_function_type_abi (fntype);
7352 cum->caller = caller;
7354 /* Set up the number of registers to use for passing arguments. */
7355 cum->nregs = ix86_regparm;
7356 if (TARGET_64BIT)
7358 cum->nregs = (cum->call_abi == SYSV_ABI
7359 ? X86_64_REGPARM_MAX
7360 : X86_64_MS_REGPARM_MAX);
7362 if (TARGET_SSE)
7364 cum->sse_nregs = SSE_REGPARM_MAX;
7365 if (TARGET_64BIT)
7367 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7368 ? X86_64_SSE_REGPARM_MAX
7369 : X86_64_MS_SSE_REGPARM_MAX);
7372 if (TARGET_MMX)
7373 cum->mmx_nregs = MMX_REGPARM_MAX;
7374 cum->warn_avx512f = true;
7375 cum->warn_avx = true;
7376 cum->warn_sse = true;
7377 cum->warn_mmx = true;
7379 /* Because the type might mismatch between caller and callee, we need to
7380 use the actual type of the function for local calls.
7381 FIXME: cgraph_analyze can be told to actually record whether a function uses
7382 va_start, so for local functions maybe_vaarg can be made more aggressive,
7383 helping K&R code.
7384 FIXME: once the type system is fixed, we won't need this code anymore. */
7385 if (i && i->local && i->can_change_signature)
7386 fntype = TREE_TYPE (target->decl);
7387 cum->stdarg = stdarg_p (fntype);
7388 cum->maybe_vaarg = (fntype
7389 ? (!prototype_p (fntype) || stdarg_p (fntype))
7390 : !libname);
7392 cum->bnd_regno = FIRST_BND_REG;
7393 cum->bnds_in_bt = 0;
7394 cum->force_bnd_pass = 0;
7395 cum->decl = fndecl;
7397 cum->warn_empty = !warn_abi || cum->stdarg;
7398 if (!cum->warn_empty && fntype)
7400 function_args_iterator iter;
7401 tree argtype;
7402 bool seen_empty_type = false;
7403 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
7405 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
7406 break;
7407 if (TYPE_EMPTY_P (argtype))
7408 seen_empty_type = true;
7409 else if (seen_empty_type)
7411 cum->warn_empty = true;
7412 break;
7417 if (!TARGET_64BIT)
7419 /* If there are variable arguments, then we won't pass anything
7420 in registers in 32-bit mode. */
7421 if (stdarg_p (fntype))
7423 cum->nregs = 0;
7424 /* Since in 32-bit mode variable arguments are always passed on the
7425 stack, there is a scratch register available for an indirect
7426 sibcall. */
7427 cfun->machine->arg_reg_available = true;
7428 cum->sse_nregs = 0;
7429 cum->mmx_nregs = 0;
7430 cum->warn_avx512f = false;
7431 cum->warn_avx = false;
7432 cum->warn_sse = false;
7433 cum->warn_mmx = false;
7434 return;
7437 /* Use ecx and edx registers if function has fastcall attribute,
7438 else look for regparm information. */
7439 if (fntype)
7441 unsigned int ccvt = ix86_get_callcvt (fntype);
7442 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7444 cum->nregs = 1;
7445 cum->fastcall = 1; /* Same first register as in fastcall. */
7447 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7449 cum->nregs = 2;
7450 cum->fastcall = 1;
7452 else
7453 cum->nregs = ix86_function_regparm (fntype, fndecl);
7456 /* Set up the number of SSE registers used for passing SFmode
7457 and DFmode arguments. Warn for mismatching ABI. */
7458 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7461 cfun->machine->arg_reg_available = (cum->nregs > 0);
7464 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7465 But in the case of vector types, it is some vector mode.
7467 When we have only some of our vector isa extensions enabled, then there
7468 are some modes for which vector_mode_supported_p is false. For these
7469 modes, the generic vector support in gcc will choose some non-vector mode
7470 in order to implement the type. By computing the natural mode, we'll
7471 select the proper ABI location for the operand and not depend on whatever
7472 the middle-end decides to do with these vector types.
7474 The middle-end can't deal with vector types larger than 16 bytes. In this
7475 case, we return the original mode and warn about the ABI change if CUM isn't
7476 NULL.
7478 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
7479 available for the function return value. */
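/* Illustrative example (not part of the original comment): a GCC vector
   type of eight floats (32 bytes) maps to V8SFmode here; when AVX is not
   enabled that mode is unsupported, so we keep TYPE_MODE and emit the
   -Wpsabi notes below about the ABI differing from an AVX-enabled build.  */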
7481 static machine_mode
7482 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7483 bool in_return)
7485 machine_mode mode = TYPE_MODE (type);
7487 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7489 HOST_WIDE_INT size = int_size_in_bytes (type);
7490 if ((size == 8 || size == 16 || size == 32 || size == 64)
7491 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7492 && TYPE_VECTOR_SUBPARTS (type) > 1)
7494 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7496 /* There are no XFmode vector modes. */
7497 if (innermode == XFmode)
7498 return mode;
7500 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7501 mode = MIN_MODE_VECTOR_FLOAT;
7502 else
7503 mode = MIN_MODE_VECTOR_INT;
7505 /* Get the mode which has this inner mode and number of units. */
7506 FOR_EACH_MODE_FROM (mode, mode)
7507 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7508 && GET_MODE_INNER (mode) == innermode)
7510 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7512 static bool warnedavx512f;
7513 static bool warnedavx512f_ret;
7515 if (cum && cum->warn_avx512f && !warnedavx512f)
7517 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7518 "without AVX512F enabled changes the ABI"))
7519 warnedavx512f = true;
7521 else if (in_return && !warnedavx512f_ret)
7523 if (warning (OPT_Wpsabi, "AVX512F vector return "
7524 "without AVX512F enabled changes the ABI"))
7525 warnedavx512f_ret = true;
7528 return TYPE_MODE (type);
7530 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7532 static bool warnedavx;
7533 static bool warnedavx_ret;
7535 if (cum && cum->warn_avx && !warnedavx)
7537 if (warning (OPT_Wpsabi, "AVX vector argument "
7538 "without AVX enabled changes the ABI"))
7539 warnedavx = true;
7541 else if (in_return && !warnedavx_ret)
7543 if (warning (OPT_Wpsabi, "AVX vector return "
7544 "without AVX enabled changes the ABI"))
7545 warnedavx_ret = true;
7548 return TYPE_MODE (type);
7550 else if (((size == 8 && TARGET_64BIT) || size == 16)
7551 && !TARGET_SSE
7552 && !TARGET_IAMCU)
7554 static bool warnedsse;
7555 static bool warnedsse_ret;
7557 if (cum && cum->warn_sse && !warnedsse)
7559 if (warning (OPT_Wpsabi, "SSE vector argument "
7560 "without SSE enabled changes the ABI"))
7561 warnedsse = true;
7563 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7565 if (warning (OPT_Wpsabi, "SSE vector return "
7566 "without SSE enabled changes the ABI"))
7567 warnedsse_ret = true;
7570 else if ((size == 8 && !TARGET_64BIT)
7571 && (!cfun
7572 || cfun->machine->func_type == TYPE_NORMAL)
7573 && !TARGET_MMX
7574 && !TARGET_IAMCU)
7576 static bool warnedmmx;
7577 static bool warnedmmx_ret;
7579 if (cum && cum->warn_mmx && !warnedmmx)
7581 if (warning (OPT_Wpsabi, "MMX vector argument "
7582 "without MMX enabled changes the ABI"))
7583 warnedmmx = true;
7585 else if (in_return && !warnedmmx_ret)
7587 if (warning (OPT_Wpsabi, "MMX vector return "
7588 "without MMX enabled changes the ABI"))
7589 warnedmmx_ret = true;
7592 return mode;
7595 gcc_unreachable ();
7599 return mode;
7602 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7603 this may not agree with the mode that the type system has chosen for the
7604 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7605 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
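/* In the BLKmode case the result has the shape
     (parallel:ORIG_MODE [(expr_list (reg:MODE REGNO) (const_int 0))])
   i.e. the whole value lives in register REGNO starting at byte offset 0.  */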
7607 static rtx
7608 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7609 unsigned int regno)
7611 rtx tmp;
7613 if (orig_mode != BLKmode)
7614 tmp = gen_rtx_REG (orig_mode, regno);
7615 else
7617 tmp = gen_rtx_REG (mode, regno);
7618 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7619 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7622 return tmp;
7625 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
7626 of this code is to classify each eightbyte of an incoming argument by register
7627 class and assign registers accordingly. */
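/* For reference (SysV ABI): integer eightbytes are assigned to RDI, RSI,
   RDX, RCX, R8 and R9 in that order, and SSE eightbytes to XMM0-XMM7; the
   MS ABI instead uses RCX, RDX, R8, R9 and XMM0-XMM3 (see
   x86_64_int_parameter_registers and x86_64_ms_abi_int_parameter_registers).  */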
7629 /* Return the union class of CLASS1 and CLASS2.
7630 See the x86-64 PS ABI for details. */
7632 static enum x86_64_reg_class
7633 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7635 /* Rule #1: If both classes are equal, this is the resulting class. */
7636 if (class1 == class2)
7637 return class1;
7639 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7640 the other class. */
7641 if (class1 == X86_64_NO_CLASS)
7642 return class2;
7643 if (class2 == X86_64_NO_CLASS)
7644 return class1;
7646 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7647 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7648 return X86_64_MEMORY_CLASS;
7650 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7651 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7652 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7653 return X86_64_INTEGERSI_CLASS;
7654 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7655 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7656 return X86_64_INTEGER_CLASS;
7658 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7659 MEMORY is used. */
7660 if (class1 == X86_64_X87_CLASS
7661 || class1 == X86_64_X87UP_CLASS
7662 || class1 == X86_64_COMPLEX_X87_CLASS
7663 || class2 == X86_64_X87_CLASS
7664 || class2 == X86_64_X87UP_CLASS
7665 || class2 == X86_64_COMPLEX_X87_CLASS)
7666 return X86_64_MEMORY_CLASS;
7668 /* Rule #6: Otherwise class SSE is used. */
7669 return X86_64_SSE_CLASS;
7672 /* Classify the argument of type TYPE and mode MODE.
7673 CLASSES will be filled by the register class used to pass each word
7674 of the operand. The number of words is returned. In case the parameter
7675 should be passed in memory, 0 is returned. As a special case for zero
7676 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7678 BIT_OFFSET is used internally for handling records; it gives the offset,
7679 in bits and taken modulo 512 to avoid overflow, of the value being classified.
7681 See the x86-64 PS ABI for details.
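/* A worked example, assuming the SysV x86-64 ABI: for
     struct pt { double d; long l; };
   the two eightbytes classify as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS },
   so the struct is passed in one SSE and one integer register, while a struct
   containing a long double classifies as { X86_64_X87_CLASS, X86_64_X87UP_CLASS }
   and is therefore passed in memory when used as an argument.  */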
7684 static int
7685 classify_argument (machine_mode mode, const_tree type,
7686 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7688 HOST_WIDE_INT bytes =
7689 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7690 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7692 /* Variable sized entities are always passed/returned in memory. */
7693 if (bytes < 0)
7694 return 0;
7696 if (mode != VOIDmode
7697 && targetm.calls.must_pass_in_stack (mode, type))
7698 return 0;
7700 if (type && AGGREGATE_TYPE_P (type))
7702 int i;
7703 tree field;
7704 enum x86_64_reg_class subclasses[MAX_CLASSES];
7706 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7707 if (bytes > 64)
7708 return 0;
7710 for (i = 0; i < words; i++)
7711 classes[i] = X86_64_NO_CLASS;
7713 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7714 signal the memory class, so handle them as a special case. */
7715 if (!words)
7717 classes[0] = X86_64_NO_CLASS;
7718 return 1;
7721 /* Classify each field of record and merge classes. */
7722 switch (TREE_CODE (type))
7724 case RECORD_TYPE:
7725 /* And now merge the fields of structure. */
7726 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7728 if (TREE_CODE (field) == FIELD_DECL)
7730 int num;
7732 if (TREE_TYPE (field) == error_mark_node)
7733 continue;
7735 /* Bitfields are always classified as integer. Handle them
7736 early, since later code would consider them to be
7737 misaligned integers. */
7738 if (DECL_BIT_FIELD (field))
7740 for (i = (int_bit_position (field)
7741 + (bit_offset % 64)) / 8 / 8;
7742 i < ((int_bit_position (field) + (bit_offset % 64))
7743 + tree_to_shwi (DECL_SIZE (field))
7744 + 63) / 8 / 8; i++)
7745 classes[i] =
7746 merge_classes (X86_64_INTEGER_CLASS,
7747 classes[i]);
7749 else
7751 int pos;
7753 type = TREE_TYPE (field);
7755 /* Flexible array member is ignored. */
7756 if (TYPE_MODE (type) == BLKmode
7757 && TREE_CODE (type) == ARRAY_TYPE
7758 && TYPE_SIZE (type) == NULL_TREE
7759 && TYPE_DOMAIN (type) != NULL_TREE
7760 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7761 == NULL_TREE))
7763 static bool warned;
7765 if (!warned && warn_psabi)
7767 warned = true;
7768 inform (input_location,
7769 "the ABI of passing struct with"
7770 " a flexible array member has"
7771 " changed in GCC 4.4");
7773 continue;
7775 num = classify_argument (TYPE_MODE (type), type,
7776 subclasses,
7777 (int_bit_position (field)
7778 + bit_offset) % 512);
7779 if (!num)
7780 return 0;
7781 pos = (int_bit_position (field)
7782 + (bit_offset % 64)) / 8 / 8;
7783 for (i = 0; i < num && (i + pos) < words; i++)
7784 classes[i + pos] =
7785 merge_classes (subclasses[i], classes[i + pos]);
7789 break;
7791 case ARRAY_TYPE:
7792 /* Arrays are handled as small records. */
7794 int num;
7795 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7796 TREE_TYPE (type), subclasses, bit_offset);
7797 if (!num)
7798 return 0;
7800 /* The partial classes are now full classes. */
7801 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7802 subclasses[0] = X86_64_SSE_CLASS;
7803 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7804 && !((bit_offset % 64) == 0 && bytes == 4))
7805 subclasses[0] = X86_64_INTEGER_CLASS;
7807 for (i = 0; i < words; i++)
7808 classes[i] = subclasses[i % num];
7810 break;
7812 case UNION_TYPE:
7813 case QUAL_UNION_TYPE:
7814 /* Unions are similar to RECORD_TYPE but offset is always 0.
7816 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7818 if (TREE_CODE (field) == FIELD_DECL)
7820 int num;
7822 if (TREE_TYPE (field) == error_mark_node)
7823 continue;
7825 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7826 TREE_TYPE (field), subclasses,
7827 bit_offset);
7828 if (!num)
7829 return 0;
7830 for (i = 0; i < num && i < words; i++)
7831 classes[i] = merge_classes (subclasses[i], classes[i]);
7834 break;
7836 default:
7837 gcc_unreachable ();
7840 if (words > 2)
7842 /* When the size is > 16 bytes, if the first class isn't
7843 X86_64_SSE_CLASS or any of the other classes isn't
7844 X86_64_SSEUP_CLASS, everything should be passed in
7845 memory. */
7846 if (classes[0] != X86_64_SSE_CLASS)
7847 return 0;
7849 for (i = 1; i < words; i++)
7850 if (classes[i] != X86_64_SSEUP_CLASS)
7851 return 0;
7854 /* Final merger cleanup. */
7855 for (i = 0; i < words; i++)
7857 /* If one class is MEMORY, everything should be passed in
7858 memory. */
7859 if (classes[i] == X86_64_MEMORY_CLASS)
7860 return 0;
7862 /* The X86_64_SSEUP_CLASS should be always preceded by
7863 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7864 if (classes[i] == X86_64_SSEUP_CLASS
7865 && classes[i - 1] != X86_64_SSE_CLASS
7866 && classes[i - 1] != X86_64_SSEUP_CLASS)
7868 /* The first one should never be X86_64_SSEUP_CLASS. */
7869 gcc_assert (i != 0);
7870 classes[i] = X86_64_SSE_CLASS;
7873 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7874 everything should be passed in memory. */
7875 if (classes[i] == X86_64_X87UP_CLASS
7876 && (classes[i - 1] != X86_64_X87_CLASS))
7878 static bool warned;
7880 /* The first one should never be X86_64_X87UP_CLASS. */
7881 gcc_assert (i != 0);
7882 if (!warned && warn_psabi)
7884 warned = true;
7885 inform (input_location,
7886 "the ABI of passing union with long double"
7887 " has changed in GCC 4.4");
7889 return 0;
7892 return words;
7895 /* Compute the alignment needed. We align all types to their natural boundaries,
7896 with the exception of XFmode, which is aligned to 64 bits. */
7897 if (mode != VOIDmode && mode != BLKmode)
7899 int mode_alignment = GET_MODE_BITSIZE (mode);
7901 if (mode == XFmode)
7902 mode_alignment = 128;
7903 else if (mode == XCmode)
7904 mode_alignment = 256;
7905 if (COMPLEX_MODE_P (mode))
7906 mode_alignment /= 2;
7907 /* Misaligned fields are always returned in memory. */
7908 if (bit_offset % mode_alignment)
7909 return 0;
7912 /* For V1xx modes, just use the base mode. */
7913 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7914 && GET_MODE_UNIT_SIZE (mode) == bytes)
7915 mode = GET_MODE_INNER (mode);
7917 /* Classification of atomic types. */
7918 switch (mode)
7920 case E_SDmode:
7921 case E_DDmode:
7922 classes[0] = X86_64_SSE_CLASS;
7923 return 1;
7924 case E_TDmode:
7925 classes[0] = X86_64_SSE_CLASS;
7926 classes[1] = X86_64_SSEUP_CLASS;
7927 return 2;
7928 case E_DImode:
7929 case E_SImode:
7930 case E_HImode:
7931 case E_QImode:
7932 case E_CSImode:
7933 case E_CHImode:
7934 case E_CQImode:
7936 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7938 /* Analyze last 128 bits only. */
7939 size = (size - 1) & 0x7f;
7941 if (size < 32)
7943 classes[0] = X86_64_INTEGERSI_CLASS;
7944 return 1;
7946 else if (size < 64)
7948 classes[0] = X86_64_INTEGER_CLASS;
7949 return 1;
7951 else if (size < 64+32)
7953 classes[0] = X86_64_INTEGER_CLASS;
7954 classes[1] = X86_64_INTEGERSI_CLASS;
7955 return 2;
7957 else if (size < 64+64)
7959 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7960 return 2;
7962 else
7963 gcc_unreachable ();
7965 case E_CDImode:
7966 case E_TImode:
7967 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7968 return 2;
7969 case E_COImode:
7970 case E_OImode:
7971 /* OImode shouldn't be used directly. */
7972 gcc_unreachable ();
7973 case E_CTImode:
7974 return 0;
7975 case E_SFmode:
7976 if (!(bit_offset % 64))
7977 classes[0] = X86_64_SSESF_CLASS;
7978 else
7979 classes[0] = X86_64_SSE_CLASS;
7980 return 1;
7981 case E_DFmode:
7982 classes[0] = X86_64_SSEDF_CLASS;
7983 return 1;
7984 case E_XFmode:
7985 classes[0] = X86_64_X87_CLASS;
7986 classes[1] = X86_64_X87UP_CLASS;
7987 return 2;
7988 case E_TFmode:
7989 classes[0] = X86_64_SSE_CLASS;
7990 classes[1] = X86_64_SSEUP_CLASS;
7991 return 2;
7992 case E_SCmode:
7993 classes[0] = X86_64_SSE_CLASS;
7994 if (!(bit_offset % 64))
7995 return 1;
7996 else
7998 static bool warned;
8000 if (!warned && warn_psabi)
8002 warned = true;
8003 inform (input_location,
8004 "the ABI of passing structure with complex float"
8005 " member has changed in GCC 4.4");
8007 classes[1] = X86_64_SSESF_CLASS;
8008 return 2;
8010 case E_DCmode:
8011 classes[0] = X86_64_SSEDF_CLASS;
8012 classes[1] = X86_64_SSEDF_CLASS;
8013 return 2;
8014 case E_XCmode:
8015 classes[0] = X86_64_COMPLEX_X87_CLASS;
8016 return 1;
8017 case E_TCmode:
8018 /* This mode is larger than 16 bytes. */
8019 return 0;
8020 case E_V8SFmode:
8021 case E_V8SImode:
8022 case E_V32QImode:
8023 case E_V16HImode:
8024 case E_V4DFmode:
8025 case E_V4DImode:
8026 classes[0] = X86_64_SSE_CLASS;
8027 classes[1] = X86_64_SSEUP_CLASS;
8028 classes[2] = X86_64_SSEUP_CLASS;
8029 classes[3] = X86_64_SSEUP_CLASS;
8030 return 4;
8031 case E_V8DFmode:
8032 case E_V16SFmode:
8033 case E_V8DImode:
8034 case E_V16SImode:
8035 case E_V32HImode:
8036 case E_V64QImode:
8037 classes[0] = X86_64_SSE_CLASS;
8038 classes[1] = X86_64_SSEUP_CLASS;
8039 classes[2] = X86_64_SSEUP_CLASS;
8040 classes[3] = X86_64_SSEUP_CLASS;
8041 classes[4] = X86_64_SSEUP_CLASS;
8042 classes[5] = X86_64_SSEUP_CLASS;
8043 classes[6] = X86_64_SSEUP_CLASS;
8044 classes[7] = X86_64_SSEUP_CLASS;
8045 return 8;
8046 case E_V4SFmode:
8047 case E_V4SImode:
8048 case E_V16QImode:
8049 case E_V8HImode:
8050 case E_V2DFmode:
8051 case E_V2DImode:
8052 classes[0] = X86_64_SSE_CLASS;
8053 classes[1] = X86_64_SSEUP_CLASS;
8054 return 2;
8055 case E_V1TImode:
8056 case E_V1DImode:
8057 case E_V2SFmode:
8058 case E_V2SImode:
8059 case E_V4HImode:
8060 case E_V8QImode:
8061 classes[0] = X86_64_SSE_CLASS;
8062 return 1;
8063 case E_BLKmode:
8064 case E_VOIDmode:
8065 return 0;
8066 default:
8067 gcc_assert (VECTOR_MODE_P (mode));
8069 if (bytes > 16)
8070 return 0;
8072 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8074 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8075 classes[0] = X86_64_INTEGERSI_CLASS;
8076 else
8077 classes[0] = X86_64_INTEGER_CLASS;
8078 classes[1] = X86_64_INTEGER_CLASS;
8079 return 1 + (bytes > 8);
8083 /* Examine the argument and set the number of registers required in each
8084 class. Return true iff the parameter should be passed in memory. */
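/* For example, with struct pt { double d; long l; } from the comment above,
   this sets *INT_NREGS to 1 and *SSE_NREGS to 1 and returns false
   (an illustration, assuming the SysV classification).  */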
8086 static bool
8087 examine_argument (machine_mode mode, const_tree type, int in_return,
8088 int *int_nregs, int *sse_nregs)
8090 enum x86_64_reg_class regclass[MAX_CLASSES];
8091 int n = classify_argument (mode, type, regclass, 0);
8093 *int_nregs = 0;
8094 *sse_nregs = 0;
8096 if (!n)
8097 return true;
8098 for (n--; n >= 0; n--)
8099 switch (regclass[n])
8101 case X86_64_INTEGER_CLASS:
8102 case X86_64_INTEGERSI_CLASS:
8103 (*int_nregs)++;
8104 break;
8105 case X86_64_SSE_CLASS:
8106 case X86_64_SSESF_CLASS:
8107 case X86_64_SSEDF_CLASS:
8108 (*sse_nregs)++;
8109 break;
8110 case X86_64_NO_CLASS:
8111 case X86_64_SSEUP_CLASS:
8112 break;
8113 case X86_64_X87_CLASS:
8114 case X86_64_X87UP_CLASS:
8115 case X86_64_COMPLEX_X87_CLASS:
8116 if (!in_return)
8117 return true;
8118 break;
8119 case X86_64_MEMORY_CLASS:
8120 gcc_unreachable ();
8123 return false;
8126 /* Construct container for the argument used by GCC interface. See
8127 FUNCTION_ARG for the detailed description. */
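/* Continuing the struct pt { double d; long l; } illustration: passed as the
   first SysV argument it yields a PARALLEL placing (reg:DF xmm0) at byte
   offset 0 and (reg:DI di) at byte offset 8 (a sketch, not verified output).  */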
8129 static rtx
8130 construct_container (machine_mode mode, machine_mode orig_mode,
8131 const_tree type, int in_return, int nintregs, int nsseregs,
8132 const int *intreg, int sse_regno)
8134 /* The following variables hold the static issued_error state. */
8135 static bool issued_sse_arg_error;
8136 static bool issued_sse_ret_error;
8137 static bool issued_x87_ret_error;
8139 machine_mode tmpmode;
8140 int bytes =
8141 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8142 enum x86_64_reg_class regclass[MAX_CLASSES];
8143 int n;
8144 int i;
8145 int nexps = 0;
8146 int needed_sseregs, needed_intregs;
8147 rtx exp[MAX_CLASSES];
8148 rtx ret;
8150 n = classify_argument (mode, type, regclass, 0);
8151 if (!n)
8152 return NULL;
8153 if (examine_argument (mode, type, in_return, &needed_intregs,
8154 &needed_sseregs))
8155 return NULL;
8156 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8157 return NULL;
8159 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8160 some less clueful developer tries to use floating-point anyway. */
8161 if (needed_sseregs && !TARGET_SSE)
8163 if (in_return)
8165 if (!issued_sse_ret_error)
8167 error ("SSE register return with SSE disabled");
8168 issued_sse_ret_error = true;
8171 else if (!issued_sse_arg_error)
8173 error ("SSE register argument with SSE disabled");
8174 issued_sse_arg_error = true;
8176 return NULL;
8179 /* Likewise, error if the ABI requires us to return values in the
8180 x87 registers and the user specified -mno-80387. */
8181 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8182 for (i = 0; i < n; i++)
8183 if (regclass[i] == X86_64_X87_CLASS
8184 || regclass[i] == X86_64_X87UP_CLASS
8185 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8187 if (!issued_x87_ret_error)
8189 error ("x87 register return with x87 disabled");
8190 issued_x87_ret_error = true;
8192 return NULL;
8195 /* First construct simple cases. Avoid SCmode, since we want to use
8196 single register to pass this type. */
8197 if (n == 1 && mode != SCmode)
8198 switch (regclass[0])
8200 case X86_64_INTEGER_CLASS:
8201 case X86_64_INTEGERSI_CLASS:
8202 return gen_rtx_REG (mode, intreg[0]);
8203 case X86_64_SSE_CLASS:
8204 case X86_64_SSESF_CLASS:
8205 case X86_64_SSEDF_CLASS:
8206 if (mode != BLKmode)
8207 return gen_reg_or_parallel (mode, orig_mode,
8208 SSE_REGNO (sse_regno));
8209 break;
8210 case X86_64_X87_CLASS:
8211 case X86_64_COMPLEX_X87_CLASS:
8212 return gen_rtx_REG (mode, FIRST_STACK_REG);
8213 case X86_64_NO_CLASS:
8214 /* Zero sized array, struct or class. */
8215 return NULL;
8216 default:
8217 gcc_unreachable ();
8219 if (n == 2
8220 && regclass[0] == X86_64_SSE_CLASS
8221 && regclass[1] == X86_64_SSEUP_CLASS
8222 && mode != BLKmode)
8223 return gen_reg_or_parallel (mode, orig_mode,
8224 SSE_REGNO (sse_regno));
8225 if (n == 4
8226 && regclass[0] == X86_64_SSE_CLASS
8227 && regclass[1] == X86_64_SSEUP_CLASS
8228 && regclass[2] == X86_64_SSEUP_CLASS
8229 && regclass[3] == X86_64_SSEUP_CLASS
8230 && mode != BLKmode)
8231 return gen_reg_or_parallel (mode, orig_mode,
8232 SSE_REGNO (sse_regno));
8233 if (n == 8
8234 && regclass[0] == X86_64_SSE_CLASS
8235 && regclass[1] == X86_64_SSEUP_CLASS
8236 && regclass[2] == X86_64_SSEUP_CLASS
8237 && regclass[3] == X86_64_SSEUP_CLASS
8238 && regclass[4] == X86_64_SSEUP_CLASS
8239 && regclass[5] == X86_64_SSEUP_CLASS
8240 && regclass[6] == X86_64_SSEUP_CLASS
8241 && regclass[7] == X86_64_SSEUP_CLASS
8242 && mode != BLKmode)
8243 return gen_reg_or_parallel (mode, orig_mode,
8244 SSE_REGNO (sse_regno));
8245 if (n == 2
8246 && regclass[0] == X86_64_X87_CLASS
8247 && regclass[1] == X86_64_X87UP_CLASS)
8248 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8250 if (n == 2
8251 && regclass[0] == X86_64_INTEGER_CLASS
8252 && regclass[1] == X86_64_INTEGER_CLASS
8253 && (mode == CDImode || mode == TImode)
8254 && intreg[0] + 1 == intreg[1])
8255 return gen_rtx_REG (mode, intreg[0]);
8257 /* Otherwise figure out the entries of the PARALLEL. */
8258 for (i = 0; i < n; i++)
8260 int pos;
8262 switch (regclass[i])
8264 case X86_64_NO_CLASS:
8265 break;
8266 case X86_64_INTEGER_CLASS:
8267 case X86_64_INTEGERSI_CLASS:
8268 /* Merge TImodes on aligned occasions here too. */
8269 if (i * 8 + 8 > bytes)
8271 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8272 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8273 /* We've requested 24 bytes for which
8274 we don't have a mode. Use DImode. */
8275 tmpmode = DImode;
8277 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8278 tmpmode = SImode;
8279 else
8280 tmpmode = DImode;
8281 exp [nexps++]
8282 = gen_rtx_EXPR_LIST (VOIDmode,
8283 gen_rtx_REG (tmpmode, *intreg),
8284 GEN_INT (i*8));
8285 intreg++;
8286 break;
8287 case X86_64_SSESF_CLASS:
8288 exp [nexps++]
8289 = gen_rtx_EXPR_LIST (VOIDmode,
8290 gen_rtx_REG (SFmode,
8291 SSE_REGNO (sse_regno)),
8292 GEN_INT (i*8));
8293 sse_regno++;
8294 break;
8295 case X86_64_SSEDF_CLASS:
8296 exp [nexps++]
8297 = gen_rtx_EXPR_LIST (VOIDmode,
8298 gen_rtx_REG (DFmode,
8299 SSE_REGNO (sse_regno)),
8300 GEN_INT (i*8));
8301 sse_regno++;
8302 break;
8303 case X86_64_SSE_CLASS:
8304 pos = i;
8305 switch (n)
8307 case 1:
8308 tmpmode = DImode;
8309 break;
8310 case 2:
8311 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8313 tmpmode = TImode;
8314 i++;
8316 else
8317 tmpmode = DImode;
8318 break;
8319 case 4:
8320 gcc_assert (i == 0
8321 && regclass[1] == X86_64_SSEUP_CLASS
8322 && regclass[2] == X86_64_SSEUP_CLASS
8323 && regclass[3] == X86_64_SSEUP_CLASS);
8324 tmpmode = OImode;
8325 i += 3;
8326 break;
8327 case 8:
8328 gcc_assert (i == 0
8329 && regclass[1] == X86_64_SSEUP_CLASS
8330 && regclass[2] == X86_64_SSEUP_CLASS
8331 && regclass[3] == X86_64_SSEUP_CLASS
8332 && regclass[4] == X86_64_SSEUP_CLASS
8333 && regclass[5] == X86_64_SSEUP_CLASS
8334 && regclass[6] == X86_64_SSEUP_CLASS
8335 && regclass[7] == X86_64_SSEUP_CLASS);
8336 tmpmode = XImode;
8337 i += 7;
8338 break;
8339 default:
8340 gcc_unreachable ();
8342 exp [nexps++]
8343 = gen_rtx_EXPR_LIST (VOIDmode,
8344 gen_rtx_REG (tmpmode,
8345 SSE_REGNO (sse_regno)),
8346 GEN_INT (pos*8));
8347 sse_regno++;
8348 break;
8349 default:
8350 gcc_unreachable ();
8354 /* Empty aligned struct, union or class. */
8355 if (nexps == 0)
8356 return NULL;
8358 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8359 for (i = 0; i < nexps; i++)
8360 XVECEXP (ret, 0, i) = exp [i];
8361 return ret;
8364 /* Update the data in CUM to advance over an argument of mode MODE
8365 and data type TYPE. (TYPE is null for libcalls where that information
8366 may not be available.)
8368 Return the number of integer registers advanced over. */
8370 static int
8371 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8372 const_tree type, HOST_WIDE_INT bytes,
8373 HOST_WIDE_INT words)
8375 int res = 0;
8376 bool error_p = false;
8378 if (TARGET_IAMCU)
8380 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8381 bytes in registers. */
8382 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8383 goto pass_in_reg;
8384 return res;
8387 switch (mode)
8389 default:
8390 break;
8392 case E_BLKmode:
8393 if (bytes < 0)
8394 break;
8395 /* FALLTHRU */
8397 case E_DImode:
8398 case E_SImode:
8399 case E_HImode:
8400 case E_QImode:
8401 pass_in_reg:
8402 cum->words += words;
8403 cum->nregs -= words;
8404 cum->regno += words;
8405 if (cum->nregs >= 0)
8406 res = words;
8407 if (cum->nregs <= 0)
8409 cum->nregs = 0;
8410 cfun->machine->arg_reg_available = false;
8411 cum->regno = 0;
8413 break;
8415 case E_OImode:
8416 /* OImode shouldn't be used directly. */
8417 gcc_unreachable ();
8419 case E_DFmode:
8420 if (cum->float_in_sse == -1)
8421 error_p = true;
8422 if (cum->float_in_sse < 2)
8423 break;
8424 /* FALLTHRU */
8425 case E_SFmode:
8426 if (cum->float_in_sse == -1)
8427 error_p = true;
8428 if (cum->float_in_sse < 1)
8429 break;
8430 /* FALLTHRU */
8432 case E_V8SFmode:
8433 case E_V8SImode:
8434 case E_V64QImode:
8435 case E_V32HImode:
8436 case E_V16SImode:
8437 case E_V8DImode:
8438 case E_V16SFmode:
8439 case E_V8DFmode:
8440 case E_V32QImode:
8441 case E_V16HImode:
8442 case E_V4DFmode:
8443 case E_V4DImode:
8444 case E_TImode:
8445 case E_V16QImode:
8446 case E_V8HImode:
8447 case E_V4SImode:
8448 case E_V2DImode:
8449 case E_V4SFmode:
8450 case E_V2DFmode:
8451 if (!type || !AGGREGATE_TYPE_P (type))
8453 cum->sse_words += words;
8454 cum->sse_nregs -= 1;
8455 cum->sse_regno += 1;
8456 if (cum->sse_nregs <= 0)
8458 cum->sse_nregs = 0;
8459 cum->sse_regno = 0;
8462 break;
8464 case E_V8QImode:
8465 case E_V4HImode:
8466 case E_V2SImode:
8467 case E_V2SFmode:
8468 case E_V1TImode:
8469 case E_V1DImode:
8470 if (!type || !AGGREGATE_TYPE_P (type))
8472 cum->mmx_words += words;
8473 cum->mmx_nregs -= 1;
8474 cum->mmx_regno += 1;
8475 if (cum->mmx_nregs <= 0)
8477 cum->mmx_nregs = 0;
8478 cum->mmx_regno = 0;
8481 break;
8483 if (error_p)
8485 cum->float_in_sse = 0;
8486 error ("calling %qD with SSE calling convention without "
8487 "SSE/SSE2 enabled", cum->decl);
8488 sorry ("this is a GCC bug that can be worked around by adding "
8489 "attribute used to function called");
8492 return res;
8495 static int
8496 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8497 const_tree type, HOST_WIDE_INT words, bool named)
8499 int int_nregs, sse_nregs;
8501 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
8502 if (!named && (VALID_AVX512F_REG_MODE (mode)
8503 || VALID_AVX256_REG_MODE (mode)))
8504 return 0;
8506 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8507 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8509 cum->nregs -= int_nregs;
8510 cum->sse_nregs -= sse_nregs;
8511 cum->regno += int_nregs;
8512 cum->sse_regno += sse_nregs;
8513 return int_nregs;
8515 else
8517 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8518 cum->words = ROUND_UP (cum->words, align);
8519 cum->words += words;
8520 return 0;
8524 static int
8525 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8526 HOST_WIDE_INT words)
8528 /* Otherwise, this should be passed indirectly. */
8529 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8531 cum->words += words;
8532 if (cum->nregs > 0)
8534 cum->nregs -= 1;
8535 cum->regno += 1;
8536 return 1;
8538 return 0;
8541 /* Update the data in CUM to advance over an argument of mode MODE and
8542 data type TYPE. (TYPE is null for libcalls where that information
8543 may not be available.) */
8545 static void
8546 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8547 const_tree type, bool named)
8549 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8550 HOST_WIDE_INT bytes, words;
8551 int nregs;
8553 /* The argument of an interrupt handler is a special case and is
8554 handled in ix86_function_arg. */
8555 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8556 return;
8558 if (mode == BLKmode)
8559 bytes = int_size_in_bytes (type);
8560 else
8561 bytes = GET_MODE_SIZE (mode);
8562 words = CEIL (bytes, UNITS_PER_WORD);
8564 if (type)
8565 mode = type_natural_mode (type, NULL, false);
8567 if ((type && POINTER_BOUNDS_TYPE_P (type))
8568 || POINTER_BOUNDS_MODE_P (mode))
8570 /* If we pass bounds in the Bounds Table, just update the remaining bounds count. */
8571 if (cum->bnds_in_bt)
8573 cum->bnds_in_bt--;
8574 return;
8578 /* Update the remaining number of bounds to force. */
8578 if (cum->force_bnd_pass)
8579 cum->force_bnd_pass--;
8581 cum->bnd_regno++;
8583 return;
8586 /* The first arg not going to Bounds Tables resets this counter. */
8587 cum->bnds_in_bt = 0;
8588 /* For unnamed args we always pass bounds to avoid a bounds mess when
8589 the passed and received types do not match. If bounds do not follow an
8590 unnamed arg, still pretend the required number of bounds were passed. */
8591 if (cum->force_bnd_pass)
8593 cum->bnd_regno += cum->force_bnd_pass;
8594 cum->force_bnd_pass = 0;
8597 if (TARGET_64BIT)
8599 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8601 if (call_abi == MS_ABI)
8602 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8603 else
8604 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8606 else
8607 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8609 /* For stdarg we expect bounds to be passed for each value passed
8610 in a register. */
8611 if (cum->stdarg)
8612 cum->force_bnd_pass = nregs;
8613 /* For pointers passed in memory we expect bounds to be passed in the
8614 Bounds Table. */
8615 if (!nregs)
8617 /* Track if there are outgoing arguments on stack. */
8618 if (cum->caller)
8619 cfun->machine->outgoing_args_on_stack = true;
8621 cum->bnds_in_bt = chkp_type_bounds_count (type);
8625 /* Define where to put the arguments to a function.
8626 Value is zero to push the argument on the stack,
8627 or a hard register in which to store the argument.
8629 MODE is the argument's machine mode.
8630 TYPE is the data type of the argument (as a tree).
8631 This is null for libcalls where that information may
8632 not be available.
8633 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8634 the preceding args and about the function being called.
8635 NAMED is nonzero if this argument is a named parameter
8636 (otherwise it is an extra parameter matching an ellipsis). */
8638 static rtx
8639 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8640 machine_mode orig_mode, const_tree type,
8641 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8643 bool error_p = false;
8645 /* Avoid the AL settings for the Unix64 ABI. */
8646 if (mode == VOIDmode)
8647 return constm1_rtx;
8649 if (TARGET_IAMCU)
8651 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8652 bytes in registers. */
8653 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8654 goto pass_in_reg;
8655 return NULL_RTX;
8658 switch (mode)
8660 default:
8661 break;
8663 case E_BLKmode:
8664 if (bytes < 0)
8665 break;
8666 /* FALLTHRU */
8667 case E_DImode:
8668 case E_SImode:
8669 case E_HImode:
8670 case E_QImode:
8671 pass_in_reg:
8672 if (words <= cum->nregs)
8674 int regno = cum->regno;
8676 /* Fastcall allocates the first two DWORD (SImode) or
8677 smaller arguments to ECX and EDX if the argument isn't an
8678 aggregate type. */
8679 if (cum->fastcall)
8681 if (mode == BLKmode
8682 || mode == DImode
8683 || (type && AGGREGATE_TYPE_P (type)))
8684 break;
8686 /* ECX, not EAX, is the first allocated register. */
8687 if (regno == AX_REG)
8688 regno = CX_REG;
8690 return gen_rtx_REG (mode, regno);
8692 break;
8694 case E_DFmode:
8695 if (cum->float_in_sse == -1)
8696 error_p = true;
8697 if (cum->float_in_sse < 2)
8698 break;
8699 /* FALLTHRU */
8700 case E_SFmode:
8701 if (cum->float_in_sse == -1)
8702 error_p = true;
8703 if (cum->float_in_sse < 1)
8704 break;
8705 /* FALLTHRU */
8706 case E_TImode:
8707 /* In 32-bit mode, we pass TImode in xmm registers. */
8708 case E_V16QImode:
8709 case E_V8HImode:
8710 case E_V4SImode:
8711 case E_V2DImode:
8712 case E_V4SFmode:
8713 case E_V2DFmode:
8714 if (!type || !AGGREGATE_TYPE_P (type))
8716 if (cum->sse_nregs)
8717 return gen_reg_or_parallel (mode, orig_mode,
8718 cum->sse_regno + FIRST_SSE_REG);
8720 break;
8722 case E_OImode:
8723 case E_XImode:
8724 /* OImode and XImode shouldn't be used directly. */
8725 gcc_unreachable ();
8727 case E_V64QImode:
8728 case E_V32HImode:
8729 case E_V16SImode:
8730 case E_V8DImode:
8731 case E_V16SFmode:
8732 case E_V8DFmode:
8733 case E_V8SFmode:
8734 case E_V8SImode:
8735 case E_V32QImode:
8736 case E_V16HImode:
8737 case E_V4DFmode:
8738 case E_V4DImode:
8739 if (!type || !AGGREGATE_TYPE_P (type))
8741 if (cum->sse_nregs)
8742 return gen_reg_or_parallel (mode, orig_mode,
8743 cum->sse_regno + FIRST_SSE_REG);
8745 break;
8747 case E_V8QImode:
8748 case E_V4HImode:
8749 case E_V2SImode:
8750 case E_V2SFmode:
8751 case E_V1TImode:
8752 case E_V1DImode:
8753 if (!type || !AGGREGATE_TYPE_P (type))
8755 if (cum->mmx_nregs)
8756 return gen_reg_or_parallel (mode, orig_mode,
8757 cum->mmx_regno + FIRST_MMX_REG);
8759 break;
8761 if (error_p)
8763 cum->float_in_sse = 0;
8764 error ("calling %qD with SSE calling convention without "
8765 "SSE/SSE2 enabled", cum->decl);
8766 sorry ("this is a GCC bug that can be worked around by adding "
8767 "attribute used to function called");
8770 return NULL_RTX;
8773 static rtx
8774 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8775 machine_mode orig_mode, const_tree type, bool named)
8777 /* Handle a hidden AL argument containing the number of SSE registers
8778 used by varargs x86-64 functions. */
8779 if (mode == VOIDmode)
8780 return GEN_INT (cum->maybe_vaarg
8781 ? (cum->sse_nregs < 0
8782 ? X86_64_SSE_REGPARM_MAX
8783 : cum->sse_regno)
8784 : -1);
8786 switch (mode)
8788 default:
8789 break;
8791 case E_V8SFmode:
8792 case E_V8SImode:
8793 case E_V32QImode:
8794 case E_V16HImode:
8795 case E_V4DFmode:
8796 case E_V4DImode:
8797 case E_V16SFmode:
8798 case E_V16SImode:
8799 case E_V64QImode:
8800 case E_V32HImode:
8801 case E_V8DFmode:
8802 case E_V8DImode:
8803 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8804 if (!named)
8805 return NULL;
8806 break;
8809 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8810 cum->sse_nregs,
8811 &x86_64_int_parameter_registers [cum->regno],
8812 cum->sse_regno);
8815 static rtx
8816 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8817 machine_mode orig_mode, bool named,
8818 HOST_WIDE_INT bytes)
8820 unsigned int regno;
8822 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8823 We use the value -2 to specify that the current function call is MSABI. */
8824 if (mode == VOIDmode)
8825 return GEN_INT (-2);
8827 /* If we've run out of registers, it goes on the stack. */
8828 if (cum->nregs == 0)
8829 return NULL_RTX;
8831 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8833 /* Only floating point modes are passed in anything but integer regs. */
8834 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8836 if (named)
8837 regno = cum->regno + FIRST_SSE_REG;
8838 else
8840 rtx t1, t2;
8842 /* Unnamed floating parameters are passed in both the
8843 SSE and integer registers. */
8844 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8845 t2 = gen_rtx_REG (mode, regno);
8846 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8847 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8848 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8851 /* Handle aggregate types passed in a register. */
8852 if (orig_mode == BLKmode)
8854 if (bytes > 0 && bytes <= 8)
8855 mode = (bytes > 4 ? DImode : SImode);
8856 if (mode == BLKmode)
8857 mode = DImode;
8860 return gen_reg_or_parallel (mode, orig_mode, regno);
8863 /* Return where to put the arguments to a function.
8864 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8866 MODE is the argument's machine mode. TYPE is the data type of the
8867 argument. It is null for libcalls where that information may not be
8868 available. CUM gives information about the preceding args and about
8869 the function being called. NAMED is nonzero if this argument is a
8870 named parameter (otherwise it is an extra parameter matching an
8871 ellipsis). */
8873 static rtx
8874 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8875 const_tree type, bool named)
8877 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8878 machine_mode mode = omode;
8879 HOST_WIDE_INT bytes, words;
8880 rtx arg;
8882 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8884 gcc_assert (type != NULL_TREE);
8885 if (POINTER_TYPE_P (type))
8887 /* This is the pointer argument. */
8888 gcc_assert (TYPE_MODE (type) == Pmode);
8889 /* It is at -WORD(AP) in the current frame in interrupt and
8890 exception handlers. */
8891 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8893 else
8895 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8896 && TREE_CODE (type) == INTEGER_TYPE
8897 && TYPE_MODE (type) == word_mode);
8898 /* The error code is the word-mode integer argument at
8899 -2 * WORD(AP) in the current frame of the exception
8900 handler. */
8901 arg = gen_rtx_MEM (word_mode,
8902 plus_constant (Pmode,
8903 arg_pointer_rtx,
8904 -2 * UNITS_PER_WORD));
8906 return arg;
8909 /* All pointer bounds arguments are handled separately here. */
8910 if ((type && POINTER_BOUNDS_TYPE_P (type))
8911 || POINTER_BOUNDS_MODE_P (mode))
8913 /* Return NULL if bounds are forced to go in Bounds Table. */
8914 if (cum->bnds_in_bt)
8915 arg = NULL;
8916 /* Return the next available bound reg if any. */
8917 else if (cum->bnd_regno <= LAST_BND_REG)
8918 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8919 /* Return the next special slot number otherwise. */
8920 else
8921 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8923 return arg;
8926 if (mode == BLKmode)
8927 bytes = int_size_in_bytes (type);
8928 else
8929 bytes = GET_MODE_SIZE (mode);
8930 words = CEIL (bytes, UNITS_PER_WORD);
8932 /* To simplify the code below, represent vector types with a vector mode
8933 even if MMX/SSE are not active. */
8934 if (type && TREE_CODE (type) == VECTOR_TYPE)
8935 mode = type_natural_mode (type, cum, false);
8937 if (TARGET_64BIT)
8939 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8941 if (call_abi == MS_ABI)
8942 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8943 else
8944 arg = function_arg_64 (cum, mode, omode, type, named);
8946 else
8947 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8949 /* Track if there are outgoing arguments on stack. */
8950 if (arg == NULL_RTX && cum->caller)
8951 cfun->machine->outgoing_args_on_stack = true;
8953 return arg;
8956 /* A C expression that indicates when an argument must be passed by
8957 reference. If nonzero for an argument, a copy of that argument is
8958 made in memory and a pointer to the argument is passed instead of
8959 the argument itself. The pointer is passed in whatever way is
8960 appropriate for passing a pointer to that type. */
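/* For instance, under the MS ABI an aggregate whose size is not 1, 2, 4 or
   8 bytes (e.g. __m128 or struct { double a, b; }) is passed by reference,
   while an 8-byte aggregate is passed by value in a register.  */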
8962 static bool
8963 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8964 const_tree type, bool)
8966 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8968 /* Bounds are never passed by reference. */
8969 if ((type && POINTER_BOUNDS_TYPE_P (type))
8970 || POINTER_BOUNDS_MODE_P (mode))
8971 return false;
8973 if (TARGET_64BIT)
8975 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8977 /* See Windows x64 Software Convention. */
8978 if (call_abi == MS_ABI)
8980 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8982 if (type)
8984 /* Arrays are passed by reference. */
8985 if (TREE_CODE (type) == ARRAY_TYPE)
8986 return true;
8988 if (RECORD_OR_UNION_TYPE_P (type))
8990 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8991 are passed by reference. */
8992 msize = int_size_in_bytes (type);
8996 /* __m128 is passed by reference. */
8997 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8999 else if (type && int_size_in_bytes (type) == -1)
9000 return true;
9003 return false;
9006 /* Return true when TYPE should be 128bit aligned for 32bit argument
9007 passing ABI. XXX: This function is obsolete and is only used for
9008 checking psABI compatibility with previous versions of GCC. */
9010 static bool
9011 ix86_compat_aligned_value_p (const_tree type)
9013 machine_mode mode = TYPE_MODE (type);
9014 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9015 || mode == TDmode
9016 || mode == TFmode
9017 || mode == TCmode)
9018 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9019 return true;
9020 if (TYPE_ALIGN (type) < 128)
9021 return false;
9023 if (AGGREGATE_TYPE_P (type))
9025 /* Walk the aggregates recursively. */
9026 switch (TREE_CODE (type))
9028 case RECORD_TYPE:
9029 case UNION_TYPE:
9030 case QUAL_UNION_TYPE:
9032 tree field;
9034 /* Walk all the structure fields. */
9035 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9037 if (TREE_CODE (field) == FIELD_DECL
9038 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9039 return true;
9041 break;
9044 case ARRAY_TYPE:
9045 /* Just for use if some languages pass arrays by value. */
9046 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9047 return true;
9048 break;
9050 default:
9051 gcc_unreachable ();
9054 return false;
9057 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9058 XXX: This function is obsolete and is only used for checking psABI
9059 compatibility with previous versions of GCC. */
9061 static unsigned int
9062 ix86_compat_function_arg_boundary (machine_mode mode,
9063 const_tree type, unsigned int align)
9065 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9066 natural boundaries. */
9067 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9069 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9070 make an exception for SSE modes since these require 128bit
9071 alignment.
9073 The handling here differs from field_alignment. ICC aligns MMX
9074 arguments to 4 byte boundaries, while structure fields are aligned
9075 to 8 byte boundaries. */
9076 if (!type)
9078 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9079 align = PARM_BOUNDARY;
9081 else
9083 if (!ix86_compat_aligned_value_p (type))
9084 align = PARM_BOUNDARY;
9087 if (align > BIGGEST_ALIGNMENT)
9088 align = BIGGEST_ALIGNMENT;
9089 return align;
9092 /* Return true when TYPE should be 128bit aligned for 32bit argument
9093 passing ABI. */
9095 static bool
9096 ix86_contains_aligned_value_p (const_tree type)
9098 machine_mode mode = TYPE_MODE (type);
9100 if (mode == XFmode || mode == XCmode)
9101 return false;
9103 if (TYPE_ALIGN (type) < 128)
9104 return false;
9106 if (AGGREGATE_TYPE_P (type))
9108 /* Walk the aggregates recursively. */
9109 switch (TREE_CODE (type))
9111 case RECORD_TYPE:
9112 case UNION_TYPE:
9113 case QUAL_UNION_TYPE:
9115 tree field;
9117 /* Walk all the structure fields. */
9118 for (field = TYPE_FIELDS (type);
9119 field;
9120 field = DECL_CHAIN (field))
9122 if (TREE_CODE (field) == FIELD_DECL
9123 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9124 return true;
9126 break;
9129 case ARRAY_TYPE:
9130 /* Just for use if some languages pass arrays by value. */
9131 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9132 return true;
9133 break;
9135 default:
9136 gcc_unreachable ();
9139 else
9140 return TYPE_ALIGN (type) >= 128;
9142 return false;
9145 /* Gives the alignment boundary, in bits, of an argument with the
9146 specified mode and type. */
9148 static unsigned int
9149 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9151 unsigned int align;
9152 if (type)
9154 /* Since the main variant type is used for the call, we convert TYPE to
9155 its main variant. */
9156 type = TYPE_MAIN_VARIANT (type);
9157 align = TYPE_ALIGN (type);
9158 if (TYPE_EMPTY_P (type))
9159 return PARM_BOUNDARY;
9161 else
9162 align = GET_MODE_ALIGNMENT (mode);
9163 if (align < PARM_BOUNDARY)
9164 align = PARM_BOUNDARY;
9165 else
9167 static bool warned;
9168 unsigned int saved_align = align;
9170 if (!TARGET_64BIT)
9172 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9173 if (!type)
9175 if (mode == XFmode || mode == XCmode)
9176 align = PARM_BOUNDARY;
9178 else if (!ix86_contains_aligned_value_p (type))
9179 align = PARM_BOUNDARY;
9181 if (align < 128)
9182 align = PARM_BOUNDARY;
9185 if (warn_psabi
9186 && !warned
9187 && align != ix86_compat_function_arg_boundary (mode, type,
9188 saved_align))
9190 warned = true;
9191 inform (input_location,
9192 "The ABI for passing parameters with %d-byte"
9193 " alignment has changed in GCC 4.6",
9194 align / BITS_PER_UNIT);
9198 return align;
9201 /* Return true if N is a possible register number of function value. */
9203 static bool
9204 ix86_function_value_regno_p (const unsigned int regno)
9206 switch (regno)
9208 case AX_REG:
9209 return true;
9210 case DX_REG:
9211 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9212 case DI_REG:
9213 case SI_REG:
9214 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9216 case BND0_REG:
9217 case BND1_REG:
9218 return chkp_function_instrumented_p (current_function_decl);
9220 /* Complex values are returned in %st(0)/%st(1) pair. */
9221 case ST0_REG:
9222 case ST1_REG:
9223 /* TODO: The function should depend on current function ABI but
9224 builtins.c would need updating then. Therefore we use the
9225 default ABI. */
9226 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9227 return false;
9228 return TARGET_FLOAT_RETURNS_IN_80387;
9230 /* Complex values are returned in %xmm0/%xmm1 pair. */
9231 case XMM0_REG:
9232 case XMM1_REG:
9233 return TARGET_SSE;
9235 case MM0_REG:
9236 if (TARGET_MACHO || TARGET_64BIT)
9237 return false;
9238 return TARGET_MMX;
9241 return false;
9244 /* Define how to find the value returned by a function.
9245 VALTYPE is the data type of the value (as a tree).
9246 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9247 otherwise, FUNC is 0. */
9249 static rtx
9250 function_value_32 (machine_mode orig_mode, machine_mode mode,
9251 const_tree fntype, const_tree fn)
9253 unsigned int regno;
9255 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9256 we normally prevent this case when mmx is not available. However
9257 some ABIs may require the result to be returned like DImode. */
9258 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9259 regno = FIRST_MMX_REG;
9261 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9262 we prevent this case when sse is not available. However some ABIs
9263 may require the result to be returned like integer TImode. */
9264 else if (mode == TImode
9265 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9266 regno = FIRST_SSE_REG;
9268 /* 32-byte vector modes in %ymm0. */
9269 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9270 regno = FIRST_SSE_REG;
9272 /* 64-byte vector modes in %zmm0. */
9273 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9274 regno = FIRST_SSE_REG;
9276 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9277 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9278 regno = FIRST_FLOAT_REG;
9279 else
9280 /* Most things go in %eax. */
9281 regno = AX_REG;
9283 /* Override FP return register with %xmm0 for local functions when
9284 SSE math is enabled or for functions with sseregparm attribute. */
9285 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9287 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9288 if (sse_level == -1)
9290 error ("calling %qD with SSE calling convention without "
9291 "SSE/SSE2 enabled", fn);
9292 sorry ("this is a GCC bug that can be worked around by adding "
9293 "the %<used%> attribute to the called function");
9295 else if ((sse_level >= 1 && mode == SFmode)
9296 || (sse_level == 2 && mode == DFmode))
9297 regno = FIRST_SSE_REG;
9300 /* OImode shouldn't be used directly. */
9301 gcc_assert (mode != OImode);
9303 return gen_rtx_REG (orig_mode, regno);
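/* Illustrative examples of the scheme above: an int comes back in %eax,
   a float or double in %st(0) by default (or in %xmm0 for sseregparm or
   local SSE-math functions), and an __m128 in %xmm0.  */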
9306 static rtx
9307 function_value_64 (machine_mode orig_mode, machine_mode mode,
9308 const_tree valtype)
9310 rtx ret;
9312 /* Handle libcalls, which don't provide a type node. */
9313 if (valtype == NULL)
9315 unsigned int regno;
9317 switch (mode)
9319 case E_SFmode:
9320 case E_SCmode:
9321 case E_DFmode:
9322 case E_DCmode:
9323 case E_TFmode:
9324 case E_SDmode:
9325 case E_DDmode:
9326 case E_TDmode:
9327 regno = FIRST_SSE_REG;
9328 break;
9329 case E_XFmode:
9330 case E_XCmode:
9331 regno = FIRST_FLOAT_REG;
9332 break;
9333 case E_TCmode:
9334 return NULL;
9335 default:
9336 regno = AX_REG;
9339 return gen_rtx_REG (mode, regno);
9341 else if (POINTER_TYPE_P (valtype))
9343 /* Pointers are always returned in word_mode. */
9344 mode = word_mode;
9347 ret = construct_container (mode, orig_mode, valtype, 1,
9348 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9349 x86_64_int_return_registers, 0);
9351 /* For zero-sized structures, construct_container returns NULL, but we
9352 need to keep the rest of the compiler happy by returning a meaningful value. */
9353 if (!ret)
9354 ret = gen_rtx_REG (orig_mode, AX_REG);
9356 return ret;
9359 static rtx
9360 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9361 const_tree valtype)
9363 unsigned int regno = AX_REG;
9365 if (TARGET_SSE)
9367 switch (GET_MODE_SIZE (mode))
9369 case 16:
9370 if (valtype != NULL_TREE
9371 && !VECTOR_INTEGER_TYPE_P (valtype)
9373 && !INTEGRAL_TYPE_P (valtype)
9374 && !VECTOR_FLOAT_TYPE_P (valtype))
9375 break;
9376 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9377 && !COMPLEX_MODE_P (mode))
9378 regno = FIRST_SSE_REG;
9379 break;
9380 case 8:
9381 case 4:
9382 if (mode == SFmode || mode == DFmode)
9383 regno = FIRST_SSE_REG;
9384 break;
9385 default:
9386 break;
9389 return gen_rtx_REG (orig_mode, regno);
9392 static rtx
9393 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9394 machine_mode orig_mode, machine_mode mode)
9396 const_tree fn, fntype;
9398 fn = NULL_TREE;
9399 if (fntype_or_decl && DECL_P (fntype_or_decl))
9400 fn = fntype_or_decl;
9401 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9403 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9404 || POINTER_BOUNDS_MODE_P (mode))
9405 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9406 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9407 return function_value_ms_64 (orig_mode, mode, valtype);
9408 else if (TARGET_64BIT)
9409 return function_value_64 (orig_mode, mode, valtype);
9410 else
9411 return function_value_32 (orig_mode, mode, fntype, fn);
9414 static rtx
9415 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9417 machine_mode mode, orig_mode;
9419 orig_mode = TYPE_MODE (valtype);
9420 mode = type_natural_mode (valtype, NULL, true);
9421 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9424 /* Return an RTX representing a place where a function returns
9425 or receives pointer bounds, or NULL if no bounds are returned.
9427 VALTYPE is a data type of a value returned by the function.
9429 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9430 or FUNCTION_TYPE of the function.
9432 If OUTGOING is false, return a place in which the caller will
9433 see the return value. Otherwise, return a place where a
9434 function returns a value. */
9436 static rtx
9437 ix86_function_value_bounds (const_tree valtype,
9438 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9439 bool outgoing ATTRIBUTE_UNUSED)
9441 rtx res = NULL_RTX;
9443 if (BOUNDED_TYPE_P (valtype))
9444 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9445 else if (chkp_type_has_pointer (valtype))
9447 bitmap slots;
9448 rtx bounds[2];
9449 bitmap_iterator bi;
9450 unsigned i, bnd_no = 0;
9452 bitmap_obstack_initialize (NULL);
9453 slots = BITMAP_ALLOC (NULL);
9454 chkp_find_bound_slots (valtype, slots);
9456 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9458 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9459 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9460 gcc_assert (bnd_no < 2);
9461 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9464 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9466 BITMAP_FREE (slots);
9467 bitmap_obstack_release (NULL);
9469 else
9470 res = NULL_RTX;
9472 return res;
9475 /* Pointer function arguments and return values are promoted to
9476 word_mode for normal functions. */
9478 static machine_mode
9479 ix86_promote_function_mode (const_tree type, machine_mode mode,
9480 int *punsignedp, const_tree fntype,
9481 int for_return)
9483 if (cfun->machine->func_type == TYPE_NORMAL
9484 && type != NULL_TREE
9485 && POINTER_TYPE_P (type))
9487 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9488 return word_mode;
9490 return default_promote_function_mode (type, mode, punsignedp, fntype,
9491 for_return);
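/* E.g. with -mx32, Pmode is SImode but word_mode is DImode, so pointer
   arguments and return values of normal functions are promoted and
   zero-extended to 64 bits (POINTERS_EXTEND_UNSIGNED).  */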
9494 /* Return true if a structure, union or array with MODE containing FIELD
9495 should be accessed using BLKmode. */
9497 static bool
9498 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9500 /* Union with XFmode must be in BLKmode. */
9501 return (mode == XFmode
9502 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9503 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9507 ix86_libcall_value (machine_mode mode)
9509 return ix86_function_value_1 (NULL, NULL, mode, mode);
9512 /* Return true iff type is returned in memory. */
9514 static bool
9515 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9517 #ifdef SUBTARGET_RETURN_IN_MEMORY
9518 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9519 #else
9520 const machine_mode mode = type_natural_mode (type, NULL, true);
9521 HOST_WIDE_INT size;
9523 if (POINTER_BOUNDS_TYPE_P (type))
9524 return false;
9526 if (TARGET_64BIT)
9528 if (ix86_function_type_abi (fntype) == MS_ABI)
9530 size = int_size_in_bytes (type);
9532 /* __m128 is returned in xmm0. */
9533 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9534 || INTEGRAL_TYPE_P (type)
9535 || VECTOR_FLOAT_TYPE_P (type))
9536 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9537 && !COMPLEX_MODE_P (mode)
9538 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9539 return false;
9541 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
9542 return size != 1 && size != 2 && size != 4 && size != 8;
9544 else
9546 int needed_intregs, needed_sseregs;
9548 return examine_argument (mode, type, 1,
9549 &needed_intregs, &needed_sseregs);
9552 else
9554 size = int_size_in_bytes (type);
9556 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9557 bytes in registers. */
9558 if (TARGET_IAMCU)
9559 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9561 if (mode == BLKmode)
9562 return true;
9564 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9565 return false;
9567 if (VECTOR_MODE_P (mode) || mode == TImode)
9569 /* User-created vectors small enough to fit in EAX. */
9570 if (size < 8)
9571 return false;
9573 /* Unless the ABI prescribes otherwise,
9574 MMX/3dNow values are returned in MM0 if available. */
9576 if (size == 8)
9577 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9579 /* SSE values are returned in XMM0 if available. */
9580 if (size == 16)
9581 return !TARGET_SSE;
9583 /* AVX values are returned in YMM0 if available. */
9584 if (size == 32)
9585 return !TARGET_AVX;
9587 /* AVX512F values are returned in ZMM0 if available. */
9588 if (size == 64)
9589 return !TARGET_AVX512F;
9592 if (mode == XFmode)
9593 return false;
9595 if (size > 12)
9596 return true;
9598 /* OImode shouldn't be used directly. */
9599 gcc_assert (mode != OImode);
9601 return false;
9603 #endif
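/* Illustrative consequences of the rules above for the default 32-bit
   target (no -miamcu): a BLKmode aggregate is always returned in memory,
   a long double (XFmode) comes back in %st(0), and an __m128 comes back
   in %xmm0 when SSE is enabled but in memory otherwise.  */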
9607 /* Create the va_list data type. */
9609 static tree
9610 ix86_build_builtin_va_list_64 (void)
9612 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9614 record = lang_hooks.types.make_type (RECORD_TYPE);
9615 type_decl = build_decl (BUILTINS_LOCATION,
9616 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9618 f_gpr = build_decl (BUILTINS_LOCATION,
9619 FIELD_DECL, get_identifier ("gp_offset"),
9620 unsigned_type_node);
9621 f_fpr = build_decl (BUILTINS_LOCATION,
9622 FIELD_DECL, get_identifier ("fp_offset"),
9623 unsigned_type_node);
9624 f_ovf = build_decl (BUILTINS_LOCATION,
9625 FIELD_DECL, get_identifier ("overflow_arg_area"),
9626 ptr_type_node);
9627 f_sav = build_decl (BUILTINS_LOCATION,
9628 FIELD_DECL, get_identifier ("reg_save_area"),
9629 ptr_type_node);
9631 va_list_gpr_counter_field = f_gpr;
9632 va_list_fpr_counter_field = f_fpr;
9634 DECL_FIELD_CONTEXT (f_gpr) = record;
9635 DECL_FIELD_CONTEXT (f_fpr) = record;
9636 DECL_FIELD_CONTEXT (f_ovf) = record;
9637 DECL_FIELD_CONTEXT (f_sav) = record;
9639 TYPE_STUB_DECL (record) = type_decl;
9640 TYPE_NAME (record) = type_decl;
9641 TYPE_FIELDS (record) = f_gpr;
9642 DECL_CHAIN (f_gpr) = f_fpr;
9643 DECL_CHAIN (f_fpr) = f_ovf;
9644 DECL_CHAIN (f_ovf) = f_sav;
9646 layout_type (record);
9648 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9649 NULL_TREE, TYPE_ATTRIBUTES (record));
9651 /* The correct type is an array type of one element. */
9652 return build_array_type (record, build_index_type (size_zero_node));
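/* In C terms, the record built above matches the SysV AMD64 psABI
   va_list:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */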
9655 /* Set up the builtin va_list data type and, for 64-bit, the additional
9656 calling convention specific va_list data types. */
9658 static tree
9659 ix86_build_builtin_va_list (void)
9661 if (TARGET_64BIT)
9663 /* Initialize ABI specific va_list builtin types.
9665 In lto1, we can encounter two va_list types:
9666 - one as a result of the type-merge across TUs, and
9667 - the one constructed here.
9668 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9669 a type identity check in canonical_va_list_type based on
9670 TYPE_MAIN_VARIANT (which we used to have) will not work.
9671 Instead, we tag each va_list_type_node with its unique attribute, and
9672 look for the attribute in the type identity check in
9673 canonical_va_list_type.
9675 Tagging sysv_va_list_type_node directly with the attribute is
9676 problematic since it's an array of one record, which will decay into a
9677 pointer to record when used as a parameter (see build_va_arg comments for
9678 an example), dropping the attribute in the process. So we tag the
9679 record instead. */
9681 /* For SYSV_ABI we use an array of one record. */
9682 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9684 /* For MS_ABI we use plain pointer to argument area. */
9685 tree char_ptr_type = build_pointer_type (char_type_node);
9686 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9687 TYPE_ATTRIBUTES (char_ptr_type));
9688 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9690 return ((ix86_abi == MS_ABI)
9691 ? ms_va_list_type_node
9692 : sysv_va_list_type_node);
9694 else
9696 /* For i386 we use plain pointer to argument area. */
9697 return build_pointer_type (char_type_node);
9701 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9703 static void
9704 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9706 rtx save_area, mem;
9707 alias_set_type set;
9708 int i, max;
9710 /* GPR size of varargs save area. */
9711 if (cfun->va_list_gpr_size)
9712 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9713 else
9714 ix86_varargs_gpr_size = 0;
9716 /* FPR size of varargs save area. We don't need it if we don't pass
9717 anything in SSE registers. */
9718 if (TARGET_SSE && cfun->va_list_fpr_size)
9719 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9720 else
9721 ix86_varargs_fpr_size = 0;
9723 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9724 return;
9726 save_area = frame_pointer_rtx;
9727 set = get_varargs_alias_set ();
9729 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9730 if (max > X86_64_REGPARM_MAX)
9731 max = X86_64_REGPARM_MAX;
9733 for (i = cum->regno; i < max; i++)
9735 mem = gen_rtx_MEM (word_mode,
9736 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9737 MEM_NOTRAP_P (mem) = 1;
9738 set_mem_alias_set (mem, set);
9739 emit_move_insn (mem,
9740 gen_rtx_REG (word_mode,
9741 x86_64_int_parameter_registers[i]));
9744 if (ix86_varargs_fpr_size)
9746 machine_mode smode;
9747 rtx_code_label *label;
9748 rtx test;
9750 /* Now emit code to save SSE registers. The AX parameter contains the number
9751 of SSE parameter registers used to call this function, though all we
9752 actually check here is the zero/non-zero status. */
9754 label = gen_label_rtx ();
9755 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9756 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9757 label));
9759 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9760 we used movdqa (i.e. TImode) instead? Perhaps even better would
9761 be if we could determine the real mode of the data, via a hook
9762 into pass_stdarg. Ignore all that for now. */
9763 smode = V4SFmode;
9764 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9765 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9767 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9768 if (max > X86_64_SSE_REGPARM_MAX)
9769 max = X86_64_SSE_REGPARM_MAX;
9771 for (i = cum->sse_regno; i < max; ++i)
9773 mem = plus_constant (Pmode, save_area,
9774 i * 16 + ix86_varargs_gpr_size);
9775 mem = gen_rtx_MEM (smode, mem);
9776 MEM_NOTRAP_P (mem) = 1;
9777 set_mem_alias_set (mem, set);
9778 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9780 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9783 emit_label (label);
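/* With the default register counts (X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8) the save area laid out above is 6 * 8 = 48
   bytes of GP registers followed by 8 * 16 = 128 bytes of SSE registers,
   176 bytes in total, addressed relative to frame_pointer_rtx.  */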
9787 static void
9788 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9790 alias_set_type set = get_varargs_alias_set ();
9791 int i;
9793 /* Reset to zero, as there might be a sysv va_arg used
9794 before. */
9795 ix86_varargs_gpr_size = 0;
9796 ix86_varargs_fpr_size = 0;
9798 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9800 rtx reg, mem;
9802 mem = gen_rtx_MEM (Pmode,
9803 plus_constant (Pmode, virtual_incoming_args_rtx,
9804 i * UNITS_PER_WORD));
9805 MEM_NOTRAP_P (mem) = 1;
9806 set_mem_alias_set (mem, set);
9808 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9809 emit_move_insn (mem, reg);
9813 static void
9814 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9815 tree type, int *, int no_rtl)
9817 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9818 CUMULATIVE_ARGS next_cum;
9819 tree fntype;
9821 /* This argument doesn't appear to be used anymore, which is good,
9822 because the old code here didn't suppress rtl generation. */
9823 gcc_assert (!no_rtl);
9825 if (!TARGET_64BIT)
9826 return;
9828 fntype = TREE_TYPE (current_function_decl);
9830 /* For varargs, we do not want to skip the dummy va_dcl argument.
9831 For stdargs, we do want to skip the last named argument. */
9832 next_cum = *cum;
9833 if (stdarg_p (fntype))
9834 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9835 true);
9837 if (cum->call_abi == MS_ABI)
9838 setup_incoming_varargs_ms_64 (&next_cum);
9839 else
9840 setup_incoming_varargs_64 (&next_cum);
9843 static void
9844 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9845 machine_mode mode,
9846 tree type,
9847 int *pretend_size ATTRIBUTE_UNUSED,
9848 int no_rtl)
9850 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9851 CUMULATIVE_ARGS next_cum;
9852 tree fntype;
9853 rtx save_area;
9854 int bnd_reg, i, max;
9856 gcc_assert (!no_rtl);
9858 /* Do nothing if we use plain pointer to argument area. */
9859 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9860 return;
9862 fntype = TREE_TYPE (current_function_decl);
9864 /* For varargs, we do not want to skip the dummy va_dcl argument.
9865 For stdargs, we do want to skip the last named argument. */
9866 next_cum = *cum;
9867 if (stdarg_p (fntype))
9868 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9869 true);
9870 save_area = frame_pointer_rtx;
9872 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9873 if (max > X86_64_REGPARM_MAX)
9874 max = X86_64_REGPARM_MAX;
9876 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9877 if (chkp_function_instrumented_p (current_function_decl))
9878 for (i = cum->regno; i < max; i++)
9880 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9881 rtx ptr = gen_rtx_REG (Pmode,
9882 x86_64_int_parameter_registers[i]);
9883 rtx bounds;
9885 if (bnd_reg <= LAST_BND_REG)
9886 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9887 else
9889 rtx ldx_addr =
9890 plus_constant (Pmode, arg_pointer_rtx,
9891 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9892 bounds = gen_reg_rtx (BNDmode);
9893 emit_insn (BNDmode == BND64mode
9894 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9895 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9898 emit_insn (BNDmode == BND64mode
9899 ? gen_bnd64_stx (addr, ptr, bounds)
9900 : gen_bnd32_stx (addr, ptr, bounds));
9902 bnd_reg++;
9907 /* Return true if TYPE is a plain char * flavor of va_list. */
9909 static bool
9910 is_va_list_char_pointer (tree type)
9912 tree canonic;
9914 /* For 32-bit it is always true. */
9915 if (!TARGET_64BIT)
9916 return true;
9917 canonic = ix86_canonical_va_list_type (type);
9918 return (canonic == ms_va_list_type_node
9919 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9922 /* Implement va_start. */
9924 static void
9925 ix86_va_start (tree valist, rtx nextarg)
9927 HOST_WIDE_INT words, n_gpr, n_fpr;
9928 tree f_gpr, f_fpr, f_ovf, f_sav;
9929 tree gpr, fpr, ovf, sav, t;
9930 tree type;
9931 rtx ovf_rtx;
9933 if (flag_split_stack
9934 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9936 unsigned int scratch_regno;
9938 /* When we are splitting the stack, we can't refer to the stack
9939 arguments using internal_arg_pointer, because they may be on
9940 the old stack. The split stack prologue will arrange to
9941 leave a pointer to the old stack arguments in a scratch
9942 register, which we here copy to a pseudo-register. The split
9943 stack prologue can't set the pseudo-register directly because
9944 it (the prologue) runs before any registers have been saved. */
9946 scratch_regno = split_stack_prologue_scratch_regno ();
9947 if (scratch_regno != INVALID_REGNUM)
9949 rtx reg;
9950 rtx_insn *seq;
9952 reg = gen_reg_rtx (Pmode);
9953 cfun->machine->split_stack_varargs_pointer = reg;
9955 start_sequence ();
9956 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9957 seq = get_insns ();
9958 end_sequence ();
9960 push_topmost_sequence ();
9961 emit_insn_after (seq, entry_of_function ());
9962 pop_topmost_sequence ();
9966 /* Only 64bit target needs something special. */
9967 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9969 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9970 std_expand_builtin_va_start (valist, nextarg);
9971 else
9973 rtx va_r, next;
9975 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9976 next = expand_binop (ptr_mode, add_optab,
9977 cfun->machine->split_stack_varargs_pointer,
9978 crtl->args.arg_offset_rtx,
9979 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9980 convert_move (va_r, next, 0);
9982 /* Store zero bounds for va_list. */
9983 if (chkp_function_instrumented_p (current_function_decl))
9984 chkp_expand_bounds_reset_for_mem (valist,
9985 make_tree (TREE_TYPE (valist),
9986 next));
9989 return;
9992 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9993 f_fpr = DECL_CHAIN (f_gpr);
9994 f_ovf = DECL_CHAIN (f_fpr);
9995 f_sav = DECL_CHAIN (f_ovf);
9997 valist = build_simple_mem_ref (valist);
9998 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9999 /* The following should be folded into the MEM_REF offset. */
10000 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10001 f_gpr, NULL_TREE);
10002 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10003 f_fpr, NULL_TREE);
10004 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10005 f_ovf, NULL_TREE);
10006 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10007 f_sav, NULL_TREE);
10009 /* Count number of gp and fp argument registers used. */
10010 words = crtl->args.info.words;
10011 n_gpr = crtl->args.info.regno;
10012 n_fpr = crtl->args.info.sse_regno;
10014 if (cfun->va_list_gpr_size)
10016 type = TREE_TYPE (gpr);
10017 t = build2 (MODIFY_EXPR, type,
10018 gpr, build_int_cst (type, n_gpr * 8));
10019 TREE_SIDE_EFFECTS (t) = 1;
10020 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10023 if (TARGET_SSE && cfun->va_list_fpr_size)
10025 type = TREE_TYPE (fpr);
10026 t = build2 (MODIFY_EXPR, type, fpr,
10027 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10028 TREE_SIDE_EFFECTS (t) = 1;
10029 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10032 /* Find the overflow area. */
10033 type = TREE_TYPE (ovf);
10034 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10035 ovf_rtx = crtl->args.internal_arg_pointer;
10036 else
10037 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10038 t = make_tree (type, ovf_rtx);
10039 if (words != 0)
10040 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10042 /* Store zero bounds for overflow area pointer. */
10043 if (chkp_function_instrumented_p (current_function_decl))
10044 chkp_expand_bounds_reset_for_mem (ovf, t);
10046 t = build2 (MODIFY_EXPR, type, ovf, t);
10047 TREE_SIDE_EFFECTS (t) = 1;
10048 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10050 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10052 /* Find the register save area.
10053 The function prologue saves it right above the stack frame. */
10054 type = TREE_TYPE (sav);
10055 t = make_tree (type, frame_pointer_rtx);
10056 if (!ix86_varargs_gpr_size)
10057 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10059 /* Store zero bounds for save area pointer. */
10060 if (chkp_function_instrumented_p (current_function_decl))
10061 chkp_expand_bounds_reset_for_mem (sav, t);
10063 t = build2 (MODIFY_EXPR, type, sav, t);
10064 TREE_SIDE_EFFECTS (t) = 1;
10065 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
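/* So right after va_start, gp_offset is n_gpr * 8 and fp_offset is
   8 * X86_64_REGPARM_MAX + n_fpr * 16; both count bytes into
   reg_save_area, while overflow_arg_area points just past the named
   arguments on the stack.  */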
10069 /* Implement va_arg. */
10071 static tree
10072 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10073 gimple_seq *post_p)
10075 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10076 tree f_gpr, f_fpr, f_ovf, f_sav;
10077 tree gpr, fpr, ovf, sav, t;
10078 int size, rsize;
10079 tree lab_false, lab_over = NULL_TREE;
10080 tree addr, t2;
10081 rtx container;
10082 int indirect_p = 0;
10083 tree ptrtype;
10084 machine_mode nat_mode;
10085 unsigned int arg_boundary;
10087 /* Only 64bit target needs something special. */
10088 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10089 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10091 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10092 f_fpr = DECL_CHAIN (f_gpr);
10093 f_ovf = DECL_CHAIN (f_fpr);
10094 f_sav = DECL_CHAIN (f_ovf);
10096 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10097 valist, f_gpr, NULL_TREE);
10099 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10100 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10101 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10103 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10104 if (indirect_p)
10105 type = build_pointer_type (type);
10106 size = arg_int_size_in_bytes (type);
10107 rsize = CEIL (size, UNITS_PER_WORD);
10109 nat_mode = type_natural_mode (type, NULL, false);
10110 switch (nat_mode)
10112 case E_V8SFmode:
10113 case E_V8SImode:
10114 case E_V32QImode:
10115 case E_V16HImode:
10116 case E_V4DFmode:
10117 case E_V4DImode:
10118 case E_V16SFmode:
10119 case E_V16SImode:
10120 case E_V64QImode:
10121 case E_V32HImode:
10122 case E_V8DFmode:
10123 case E_V8DImode:
10124 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10125 if (!TARGET_64BIT_MS_ABI)
10127 container = NULL;
10128 break;
10130 /* FALLTHRU */
10132 default:
10133 container = construct_container (nat_mode, TYPE_MODE (type),
10134 type, 0, X86_64_REGPARM_MAX,
10135 X86_64_SSE_REGPARM_MAX, intreg,
10137 break;
10140 /* Pull the value out of the saved registers. */
10142 addr = create_tmp_var (ptr_type_node, "addr");
10144 if (container)
10146 int needed_intregs, needed_sseregs;
10147 bool need_temp;
10148 tree int_addr, sse_addr;
10150 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10151 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10153 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10155 need_temp = (!REG_P (container)
10156 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10157 || TYPE_ALIGN (type) > 128));
10159 /* If we are passing a structure, verify that it occupies consecutive
10160 slots in the register save area. If not, we need to do moves. */
10161 if (!need_temp && !REG_P (container))
10163 /* Verify that all registers are strictly consecutive. */
10164 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10166 int i;
10168 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10170 rtx slot = XVECEXP (container, 0, i);
10171 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10172 || INTVAL (XEXP (slot, 1)) != i * 16)
10173 need_temp = true;
10176 else
10178 int i;
10180 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10182 rtx slot = XVECEXP (container, 0, i);
10183 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10184 || INTVAL (XEXP (slot, 1)) != i * 8)
10185 need_temp = true;
10189 if (!need_temp)
10191 int_addr = addr;
10192 sse_addr = addr;
10194 else
10196 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10197 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10200 /* First ensure that we fit completely in registers. */
10201 if (needed_intregs)
10203 t = build_int_cst (TREE_TYPE (gpr),
10204 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10205 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10206 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10207 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10208 gimplify_and_add (t, pre_p);
10210 if (needed_sseregs)
10212 t = build_int_cst (TREE_TYPE (fpr),
10213 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10214 + X86_64_REGPARM_MAX * 8);
10215 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10216 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10217 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10218 gimplify_and_add (t, pre_p);
10221 /* Compute index to start of area used for integer regs. */
10222 if (needed_intregs)
10224 /* int_addr = gpr + sav; */
10225 t = fold_build_pointer_plus (sav, gpr);
10226 gimplify_assign (int_addr, t, pre_p);
10228 if (needed_sseregs)
10230 /* sse_addr = fpr + sav; */
10231 t = fold_build_pointer_plus (sav, fpr);
10232 gimplify_assign (sse_addr, t, pre_p);
10234 if (need_temp)
10236 int i, prev_size = 0;
10237 tree temp = create_tmp_var (type, "va_arg_tmp");
10239 /* addr = &temp; */
10240 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10241 gimplify_assign (addr, t, pre_p);
10243 for (i = 0; i < XVECLEN (container, 0); i++)
10245 rtx slot = XVECEXP (container, 0, i);
10246 rtx reg = XEXP (slot, 0);
10247 machine_mode mode = GET_MODE (reg);
10248 tree piece_type;
10249 tree addr_type;
10250 tree daddr_type;
10251 tree src_addr, src;
10252 int src_offset;
10253 tree dest_addr, dest;
10254 int cur_size = GET_MODE_SIZE (mode);
10256 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10257 prev_size = INTVAL (XEXP (slot, 1));
10258 if (prev_size + cur_size > size)
10260 cur_size = size - prev_size;
10261 unsigned int nbits = cur_size * BITS_PER_UNIT;
10262 if (!int_mode_for_size (nbits, 1).exists (&mode))
10263 mode = QImode;
10265 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10266 if (mode == GET_MODE (reg))
10267 addr_type = build_pointer_type (piece_type);
10268 else
10269 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10270 true);
10271 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10272 true);
10274 if (SSE_REGNO_P (REGNO (reg)))
10276 src_addr = sse_addr;
10277 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10279 else
10281 src_addr = int_addr;
10282 src_offset = REGNO (reg) * 8;
10284 src_addr = fold_convert (addr_type, src_addr);
10285 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10287 dest_addr = fold_convert (daddr_type, addr);
10288 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10289 if (cur_size == GET_MODE_SIZE (mode))
10291 src = build_va_arg_indirect_ref (src_addr);
10292 dest = build_va_arg_indirect_ref (dest_addr);
10294 gimplify_assign (dest, src, pre_p);
10296 else
10298 tree copy
10299 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10300 3, dest_addr, src_addr,
10301 size_int (cur_size));
10302 gimplify_and_add (copy, pre_p);
10304 prev_size += cur_size;
10308 if (needed_intregs)
10310 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10311 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10312 gimplify_assign (gpr, t, pre_p);
10315 if (needed_sseregs)
10317 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10318 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10319 gimplify_assign (unshare_expr (fpr), t, pre_p);
10322 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10324 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10327 /* ... otherwise out of the overflow area. */
10329 /* When the caller aligns a parameter on the stack, a parameter
10330 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
10331 MAX_SUPPORTED_STACK_ALIGNMENT, so the callee must match the
10332 caller here. */
10333 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10334 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10335 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10337 /* Care for on-stack alignment if needed. */
10338 if (arg_boundary <= 64 || size == 0)
10339 t = ovf;
10340 else
10342 HOST_WIDE_INT align = arg_boundary / 8;
10343 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10344 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10345 build_int_cst (TREE_TYPE (t), -align));
10348 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10349 gimplify_assign (addr, t, pre_p);
10351 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10352 gimplify_assign (unshare_expr (ovf), t, pre_p);
10354 if (container)
10355 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10357 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10358 addr = fold_convert (ptrtype, addr);
10360 if (indirect_p)
10361 addr = build_va_arg_indirect_ref (addr);
10362 return build_va_arg_indirect_ref (addr);
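/* A rough sketch of the gimple emitted above for a plain int argument
   (one GP register needed), using the __va_list_tag field names:

     if (ap->gp_offset >= 6 * 8)
       goto overflow;               (no GP argument register slot left)
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;  (aligned first when required)
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;  */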
10365 /* Return true if OPNUM's MEM should be matched
10366 in movabs* patterns. */
10368 bool
10369 ix86_check_movabs (rtx insn, int opnum)
10371 rtx set, mem;
10373 set = PATTERN (insn);
10374 if (GET_CODE (set) == PARALLEL)
10375 set = XVECEXP (set, 0, 0);
10376 gcc_assert (GET_CODE (set) == SET);
10377 mem = XEXP (set, opnum);
10378 while (SUBREG_P (mem))
10379 mem = SUBREG_REG (mem);
10380 gcc_assert (MEM_P (mem));
10381 return volatile_ok || !MEM_VOLATILE_P (mem);
10384 /* Return false if INSN contains a MEM with a non-default address space. */
10385 bool
10386 ix86_check_no_addr_space (rtx insn)
10388 subrtx_var_iterator::array_type array;
10389 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10391 rtx x = *iter;
10392 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10393 return false;
10395 return true;
10398 /* Initialize the table of extra 80387 mathematical constants. */
10400 static void
10401 init_ext_80387_constants (void)
10403 static const char * cst[5] =
10405 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10406 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10407 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10408 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10409 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10411 int i;
10413 for (i = 0; i < 5; i++)
10415 real_from_string (&ext_80387_constants_table[i], cst[i]);
10416 /* Ensure each constant is rounded to XFmode precision. */
10417 real_convert (&ext_80387_constants_table[i],
10418 XFmode, &ext_80387_constants_table[i]);
10421 ext_80387_constants_init = 1;
10424 /* Return non-zero if the constant is something that
10425 can be loaded with a special instruction. */
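/* The return value encodes which instruction standard_80387_constant_opcode
   will pick: -1 means not an x87 float constant, 0 no special instruction,
   1 fldz, 2 fld1, 3..7 the ext_80387_constants_table entries (fldlg2,
   fldln2, fldl2e, fldl2t, fldpi), and 8/9 stand for -0.0/-1.0, which are
   loaded as fldz/fld1 followed by fchs.  */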
10428 standard_80387_constant_p (rtx x)
10430 machine_mode mode = GET_MODE (x);
10432 const REAL_VALUE_TYPE *r;
10434 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10435 return -1;
10437 if (x == CONST0_RTX (mode))
10438 return 1;
10439 if (x == CONST1_RTX (mode))
10440 return 2;
10442 r = CONST_DOUBLE_REAL_VALUE (x);
10444 /* For XFmode constants, try to find a special 80387 instruction when
10445 optimizing for size or on those CPUs that benefit from them. */
10446 if (mode == XFmode
10447 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10449 int i;
10451 if (! ext_80387_constants_init)
10452 init_ext_80387_constants ();
10454 for (i = 0; i < 5; i++)
10455 if (real_identical (r, &ext_80387_constants_table[i]))
10456 return i + 3;
10459 /* Load of the constant -0.0 or -1.0 will be split as
10460 fldz;fchs or fld1;fchs sequence. */
10461 if (real_isnegzero (r))
10462 return 8;
10463 if (real_identical (r, &dconstm1))
10464 return 9;
10466 return 0;
10469 /* Return the opcode of the special instruction to be used to load
10470 the constant X. */
10472 const char *
10473 standard_80387_constant_opcode (rtx x)
10475 switch (standard_80387_constant_p (x))
10477 case 1:
10478 return "fldz";
10479 case 2:
10480 return "fld1";
10481 case 3:
10482 return "fldlg2";
10483 case 4:
10484 return "fldln2";
10485 case 5:
10486 return "fldl2e";
10487 case 6:
10488 return "fldl2t";
10489 case 7:
10490 return "fldpi";
10491 case 8:
10492 case 9:
10493 return "#";
10494 default:
10495 gcc_unreachable ();
10499 /* Return the CONST_DOUBLE representing the 80387 constant that is
10500 loaded by the specified special instruction. The argument IDX
10501 matches the return value from standard_80387_constant_p. */
10504 standard_80387_constant_rtx (int idx)
10506 int i;
10508 if (! ext_80387_constants_init)
10509 init_ext_80387_constants ();
10511 switch (idx)
10513 case 3:
10514 case 4:
10515 case 5:
10516 case 6:
10517 case 7:
10518 i = idx - 3;
10519 break;
10521 default:
10522 gcc_unreachable ();
10525 return const_double_from_real_value (ext_80387_constants_table[i],
10526 XFmode);
10529 /* Return 1 if X is all bits 0, and 2 if X is all bits 1, in a
10530 supported SSE/AVX vector mode; return 0 otherwise. */
10533 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10535 machine_mode mode;
10537 if (!TARGET_SSE)
10538 return 0;
10540 mode = GET_MODE (x);
10542 if (x == const0_rtx || const0_operand (x, mode))
10543 return 1;
10545 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10547 /* VOIDmode integer constant, get mode from the predicate. */
10548 if (mode == VOIDmode)
10549 mode = pred_mode;
10551 switch (GET_MODE_SIZE (mode))
10553 case 64:
10554 if (TARGET_AVX512F)
10555 return 2;
10556 break;
10557 case 32:
10558 if (TARGET_AVX2)
10559 return 2;
10560 break;
10561 case 16:
10562 if (TARGET_SSE2)
10563 return 2;
10564 break;
10565 case 0:
10566 /* VOIDmode */
10567 gcc_unreachable ();
10568 default:
10569 break;
10573 return 0;
10576 /* Return the opcode of the special instruction to be used to load
10577 the constant operands[1] into operands[0]. */
10579 const char *
10580 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
10582 machine_mode mode;
10583 rtx x = operands[1];
10585 gcc_assert (TARGET_SSE);
10587 mode = GET_MODE (x);
10589 if (x == const0_rtx || const0_operand (x, mode))
10591 switch (get_attr_mode (insn))
10593 case MODE_TI:
10594 if (!EXT_REX_SSE_REG_P (operands[0]))
10595 return "%vpxor\t%0, %d0";
10596 /* FALLTHRU */
10597 case MODE_XI:
10598 case MODE_OI:
10599 if (EXT_REX_SSE_REG_P (operands[0]))
10600 return (TARGET_AVX512VL
10601 ? "vpxord\t%x0, %x0, %x0"
10602 : "vpxord\t%g0, %g0, %g0");
10603 return "vpxor\t%x0, %x0, %x0";
10605 case MODE_V2DF:
10606 if (!EXT_REX_SSE_REG_P (operands[0]))
10607 return "%vxorpd\t%0, %d0";
10608 /* FALLTHRU */
10609 case MODE_V8DF:
10610 case MODE_V4DF:
10611 if (!EXT_REX_SSE_REG_P (operands[0]))
10612 return "vxorpd\t%x0, %x0, %x0";
10613 else if (TARGET_AVX512DQ)
10614 return (TARGET_AVX512VL
10615 ? "vxorpd\t%x0, %x0, %x0"
10616 : "vxorpd\t%g0, %g0, %g0");
10617 else
10618 return (TARGET_AVX512VL
10619 ? "vpxorq\t%x0, %x0, %x0"
10620 : "vpxorq\t%g0, %g0, %g0");
10622 case MODE_V4SF:
10623 if (!EXT_REX_SSE_REG_P (operands[0]))
10624 return "%vxorps\t%0, %d0";
10625 /* FALLTHRU */
10626 case MODE_V16SF:
10627 case MODE_V8SF:
10628 if (!EXT_REX_SSE_REG_P (operands[0]))
10629 return "vxorps\t%x0, %x0, %x0";
10630 else if (TARGET_AVX512DQ)
10631 return (TARGET_AVX512VL
10632 ? "vxorps\t%x0, %x0, %x0"
10633 : "vxorps\t%g0, %g0, %g0");
10634 else
10635 return (TARGET_AVX512VL
10636 ? "vpxord\t%x0, %x0, %x0"
10637 : "vpxord\t%g0, %g0, %g0");
10639 default:
10640 gcc_unreachable ();
10643 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10645 enum attr_mode insn_mode = get_attr_mode (insn);
10647 switch (insn_mode)
10649 case MODE_XI:
10650 case MODE_V8DF:
10651 case MODE_V16SF:
10652 gcc_assert (TARGET_AVX512F);
10653 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10655 case MODE_OI:
10656 case MODE_V4DF:
10657 case MODE_V8SF:
10658 gcc_assert (TARGET_AVX2);
10659 /* FALLTHRU */
10660 case MODE_TI:
10661 case MODE_V2DF:
10662 case MODE_V4SF:
10663 gcc_assert (TARGET_SSE2);
10664 if (!EXT_REX_SSE_REG_P (operands[0]))
10665 return (TARGET_AVX
10666 ? "vpcmpeqd\t%0, %0, %0"
10667 : "pcmpeqd\t%0, %0");
10668 else if (TARGET_AVX512VL)
10669 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
10670 else
10671 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10673 default:
10674 gcc_unreachable ();
10678 gcc_unreachable ();
10681 /* Returns true if INSN can be transformed from a memory load
10682 to a supported FP constant load. */
10684 bool
10685 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10687 rtx src = find_constant_src (insn);
10689 gcc_assert (REG_P (dst));
10691 if (src == NULL
10692 || (SSE_REGNO_P (REGNO (dst))
10693 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10694 || (STACK_REGNO_P (REGNO (dst))
10695 && standard_80387_constant_p (src) < 1))
10696 return false;
10698 return true;
10701 /* Returns true if OP contains a symbol reference. */
10703 bool
10704 symbolic_reference_mentioned_p (rtx op)
10706 const char *fmt;
10707 int i;
10709 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10710 return true;
10712 fmt = GET_RTX_FORMAT (GET_CODE (op));
10713 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10715 if (fmt[i] == 'E')
10717 int j;
10719 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10720 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10721 return true;
10724 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10725 return true;
10728 return false;
10731 /* Return true if it is appropriate to emit `ret' instructions in the
10732 body of a function. Do this only if the epilogue is simple, needing a
10733 couple of insns. Prior to reloading, we can't tell how many registers
10734 must be saved, so return false then. Return false if there is no frame
10735 marker to de-allocate. */
10737 bool
10738 ix86_can_use_return_insn_p (void)
10740 if (ix86_function_naked (current_function_decl))
10741 return false;
10743 /* Don't use `ret' instruction in interrupt handler. */
10744 if (! reload_completed
10745 || frame_pointer_needed
10746 || cfun->machine->func_type != TYPE_NORMAL)
10747 return 0;
10749 /* Don't allow more than 32k pop, since that's all we can do
10750 with one instruction. */
10751 if (crtl->args.pops_args && crtl->args.size >= 32768)
10752 return 0;
10754 struct ix86_frame &frame = cfun->machine->frame;
10755 return (frame.stack_pointer_offset == UNITS_PER_WORD
10756 && (frame.nregs + frame.nsseregs) == 0);
10759 /* Value should be nonzero if functions must have frame pointers.
10760 Zero means the frame pointer need not be set up (and parms may
10761 be accessed via the stack pointer) in functions that seem suitable. */
10763 static bool
10764 ix86_frame_pointer_required (void)
10766 /* If we accessed previous frames, then the generated code expects
10767 to be able to access the saved ebp value in our frame. */
10768 if (cfun->machine->accesses_prev_frame)
10769 return true;
10771 /* Several x86 OSes need a frame pointer for other reasons,
10772 usually pertaining to setjmp. */
10773 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10774 return true;
10776 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10777 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10778 return true;
10780 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10781 allocation is 4GB. */
10782 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10783 return true;
10785 /* SSE saves require frame-pointer when stack is misaligned. */
10786 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10787 return true;
10789 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10790 turns off the frame pointer by default. Turn it back on now if
10791 we've not got a leaf function. */
10792 if (TARGET_OMIT_LEAF_FRAME_POINTER
10793 && (!crtl->is_leaf
10794 || ix86_current_function_calls_tls_descriptor))
10795 return true;
10797 if (crtl->profile && !flag_fentry)
10798 return true;
10800 return false;
10803 /* Record that the current function accesses previous call frames. */
10805 void
10806 ix86_setup_frame_addresses (void)
10808 cfun->machine->accesses_prev_frame = 1;
10811 #ifndef USE_HIDDEN_LINKONCE
10812 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10813 # define USE_HIDDEN_LINKONCE 1
10814 # else
10815 # define USE_HIDDEN_LINKONCE 0
10816 # endif
10817 #endif
10819 /* Label count for call and return thunks. It is used to make unique
10820 labels in call and return thunks. */
10821 static int indirectlabelno;
10823 /* True if call thunk function is needed. */
10824 static bool indirect_thunk_needed = false;
10825 /* True if call thunk function with the BND prefix is needed. */
10826 static bool indirect_thunk_bnd_needed = false;
10828 /* Bit masks of integer registers, which contain branch target, used
10829 by call thunk functions. */
10830 static int indirect_thunks_used;
10831 /* Bit masks of integer registers, which contain branch target, used
10832 by call thunk functions with the BND prefix. */
10833 static int indirect_thunks_bnd_used;
10835 /* True if return thunk function is needed. */
10836 static bool indirect_return_needed = false;
10837 /* True if return thunk function with the BND prefix is needed. */
10838 static bool indirect_return_bnd_needed = false;
10840 /* True if return thunk function via CX is needed. */
10841 static bool indirect_return_via_cx;
10842 /* True if return thunk function via CX with the BND prefix is
10843 needed. */
10844 static bool indirect_return_via_cx_bnd;
10846 #ifndef INDIRECT_LABEL
10847 # define INDIRECT_LABEL "LIND"
10848 #endif
10850 /* Indicate what prefix is needed for an indirect branch. */
10851 enum indirect_thunk_prefix
10853 indirect_thunk_prefix_none,
10854 indirect_thunk_prefix_bnd,
10855 indirect_thunk_prefix_nt
10858 /* Return the prefix needed for an indirect branch INSN. */
10860 enum indirect_thunk_prefix
10861 indirect_thunk_need_prefix (rtx_insn *insn)
10863 enum indirect_thunk_prefix need_prefix;
10864 if (ix86_bnd_prefixed_insn_p (insn))
10865 need_prefix = indirect_thunk_prefix_bnd;
10866 else if ((cfun->machine->indirect_branch_type
10867 == indirect_branch_thunk_extern)
10868 && ix86_notrack_prefixed_insn_p (insn))
10870 /* NOTRACK prefix is only used with external thunk so that it
10871 can be properly updated to support CET at run-time. */
10872 need_prefix = indirect_thunk_prefix_nt;
10874 else
10875 need_prefix = indirect_thunk_prefix_none;
10876 return need_prefix;
10879 /* Fills in the label name that should be used for the indirect thunk. */
10881 static void
10882 indirect_thunk_name (char name[32], unsigned int regno,
10883 enum indirect_thunk_prefix need_prefix,
10884 bool ret_p)
10886 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
10887 gcc_unreachable ();
10889 if (USE_HIDDEN_LINKONCE)
10891 const char *prefix;
10893 if (need_prefix == indirect_thunk_prefix_bnd)
10894 prefix = "_bnd";
10895 else if (need_prefix == indirect_thunk_prefix_nt
10896 && regno != INVALID_REGNUM)
10898 /* NOTRACK prefix is only used with external thunk via
10899 register so that NOTRACK prefix can be added to indirect
10900 branch via register to support CET at run-time. */
10901 prefix = "_nt";
10903 else
10904 prefix = "";
10906 const char *ret = ret_p ? "return" : "indirect";
10908 if (regno != INVALID_REGNUM)
10910 const char *reg_prefix;
10911 if (LEGACY_INT_REGNO_P (regno))
10912 reg_prefix = TARGET_64BIT ? "r" : "e";
10913 else
10914 reg_prefix = "";
10915 sprintf (name, "__x86_%s_thunk%s_%s%s",
10916 ret, prefix, reg_prefix, reg_names[regno]);
10918 else
10919 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
10921 else
10923 if (regno != INVALID_REGNUM)
10925 if (need_prefix == indirect_thunk_prefix_bnd)
10926 ASM_GENERATE_INTERNAL_LABEL (name, "LITBR", regno);
10927 else
10928 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
10930 else
10932 if (ret_p)
10934 if (need_prefix == indirect_thunk_prefix_bnd)
10935 ASM_GENERATE_INTERNAL_LABEL (name, "LRTB", 0);
10936 else
10937 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
10939 else
10941 if (need_prefix == indirect_thunk_prefix_bnd)
10942 ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
10943 else
10944 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
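/* With USE_HIDDEN_LINKONCE this yields names such as
   "__x86_indirect_thunk" (target address on the stack),
   "__x86_indirect_thunk_rax" (target address in %rax),
   "__x86_return_thunk" for the return variant, and
   "__x86_indirect_thunk_bnd_rax" or "__x86_indirect_thunk_nt_rax" for
   the BND and NOTRACK flavors.  */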
10950 /* Output a call and return thunk for indirect branch. If NEED_PREFIX
10951 is indirect_thunk_prefix_bnd, the BND prefix is needed. If REGNO !=
10952 INVALID_REGNUM, the function address is in REGNO and the thunk looks like:
10954 call L2
10955 L1:
10956 pause
10957 lfence
10958 jmp L1
10959 L2:
10960 mov %REG, (%sp)
10961 ret
10963 Otherwise, the function address is on the top of stack and the
10964 call and return thunk looks like:
10966 call L2
10967 L1:
10968 pause
10969 lfence
10970 jmp L1
10971 L2:
10972 lea WORD_SIZE(%sp), %sp
10973 ret
10974 */
10976 static void
10977 output_indirect_thunk (enum indirect_thunk_prefix need_prefix,
10978 unsigned int regno)
10980 char indirectlabel1[32];
10981 char indirectlabel2[32];
10983 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
10984 indirectlabelno++);
10985 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
10986 indirectlabelno++);
10988 /* Call */
10989 if (need_prefix == indirect_thunk_prefix_bnd)
10990 fputs ("\tbnd call\t", asm_out_file);
10991 else
10992 fputs ("\tcall\t", asm_out_file);
10993 assemble_name_raw (asm_out_file, indirectlabel2);
10994 fputc ('\n', asm_out_file);
10996 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
10998 /* AMD and Intel CPUs each prefer a different instruction as the loop
10999 filler; using both pause + lfence is a compromise. */
11000 fprintf (asm_out_file, "\tpause\n\tlfence\n");
11002 /* Jump. */
11003 fputs ("\tjmp\t", asm_out_file);
11004 assemble_name_raw (asm_out_file, indirectlabel1);
11005 fputc ('\n', asm_out_file);
11007 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
11009 if (regno != INVALID_REGNUM)
11011 /* MOV. */
11012 rtx xops[2];
11013 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
11014 xops[1] = gen_rtx_REG (word_mode, regno);
11015 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
11017 else
11019 /* LEA. */
11020 rtx xops[2];
11021 xops[0] = stack_pointer_rtx;
11022 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11023 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
11026 if (need_prefix == indirect_thunk_prefix_bnd)
11027 fputs ("\tbnd ret\n", asm_out_file);
11028 else
11029 fputs ("\tret\n", asm_out_file);
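/* For example, for a 64-bit target with the address in %rax the body
   emitted above looks roughly like this (label names are illustrative):

        call    .LIND1
   .LIND0:
        pause
        lfence
        jmp     .LIND0
   .LIND1:
        mov     %rax, (%rsp)
        ret  */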
11032 /* Output a function with a call and return thunk for indirect branch.
11033 If NEED_PREFIX is indirect_thunk_prefix_bnd, the BND prefix is
11034 needed. If REGNO != INVALID_REGNUM, the function address is in
11035 REGNO. Otherwise, the function address is on the top of the stack.
11036 The thunk is used for the function return if RET_P is true. */
11038 static void
11039 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
11040 unsigned int regno, bool ret_p)
11042 char name[32];
11043 tree decl;
11045 /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd. */
11046 indirect_thunk_name (name, regno, need_prefix, ret_p);
11047 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11048 get_identifier (name),
11049 build_function_type_list (void_type_node, NULL_TREE));
11050 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11051 NULL_TREE, void_type_node);
11052 TREE_PUBLIC (decl) = 1;
11053 TREE_STATIC (decl) = 1;
11054 DECL_IGNORED_P (decl) = 1;
11056 #if TARGET_MACHO
11057 if (TARGET_MACHO)
11059 switch_to_section (darwin_sections[picbase_thunk_section]);
11060 fputs ("\t.weak_definition\t", asm_out_file);
11061 assemble_name (asm_out_file, name);
11062 fputs ("\n\t.private_extern\t", asm_out_file);
11063 assemble_name (asm_out_file, name);
11064 putc ('\n', asm_out_file);
11065 ASM_OUTPUT_LABEL (asm_out_file, name);
11066 DECL_WEAK (decl) = 1;
11068 else
11069 #endif
11070 if (USE_HIDDEN_LINKONCE)
11072 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11074 targetm.asm_out.unique_section (decl, 0);
11075 switch_to_section (get_named_section (decl, NULL, 0));
11077 targetm.asm_out.globalize_label (asm_out_file, name);
11078 fputs ("\t.hidden\t", asm_out_file);
11079 assemble_name (asm_out_file, name);
11080 putc ('\n', asm_out_file);
11081 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11083 else
11085 switch_to_section (text_section);
11086 ASM_OUTPUT_LABEL (asm_out_file, name);
11089 DECL_INITIAL (decl) = make_node (BLOCK);
11090 current_function_decl = decl;
11091 allocate_struct_function (decl, false);
11092 init_function_start (decl);
11093 /* We're about to hide the function body from callees of final_* by
11094 emitting it directly; tell them we're a thunk, if they care. */
11095 cfun->is_thunk = true;
11096 first_function_block_is_cold = false;
11097 /* Make sure unwind info is emitted for the thunk if needed. */
11098 final_start_function (emit_barrier (), asm_out_file, 1);
11100 output_indirect_thunk (need_prefix, regno);
11102 final_end_function ();
11103 init_insn_lengths ();
11104 free_after_compilation (cfun);
11105 set_cfun (NULL);
11106 current_function_decl = NULL;
11109 static int pic_labels_used;
11111 /* Fills in the label name that should be used for a pc thunk for
11112 the given register. */
11114 static void
11115 get_pc_thunk_name (char name[32], unsigned int regno)
11117 gcc_assert (!TARGET_64BIT);
11119 if (USE_HIDDEN_LINKONCE)
11120 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
11121 else
11122 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
11126 /* This function generates code for -fpic that loads %ebx with
11127 the return address of the caller and then returns. */
11129 static void
11130 ix86_code_end (void)
11132 rtx xops[2];
11133 unsigned int regno;
11135 if (indirect_return_needed)
11136 output_indirect_thunk_function (indirect_thunk_prefix_none,
11137 INVALID_REGNUM, true);
11138 if (indirect_return_bnd_needed)
11139 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11140 INVALID_REGNUM, true);
11142 if (indirect_return_via_cx)
11143 output_indirect_thunk_function (indirect_thunk_prefix_none,
11144 CX_REG, true);
11145 if (indirect_return_via_cx_bnd)
11146 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11147 CX_REG, true);
11149 if (indirect_thunk_needed)
11150 output_indirect_thunk_function (indirect_thunk_prefix_none,
11151 INVALID_REGNUM, false);
11152 if (indirect_thunk_bnd_needed)
11153 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11154 INVALID_REGNUM, false);
11156 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
11158 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
11159 if ((indirect_thunks_used & (1 << i)))
11160 output_indirect_thunk_function (indirect_thunk_prefix_none,
11161 regno, false);
11163 if ((indirect_thunks_bnd_used & (1 << i)))
11164 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11165 regno, false);
11168 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
11170 char name[32];
11171 tree decl;
11173 if ((indirect_thunks_used & (1 << regno)))
11174 output_indirect_thunk_function (indirect_thunk_prefix_none,
11175 regno, false);
11177 if ((indirect_thunks_bnd_used & (1 << regno)))
11178 output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11179 regno, false);
11181 if (!(pic_labels_used & (1 << regno)))
11182 continue;
11184 get_pc_thunk_name (name, regno);
11186 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11187 get_identifier (name),
11188 build_function_type_list (void_type_node, NULL_TREE));
11189 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11190 NULL_TREE, void_type_node);
11191 TREE_PUBLIC (decl) = 1;
11192 TREE_STATIC (decl) = 1;
11193 DECL_IGNORED_P (decl) = 1;
11195 #if TARGET_MACHO
11196 if (TARGET_MACHO)
11198 switch_to_section (darwin_sections[picbase_thunk_section]);
11199 fputs ("\t.weak_definition\t", asm_out_file);
11200 assemble_name (asm_out_file, name);
11201 fputs ("\n\t.private_extern\t", asm_out_file);
11202 assemble_name (asm_out_file, name);
11203 putc ('\n', asm_out_file);
11204 ASM_OUTPUT_LABEL (asm_out_file, name);
11205 DECL_WEAK (decl) = 1;
11207 else
11208 #endif
11209 if (USE_HIDDEN_LINKONCE)
11211 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11213 targetm.asm_out.unique_section (decl, 0);
11214 switch_to_section (get_named_section (decl, NULL, 0));
11216 targetm.asm_out.globalize_label (asm_out_file, name);
11217 fputs ("\t.hidden\t", asm_out_file);
11218 assemble_name (asm_out_file, name);
11219 putc ('\n', asm_out_file);
11220 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11222 else
11224 switch_to_section (text_section);
11225 ASM_OUTPUT_LABEL (asm_out_file, name);
11228 DECL_INITIAL (decl) = make_node (BLOCK);
11229 current_function_decl = decl;
11230 allocate_struct_function (decl, false);
11231 init_function_start (decl);
11232 /* We're about to hide the function body from callees of final_* by
11233 emitting it directly; tell them we're a thunk, if they care. */
11234 cfun->is_thunk = true;
11235 first_function_block_is_cold = false;
11236 /* Make sure unwind info is emitted for the thunk if needed. */
11237 final_start_function (emit_barrier (), asm_out_file, 1);
11239 /* Pad stack IP move with 4 instructions (two NOPs count
11240 as one instruction). */
11241 if (TARGET_PAD_SHORT_FUNCTION)
11243 int i = 8;
11245 while (i--)
11246 fputs ("\tnop\n", asm_out_file);
11249 xops[0] = gen_rtx_REG (Pmode, regno);
11250 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11251 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11252 output_asm_insn ("%!ret", NULL);
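  /* The thunk emitted here is just a load of the return address into
     the chosen register followed by a return; for %ebx it should look
     roughly like this (AT&T syntax, illustrative only):

	 __x86.get_pc_thunk.bx:
		 movl	(%esp), %ebx
		 ret  */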
11253 final_end_function ();
11254 init_insn_lengths ();
11255 free_after_compilation (cfun);
11256 set_cfun (NULL);
11257 current_function_decl = NULL;
11260 if (flag_split_stack)
11261 file_end_indicate_split_stack ();
11264 /* Emit code for the SET_GOT patterns. */
11266 const char *
11267 output_set_got (rtx dest, rtx label)
11269 rtx xops[3];
11271 xops[0] = dest;
11273 if (TARGET_VXWORKS_RTP && flag_pic)
11275 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11276 xops[2] = gen_rtx_MEM (Pmode,
11277 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11278 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11280 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11281 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11282 an unadorned address. */
11283 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11284 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11285 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11286 return "";
11289 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11291 if (flag_pic)
11293 char name[32];
11294 get_pc_thunk_name (name, REGNO (dest));
11295 pic_labels_used |= 1 << REGNO (dest);
11297 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11298 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11299 output_asm_insn ("%!call\t%X2", xops);
11301 #if TARGET_MACHO
11302 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11303 This is what will be referenced by the Mach-O PIC subsystem. */
11304 if (machopic_should_output_picbase_label () || !label)
11305 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11307 /* When we are restoring the pic base at the site of a nonlocal label,
11308 and we decided to emit the pic base above, we will still output a
11309 local label used for calculating the correction offset (even though
11310 the offset will be 0 in that case). */
11311 if (label)
11312 targetm.asm_out.internal_label (asm_out_file, "L",
11313 CODE_LABEL_NUMBER (label));
11314 #endif
11316 else
11318 if (TARGET_MACHO)
11319 /* We don't need a pic base, we're not producing pic. */
11320 gcc_unreachable ();
11322 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11323 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11324 targetm.asm_out.internal_label (asm_out_file, "L",
11325 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11328 if (!TARGET_MACHO)
11329 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11331 return "";
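/* On a typical 32-bit ELF target the PIC sequence emitted by
   output_set_got is expected to look roughly like this (illustrative
   only):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   i.e. the thunk loads the return address into the destination
   register and the add rebases it to point at the GOT.  */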
11334 /* Generate a "push" pattern for input ARG. */
11336 static rtx
11337 gen_push (rtx arg)
11339 struct machine_function *m = cfun->machine;
11341 if (m->fs.cfa_reg == stack_pointer_rtx)
11342 m->fs.cfa_offset += UNITS_PER_WORD;
11343 m->fs.sp_offset += UNITS_PER_WORD;
11345 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11346 arg = gen_rtx_REG (word_mode, REGNO (arg));
11348 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11349 gen_rtx_PRE_DEC (Pmode,
11350 stack_pointer_rtx)),
11351 arg);
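/* The RTL returned above is a store through a pre-decremented stack
   pointer; with word_mode == DImode it is expected to look roughly
   like (illustrative only):

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))  */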
11354 /* Generate a "pop" pattern for input ARG. */
11356 static rtx
11357 gen_pop (rtx arg)
11359 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11360 arg = gen_rtx_REG (word_mode, REGNO (arg));
11362 return gen_rtx_SET (arg,
11363 gen_rtx_MEM (word_mode,
11364 gen_rtx_POST_INC (Pmode,
11365 stack_pointer_rtx)));
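/* Conversely, the pop pattern above loads through a post-incremented
   stack pointer, roughly (illustrative only):

	(set (reg:DI <arg>) (mem:DI (post_inc:DI (reg:DI sp))))  */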
11368 /* Return >= 0 if there is an unused call-clobbered register available
11369 for the entire function. */
11371 static unsigned int
11372 ix86_select_alt_pic_regnum (void)
11374 if (ix86_use_pseudo_pic_reg ())
11375 return INVALID_REGNUM;
11377 if (crtl->is_leaf
11378 && !crtl->profile
11379 && !ix86_current_function_calls_tls_descriptor)
11381 int i, drap;
11382 /* Can't use the same register for both PIC and DRAP. */
11383 if (crtl->drap_reg)
11384 drap = REGNO (crtl->drap_reg);
11385 else
11386 drap = -1;
11387 for (i = 2; i >= 0; --i)
11388 if (i != drap && !df_regs_ever_live_p (i))
11389 return i;
11392 return INVALID_REGNUM;
11395 /* Return true if REGNO is used by the epilogue. */
11397 bool
11398 ix86_epilogue_uses (int regno)
11400 /* If there are no caller-saved registers, we preserve all registers,
11401 except for MMX and x87 registers which aren't supported when saving
11402 and restoring registers. Don't explicitly save SP register since
11403 it is always preserved. */
11404 return (epilogue_completed
11405 && cfun->machine->no_caller_saved_registers
11406 && !fixed_regs[regno]
11407 && !STACK_REGNO_P (regno)
11408 && !MMX_REGNO_P (regno));
11411 /* Return nonzero if register REGNO can be used as a scratch register
11412 in peephole2. */
11414 static bool
11415 ix86_hard_regno_scratch_ok (unsigned int regno)
11417 /* If there are no caller-saved registers, we can't use any register
11418 as a scratch register after the epilogue, and we use REGNO as a scratch
11419 register only if it has been used before, to avoid saving and
11420 restoring it. */
11421 return (!cfun->machine->no_caller_saved_registers
11422 || (!epilogue_completed
11423 && df_regs_ever_live_p (regno)));
11426 /* Return true if register class CL should be an additional allocno
11427 class. */
11429 static bool
11430 ix86_additional_allocno_class_p (reg_class_t cl)
11432 return cl == MOD4_SSE_REGS;
11435 /* Return TRUE if we need to save REGNO. */
11437 static bool
11438 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
11440 /* If there are no caller-saved registers, we preserve all registers,
11441 except for MMX and x87 registers which aren't supported when saving
11442 and restoring registers. Don't explicitly save SP register since
11443 it is always preserved. */
11444 if (cfun->machine->no_caller_saved_registers)
11446 /* Don't preserve registers used for function return value. */
11447 rtx reg = crtl->return_rtx;
11448 if (reg)
11450 unsigned int i = REGNO (reg);
11451 unsigned int nregs = REG_NREGS (reg);
11452 while (nregs-- > 0)
11453 if ((i + nregs) == regno)
11454 return false;
11456 reg = crtl->return_bnd;
11457 if (reg)
11459 i = REGNO (reg);
11460 nregs = REG_NREGS (reg);
11461 while (nregs-- > 0)
11462 if ((i + nregs) == regno)
11463 return false;
11467 return (df_regs_ever_live_p (regno)
11468 && !fixed_regs[regno]
11469 && !STACK_REGNO_P (regno)
11470 && !MMX_REGNO_P (regno)
11471 && (regno != HARD_FRAME_POINTER_REGNUM
11472 || !frame_pointer_needed));
11475 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11476 && pic_offset_table_rtx)
11478 if (ix86_use_pseudo_pic_reg ())
11480 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11481 _mcount in prologue. */
11482 if (!TARGET_64BIT && flag_pic && crtl->profile)
11483 return true;
11485 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11486 || crtl->profile
11487 || crtl->calls_eh_return
11488 || crtl->uses_const_pool
11489 || cfun->has_nonlocal_label)
11490 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11493 if (crtl->calls_eh_return && maybe_eh_return)
11495 unsigned i;
11496 for (i = 0; ; i++)
11498 unsigned test = EH_RETURN_DATA_REGNO (i);
11499 if (test == INVALID_REGNUM)
11500 break;
11501 if (test == regno)
11502 return true;
11506 if (ignore_outlined && cfun->machine->call_ms2sysv)
11508 unsigned count = cfun->machine->call_ms2sysv_extra_regs
11509 + xlogue_layout::MIN_REGS;
11510 if (xlogue_layout::is_stub_managed_reg (regno, count))
11511 return false;
11514 if (crtl->drap_reg
11515 && regno == REGNO (crtl->drap_reg)
11516 && !cfun->machine->no_drap_save_restore)
11517 return true;
11519 return (df_regs_ever_live_p (regno)
11520 && !call_used_regs[regno]
11521 && !fixed_regs[regno]
11522 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11525 /* Return number of saved general purpose registers. */
11527 static int
11528 ix86_nsaved_regs (void)
11530 int nregs = 0;
11531 int regno;
11533 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11534 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11535 nregs ++;
11536 return nregs;
11539 /* Return number of saved SSE registers. */
11541 static int
11542 ix86_nsaved_sseregs (void)
11544 int nregs = 0;
11545 int regno;
11547 if (!TARGET_64BIT_MS_ABI)
11548 return 0;
11549 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11550 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11551 nregs ++;
11552 return nregs;
11555 /* Given FROM and TO register numbers, say whether this elimination is
11556 allowed. If stack alignment is needed, we can only replace argument
11557 pointer with hard frame pointer, or replace frame pointer with stack
11558 pointer. Otherwise, frame pointer elimination is automatically
11559 handled and all other eliminations are valid. */
11561 static bool
11562 ix86_can_eliminate (const int from, const int to)
11564 if (stack_realign_fp)
11565 return ((from == ARG_POINTER_REGNUM
11566 && to == HARD_FRAME_POINTER_REGNUM)
11567 || (from == FRAME_POINTER_REGNUM
11568 && to == STACK_POINTER_REGNUM));
11569 else
11570 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11573 /* Return the offset between two registers, one to be eliminated, and the other
11574 its replacement, at the start of a routine. */
11576 HOST_WIDE_INT
11577 ix86_initial_elimination_offset (int from, int to)
11579 struct ix86_frame &frame = cfun->machine->frame;
11581 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11582 return frame.hard_frame_pointer_offset;
11583 else if (from == FRAME_POINTER_REGNUM
11584 && to == HARD_FRAME_POINTER_REGNUM)
11585 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11586 else
11588 gcc_assert (to == STACK_POINTER_REGNUM);
11590 if (from == ARG_POINTER_REGNUM)
11591 return frame.stack_pointer_offset;
11593 gcc_assert (from == FRAME_POINTER_REGNUM);
11594 return frame.stack_pointer_offset - frame.frame_pointer_offset;
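/* Worked example with made-up (purely illustrative) frame offsets:
   if hard_frame_pointer_offset == 16, frame_pointer_offset == 48 and
   stack_pointer_offset == 112, then eliminating ARG_POINTER to the
   hard frame pointer yields 16, ARG_POINTER to the stack pointer
   yields 112, and FRAME_POINTER to the stack pointer yields
   112 - 48 = 64.  */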
11598 /* In a dynamically-aligned function, we can't know the offset from
11599 stack pointer to frame pointer, so we must ensure that setjmp
11600 eliminates fp against the hard fp (%ebp) rather than trying to
11601 index from %esp up to the top of the frame across a gap that is
11602 of unknown (at compile-time) size. */
11603 static rtx
11604 ix86_builtin_setjmp_frame_value (void)
11606 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11609 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
11610 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11612 static bool warned_once = false;
11613 if (!warned_once)
11615 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11616 feature);
11617 warned_once = true;
11621 /* Return the probing interval for -fstack-clash-protection. */
11623 static HOST_WIDE_INT
11624 get_probe_interval (void)
11626 if (flag_stack_clash_protection)
11627 return (HOST_WIDE_INT_1U
11628 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
11629 else
11630 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
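/* Both values above are exponents, so the probing interval is always a
   power of two; with the usual default exponent of 12 this comes to
   1 << 12 == 4096 bytes, i.e. roughly one probe per page.  The defaults
   are configurable, so 4096 is illustrative rather than guaranteed.  */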
11633 /* When using -fsplit-stack, the allocation routines set a field in
11634 the TCB to the bottom of the stack plus this much space, measured
11635 in bytes. */
11637 #define SPLIT_STACK_AVAILABLE 256
11639 /* Fill structure ix86_frame about frame of currently computed function. */
11641 static void
11642 ix86_compute_frame_layout (void)
11644 struct ix86_frame *frame = &cfun->machine->frame;
11645 struct machine_function *m = cfun->machine;
11646 unsigned HOST_WIDE_INT stack_alignment_needed;
11647 HOST_WIDE_INT offset;
11648 unsigned HOST_WIDE_INT preferred_alignment;
11649 HOST_WIDE_INT size = get_frame_size ();
11650 HOST_WIDE_INT to_allocate;
11652 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11653 * ms_abi functions that call a sysv function. We now need to prune away
11654 * cases where it should be disabled. */
11655 if (TARGET_64BIT && m->call_ms2sysv)
11657 gcc_assert (TARGET_64BIT_MS_ABI);
11658 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11659 gcc_assert (!TARGET_SEH);
11660 gcc_assert (TARGET_SSE);
11661 gcc_assert (!ix86_using_red_zone ());
11663 if (crtl->calls_eh_return)
11665 gcc_assert (!reload_completed);
11666 m->call_ms2sysv = false;
11667 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11670 else if (ix86_static_chain_on_stack)
11672 gcc_assert (!reload_completed);
11673 m->call_ms2sysv = false;
11674 warn_once_call_ms2sysv_xlogues ("static call chains");
11677 /* Finally, compute which registers the stub will manage. */
11678 else
11680 unsigned count = xlogue_layout::count_stub_managed_regs ();
11681 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11682 m->call_ms2sysv_pad_in = 0;
11686 frame->nregs = ix86_nsaved_regs ();
11687 frame->nsseregs = ix86_nsaved_sseregs ();
11689 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11690 except for function prologues, leaf functions and when the default
11691 incoming stack boundary is overridden at the command line or via the
11692 force_align_arg_pointer attribute. */
11693 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11694 && (!crtl->is_leaf || cfun->calls_alloca != 0
11695 || ix86_current_function_calls_tls_descriptor
11696 || ix86_incoming_stack_boundary < 128))
11698 crtl->preferred_stack_boundary = 128;
11699 crtl->stack_alignment_needed = 128;
11702 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11703 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11705 gcc_assert (!size || stack_alignment_needed);
11706 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11707 gcc_assert (preferred_alignment <= stack_alignment_needed);
11709 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11710 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11711 if (TARGET_64BIT && m->call_ms2sysv)
11713 gcc_assert (stack_alignment_needed >= 16);
11714 gcc_assert (!frame->nsseregs);
11717 /* For SEH we have to limit the amount of code movement into the prologue.
11718 At present we do this via a BLOCKAGE, at which point there's very little
11719 scheduling that can be done, which means that there's very little point
11720 in doing anything except PUSHs. */
11721 if (TARGET_SEH)
11722 m->use_fast_prologue_epilogue = false;
11723 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11725 int count = frame->nregs;
11726 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11728 /* The fast prologue uses move instead of push to save registers. This
11729 is significantly longer, but also executes faster as modern hardware
11730 can execute the moves in parallel, but can't do that for push/pop.
11732 Be careful about choosing which prologue to emit: when a function takes
11733 many instructions to execute we may as well use the slow version, and
11734 likewise when the function is known to be outside a hot spot (this is
11735 known with feedback only). Weight the size of the function by the number
11736 of registers to save, as it is cheap to use one or two push instructions
11737 but very slow to use many of them. */
11738 if (count)
11739 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11740 if (node->frequency < NODE_FREQUENCY_NORMAL
11741 || (flag_branch_probabilities
11742 && node->frequency < NODE_FREQUENCY_HOT))
11743 m->use_fast_prologue_epilogue = false;
11744 else
11745 m->use_fast_prologue_epilogue
11746 = !expensive_function_p (count);
11749 frame->save_regs_using_mov
11750 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11751 /* If static stack checking is enabled and done with probes,
11752 the registers need to be saved before allocating the frame. */
11753 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11755 /* Skip return address and error code in exception handler. */
11756 offset = INCOMING_FRAME_SP_OFFSET;
11758 /* Skip pushed static chain. */
11759 if (ix86_static_chain_on_stack)
11760 offset += UNITS_PER_WORD;
11762 /* Skip saved base pointer. */
11763 if (frame_pointer_needed)
11764 offset += UNITS_PER_WORD;
11765 frame->hfp_save_offset = offset;
11767 /* The traditional frame pointer location is at the top of the frame. */
11768 frame->hard_frame_pointer_offset = offset;
11770 /* Register save area */
11771 offset += frame->nregs * UNITS_PER_WORD;
11772 frame->reg_save_offset = offset;
11774 /* On SEH targets, registers are pushed just before the frame pointer
11775 location. */
11776 if (TARGET_SEH)
11777 frame->hard_frame_pointer_offset = offset;
11779 /* Calculate the size of the va-arg area (not including padding, if any). */
11780 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11782 /* Also adjust stack_realign_offset for the largest alignment of
11783 stack slot actually used. */
11784 if (stack_realign_fp
11785 || (cfun->machine->max_used_stack_alignment != 0
11786 && (offset % cfun->machine->max_used_stack_alignment) != 0))
11788 /* We may need a 16-byte aligned stack for the remainder of the
11789 register save area, but the stack frame for the local function
11790 may require a greater alignment if using AVX/2/512. In order
11791 to avoid wasting space, we first calculate the space needed for
11792 the rest of the register saves, add that to the stack pointer,
11793 and then realign the stack to the boundary of the start of the
11794 frame for the local function. */
11795 HOST_WIDE_INT space_needed = 0;
11796 HOST_WIDE_INT sse_reg_space_needed = 0;
11798 if (TARGET_64BIT)
11800 if (m->call_ms2sysv)
11802 m->call_ms2sysv_pad_in = 0;
11803 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11806 else if (frame->nsseregs)
11807 /* The only ABI that has saved SSE registers (Win64) also has a
11808 16-byte aligned default stack. However, many programs violate
11809 the ABI, and Wine64 forces stack realignment to compensate. */
11810 space_needed = frame->nsseregs * 16;
11812 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11814 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11815 rounding to be pedantic. */
11816 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11818 else
11819 space_needed = frame->va_arg_size;
11821 /* Record the allocation size required prior to the realignment AND. */
11822 frame->stack_realign_allocate = space_needed;
11824 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11825 before this point are not directly comparable with values below
11826 this point. Use sp_valid_at to determine if the stack pointer is
11827 valid for a given offset, fp_valid_at for the frame pointer, or
11828 choose_baseaddr to have a base register chosen for you.
11830 Note that the result of (frame->stack_realign_offset
11831 & (stack_alignment_needed - 1)) may not equal zero. */
11832 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11833 frame->stack_realign_offset = offset - space_needed;
11834 frame->sse_reg_save_offset = frame->stack_realign_offset
11835 + sse_reg_space_needed;
11837 else
11839 frame->stack_realign_offset = offset;
11841 if (TARGET_64BIT && m->call_ms2sysv)
11843 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11844 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11847 /* Align and set SSE register save area. */
11848 else if (frame->nsseregs)
11850 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11851 required and the DRAP re-alignment boundary is at least 16 bytes,
11852 then we want the SSE register save area properly aligned. */
11853 if (ix86_incoming_stack_boundary >= 128
11854 || (stack_realign_drap && stack_alignment_needed >= 16))
11855 offset = ROUND_UP (offset, 16);
11856 offset += frame->nsseregs * 16;
11858 frame->sse_reg_save_offset = offset;
11859 offset += frame->va_arg_size;
11862 /* Align start of frame for local function. When a function call
11863 is removed, it may become a leaf function. But if arguments may
11864 be passed on the stack, we need to align the stack when there is no
11865 tail call. */
11866 if (m->call_ms2sysv
11867 || frame->va_arg_size != 0
11868 || size != 0
11869 || !crtl->is_leaf
11870 || (!crtl->tail_call_emit
11871 && cfun->machine->outgoing_args_on_stack)
11872 || cfun->calls_alloca
11873 || ix86_current_function_calls_tls_descriptor)
11874 offset = ROUND_UP (offset, stack_alignment_needed);
11876 /* Frame pointer points here. */
11877 frame->frame_pointer_offset = offset;
11879 offset += size;
11881 /* Add outgoing arguments area. Can be skipped if we eliminated
11882 all the function calls as dead code.
11883 Skipping is however impossible when function calls alloca. Alloca
11884 expander assumes that last crtl->outgoing_args_size
11885 of stack frame are unused. */
11886 if (ACCUMULATE_OUTGOING_ARGS
11887 && (!crtl->is_leaf || cfun->calls_alloca
11888 || ix86_current_function_calls_tls_descriptor))
11890 offset += crtl->outgoing_args_size;
11891 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11893 else
11894 frame->outgoing_arguments_size = 0;
11896 /* Align stack boundary. Only needed if we're calling another function
11897 or using alloca. */
11898 if (!crtl->is_leaf || cfun->calls_alloca
11899 || ix86_current_function_calls_tls_descriptor)
11900 offset = ROUND_UP (offset, preferred_alignment);
11902 /* We've reached end of stack frame. */
11903 frame->stack_pointer_offset = offset;
11905 /* Size prologue needs to allocate. */
11906 to_allocate = offset - frame->sse_reg_save_offset;
11908 if ((!to_allocate && frame->nregs <= 1)
11909 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
11910 /* If stack clash probing needs a loop, then it needs a
11911 scratch register. But the returned register is only guaranteed
11912 to be safe to use after register saves are complete. So if
11913 stack clash protections are enabled and the allocated frame is
11914 larger than the probe interval, then use pushes to save
11915 callee saved registers. */
11916 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
11917 frame->save_regs_using_mov = false;
11919 if (ix86_using_red_zone ()
11920 && crtl->sp_is_unchanging
11921 && crtl->is_leaf
11922 && !ix86_pc_thunk_call_expanded
11923 && !ix86_current_function_calls_tls_descriptor)
11925 frame->red_zone_size = to_allocate;
11926 if (frame->save_regs_using_mov)
11927 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11928 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11929 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11931 else
11932 frame->red_zone_size = 0;
11933 frame->stack_pointer_offset -= frame->red_zone_size;
11935 /* The SEH frame pointer location is near the bottom of the frame.
11936 This is enforced by the fact that the difference between the
11937 stack pointer and the frame pointer is limited to 240 bytes in
11938 the unwind data structure. */
11939 if (TARGET_SEH)
11941 HOST_WIDE_INT diff;
11943 /* If we can leave the frame pointer where it is, do so. Also, returns
11944 the establisher frame for __builtin_frame_address (0). */
11945 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11946 if (diff <= SEH_MAX_FRAME_SIZE
11947 && (diff > 240 || (diff & 15) != 0)
11948 && !crtl->accesses_prior_frames)
11950 /* Ideally we'd determine what portion of the local stack frame
11951 (within the constraint of the lowest 240) is most heavily used.
11952 But without that complication, simply bias the frame pointer
11953 by 128 bytes so as to maximize the amount of the local stack
11954 frame that is addressable with 8-bit offsets. */
11955 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11960 /* This is semi-inlined memory_address_length, but simplified
11961 since we know that we're always dealing with reg+offset, and
11962 to avoid having to create and discard all that rtl. */
11964 static inline int
11965 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11967 int len = 4;
11969 if (offset == 0)
11971 /* EBP and R13 cannot be encoded without an offset. */
11972 len = (regno == BP_REG || regno == R13_REG);
11974 else if (IN_RANGE (offset, -128, 127))
11975 len = 1;
11977 /* ESP and R12 must be encoded with a SIB byte. */
11978 if (regno == SP_REG || regno == R12_REG)
11979 len++;
11981 return len;
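/* A few concrete values that follow from the rules above (illustrative):
   (BP_REG, 0) -> 1 since %ebp always needs a disp8, (AX_REG, 0) -> 0,
   (SP_REG, 0) -> 1 because of the SIB byte, (AX_REG, 100) -> 1,
   (SP_REG, 100) -> 2, and (AX_REG, 200) -> 4 (full disp32).  */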
11984 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11985 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11987 static bool
11988 sp_valid_at (HOST_WIDE_INT cfa_offset)
11990 const struct machine_frame_state &fs = cfun->machine->fs;
11991 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11993 /* Validate that the cfa_offset isn't in a "no-man's land". */
11994 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11995 return false;
11997 return fs.sp_valid;
12000 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
12001 the frame save area. The register is saved at CFA - CFA_OFFSET. */
12003 static inline bool
12004 fp_valid_at (HOST_WIDE_INT cfa_offset)
12006 const struct machine_frame_state &fs = cfun->machine->fs;
12007 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
12009 /* Validate that the cfa_offset isn't in a "no-man's land". */
12010 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
12011 return false;
12013 return fs.fp_valid;
12016 /* Choose a base register based upon alignment requested, speed and/or
12017 size. */
12019 static void
12020 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
12021 HOST_WIDE_INT &base_offset,
12022 unsigned int align_reqested, unsigned int *align)
12024 const struct machine_function *m = cfun->machine;
12025 unsigned int hfp_align;
12026 unsigned int drap_align;
12027 unsigned int sp_align;
12028 bool hfp_ok = fp_valid_at (cfa_offset);
12029 bool drap_ok = m->fs.drap_valid;
12030 bool sp_ok = sp_valid_at (cfa_offset);
12032 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
12034 /* Filter out any registers that don't meet the requested alignment
12035 criteria. */
12036 if (align_reqested)
12038 if (m->fs.realigned)
12039 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
12040 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
12041 notes (which we would need to use a realigned stack pointer),
12042 so disable on SEH targets. */
12043 else if (m->fs.sp_realigned)
12044 sp_align = crtl->stack_alignment_needed;
12046 hfp_ok = hfp_ok && hfp_align >= align_reqested;
12047 drap_ok = drap_ok && drap_align >= align_reqested;
12048 sp_ok = sp_ok && sp_align >= align_reqested;
12051 if (m->use_fast_prologue_epilogue)
12053 /* Choose the base register most likely to allow the most scheduling
12054 opportunities. Generally FP is valid throughout the function,
12055 while DRAP must be reloaded within the epilogue. But choose either
12056 over the SP due to increased encoding size. */
12058 if (hfp_ok)
12060 base_reg = hard_frame_pointer_rtx;
12061 base_offset = m->fs.fp_offset - cfa_offset;
12063 else if (drap_ok)
12065 base_reg = crtl->drap_reg;
12066 base_offset = 0 - cfa_offset;
12068 else if (sp_ok)
12070 base_reg = stack_pointer_rtx;
12071 base_offset = m->fs.sp_offset - cfa_offset;
12074 else
12076 HOST_WIDE_INT toffset;
12077 int len = 16, tlen;
12079 /* Choose the base register with the smallest address encoding.
12080 With a tie, choose FP > DRAP > SP. */
12081 if (sp_ok)
12083 base_reg = stack_pointer_rtx;
12084 base_offset = m->fs.sp_offset - cfa_offset;
12085 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
12087 if (drap_ok)
12089 toffset = 0 - cfa_offset;
12090 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
12091 if (tlen <= len)
12093 base_reg = crtl->drap_reg;
12094 base_offset = toffset;
12095 len = tlen;
12098 if (hfp_ok)
12100 toffset = m->fs.fp_offset - cfa_offset;
12101 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
12102 if (tlen <= len)
12104 base_reg = hard_frame_pointer_rtx;
12105 base_offset = toffset;
12106 len = tlen;
12111 /* Set the align return value. */
12112 if (align)
12114 if (base_reg == stack_pointer_rtx)
12115 *align = sp_align;
12116 else if (base_reg == crtl->drap_reg)
12117 *align = drap_align;
12118 else if (base_reg == hard_frame_pointer_rtx)
12119 *align = hfp_align;
12123 /* Return an RTX that points to CFA_OFFSET within the stack frame and
12124 the alignment of the address. If ALIGN is non-null, it should point to
12125 an alignment value (in bits) that is preferred or zero and will
12126 receive the alignment of the base register that was selected,
12127 irrespective of whether or not CFA_OFFSET is a multiple of that
12128 alignment value. If it is possible for the base register offset to be
12129 non-immediate then SCRATCH_REGNO should specify a scratch register to
12130 use.
12132 The valid base registers are taken from CFUN->MACHINE->FS. */
12134 static rtx
12135 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
12136 unsigned int scratch_regno = INVALID_REGNUM)
12138 rtx base_reg = NULL;
12139 HOST_WIDE_INT base_offset = 0;
12141 /* If a specific alignment is requested, try to get a base register
12142 with that alignment first. */
12143 if (align && *align)
12144 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
12146 if (!base_reg)
12147 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
12149 gcc_assert (base_reg != NULL);
12151 rtx base_offset_rtx = GEN_INT (base_offset);
12153 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
12155 gcc_assert (scratch_regno != INVALID_REGNUM);
12157 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12158 emit_move_insn (scratch_reg, base_offset_rtx);
12160 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
12163 return plus_constant (Pmode, base_reg, base_offset);
12166 /* Emit code to save registers in the prologue. */
12168 static void
12169 ix86_emit_save_regs (void)
12171 unsigned int regno;
12172 rtx_insn *insn;
12174 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
12175 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12177 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
12178 RTX_FRAME_RELATED_P (insn) = 1;
12182 /* Emit a single register save at CFA - CFA_OFFSET. */
12184 static void
12185 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
12186 HOST_WIDE_INT cfa_offset)
12188 struct machine_function *m = cfun->machine;
12189 rtx reg = gen_rtx_REG (mode, regno);
12190 rtx mem, addr, base, insn;
12191 unsigned int align = GET_MODE_ALIGNMENT (mode);
12193 addr = choose_baseaddr (cfa_offset, &align);
12194 mem = gen_frame_mem (mode, addr);
12196 /* The location alignment depends upon the base register. */
12197 align = MIN (GET_MODE_ALIGNMENT (mode), align);
12198 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
12199 set_mem_align (mem, align);
12201 insn = emit_insn (gen_rtx_SET (mem, reg));
12202 RTX_FRAME_RELATED_P (insn) = 1;
12204 base = addr;
12205 if (GET_CODE (base) == PLUS)
12206 base = XEXP (base, 0);
12207 gcc_checking_assert (REG_P (base));
12209 /* When saving registers into a re-aligned local stack frame, avoid
12210 any tricky guessing by dwarf2out. */
12211 if (m->fs.realigned)
12213 gcc_checking_assert (stack_realign_drap);
12215 if (regno == REGNO (crtl->drap_reg))
12217 /* A bit of a hack. We force the DRAP register to be saved in
12218 the re-aligned stack frame, which provides us with a copy
12219 of the CFA that will last past the prologue. Install it. */
12220 gcc_checking_assert (cfun->machine->fs.fp_valid);
12221 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12222 cfun->machine->fs.fp_offset - cfa_offset);
12223 mem = gen_rtx_MEM (mode, addr);
12224 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
12226 else
12228 /* The frame pointer is a stable reference within the
12229 aligned frame. Use it. */
12230 gcc_checking_assert (cfun->machine->fs.fp_valid);
12231 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12232 cfun->machine->fs.fp_offset - cfa_offset);
12233 mem = gen_rtx_MEM (mode, addr);
12234 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12238 else if (base == stack_pointer_rtx && m->fs.sp_realigned
12239 && cfa_offset >= m->fs.sp_realigned_offset)
12241 gcc_checking_assert (stack_realign_fp);
12242 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12245 /* The memory may not be relative to the current CFA register,
12246 which means that we may need to generate a new pattern for
12247 use by the unwind info. */
12248 else if (base != m->fs.cfa_reg)
12250 addr = plus_constant (Pmode, m->fs.cfa_reg,
12251 m->fs.cfa_offset - cfa_offset);
12252 mem = gen_rtx_MEM (mode, addr);
12253 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
12257 /* Emit code to save registers using MOV insns.
12258 First register is stored at CFA - CFA_OFFSET. */
12259 static void
12260 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
12262 unsigned int regno;
12264 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12265 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12267 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
12268 cfa_offset -= UNITS_PER_WORD;
12272 /* Emit code to save SSE registers using MOV insns.
12273 First register is stored at CFA - CFA_OFFSET. */
12274 static void
12275 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
12277 unsigned int regno;
12279 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12280 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12282 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
12283 cfa_offset -= GET_MODE_SIZE (V4SFmode);
12287 static GTY(()) rtx queued_cfa_restores;
12289 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
12290 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
12291 Don't add the note if the previously saved value will be left untouched
12292 within stack red-zone till return, as unwinders can find the same value
12293 in the register and on the stack. */
12295 static void
12296 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
12298 if (!crtl->shrink_wrapped
12299 && cfa_offset <= cfun->machine->fs.red_zone_offset)
12300 return;
12302 if (insn)
12304 add_reg_note (insn, REG_CFA_RESTORE, reg);
12305 RTX_FRAME_RELATED_P (insn) = 1;
12307 else
12308 queued_cfa_restores
12309 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
12312 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
12314 static void
12315 ix86_add_queued_cfa_restore_notes (rtx insn)
12317 rtx last;
12318 if (!queued_cfa_restores)
12319 return;
12320 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
12322 XEXP (last, 1) = REG_NOTES (insn);
12323 REG_NOTES (insn) = queued_cfa_restores;
12324 queued_cfa_restores = NULL_RTX;
12325 RTX_FRAME_RELATED_P (insn) = 1;
12328 /* Expand prologue or epilogue stack adjustment.
12329 The pattern exists to put a dependency on all ebp-based memory accesses.
12330 STYLE should be negative if instructions should be marked as frame related,
12331 zero if the %r11 register is live and cannot be freely used, and positive
12332 otherwise. */
12334 static rtx
12335 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
12336 int style, bool set_cfa)
12338 struct machine_function *m = cfun->machine;
12339 rtx insn;
12340 bool add_frame_related_expr = false;
12342 if (Pmode == SImode)
12343 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
12344 else if (x86_64_immediate_operand (offset, DImode))
12345 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
12346 else
12348 rtx tmp;
12349 /* r11 is used by indirect sibcall return as well, set before the
12350 epilogue and used after the epilogue. */
12351 if (style)
12352 tmp = gen_rtx_REG (DImode, R11_REG);
12353 else
12355 gcc_assert (src != hard_frame_pointer_rtx
12356 && dest != hard_frame_pointer_rtx);
12357 tmp = hard_frame_pointer_rtx;
12359 insn = emit_insn (gen_rtx_SET (tmp, offset));
12360 if (style < 0)
12361 add_frame_related_expr = true;
12363 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
12366 insn = emit_insn (insn);
12367 if (style >= 0)
12368 ix86_add_queued_cfa_restore_notes (insn);
12370 if (set_cfa)
12372 rtx r;
12374 gcc_assert (m->fs.cfa_reg == src);
12375 m->fs.cfa_offset += INTVAL (offset);
12376 m->fs.cfa_reg = dest;
12378 r = gen_rtx_PLUS (Pmode, src, offset);
12379 r = gen_rtx_SET (dest, r);
12380 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
12381 RTX_FRAME_RELATED_P (insn) = 1;
12383 else if (style < 0)
12385 RTX_FRAME_RELATED_P (insn) = 1;
12386 if (add_frame_related_expr)
12388 rtx r = gen_rtx_PLUS (Pmode, src, offset);
12389 r = gen_rtx_SET (dest, r);
12390 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
12394 if (dest == stack_pointer_rtx)
12396 HOST_WIDE_INT ooffset = m->fs.sp_offset;
12397 bool valid = m->fs.sp_valid;
12398 bool realigned = m->fs.sp_realigned;
12400 if (src == hard_frame_pointer_rtx)
12402 valid = m->fs.fp_valid;
12403 realigned = false;
12404 ooffset = m->fs.fp_offset;
12406 else if (src == crtl->drap_reg)
12408 valid = m->fs.drap_valid;
12409 realigned = false;
12410 ooffset = 0;
12412 else
12414 /* Else there are two possibilities: SP itself, which we set
12415 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
12416 taken care of by hand along the eh_return path. */
12417 gcc_checking_assert (src == stack_pointer_rtx
12418 || offset == const0_rtx);
12421 m->fs.sp_offset = ooffset - INTVAL (offset);
12422 m->fs.sp_valid = valid;
12423 m->fs.sp_realigned = realigned;
12425 return insn;
12428 /* Find an available register to be used as dynamic realign argument
12429 pointer register. Such a register will be written in the prologue and
12430 used at the beginning of the body, so it must not be
12431 1. parameter passing register.
12432 2. GOT pointer.
12433 We reuse static-chain register if it is available. Otherwise, we
12434 use DI for i386 and R13 for x86-64. We chose R13 since it has
12435 shorter encoding.
12437 Return: the regno of chosen register. */
12439 static unsigned int
12440 find_drap_reg (void)
12442 tree decl = cfun->decl;
12444 /* Always use callee-saved register if there are no caller-saved
12445 registers. */
12446 if (TARGET_64BIT)
12448 /* Use R13 for a nested function or a function that needs a static chain.
12449 Since a function with a tail call may use any caller-saved
12450 registers in the epilogue, DRAP must not use a caller-saved
12451 register in that case. */
12452 if (DECL_STATIC_CHAIN (decl)
12453 || cfun->machine->no_caller_saved_registers
12454 || crtl->tail_call_emit)
12455 return R13_REG;
12457 return R10_REG;
12459 else
12461 /* Use DI for a nested function or a function that needs a static chain.
12462 Since a function with a tail call may use any caller-saved
12463 registers in the epilogue, DRAP must not use a caller-saved
12464 register in that case. */
12465 if (DECL_STATIC_CHAIN (decl)
12466 || cfun->machine->no_caller_saved_registers
12467 || crtl->tail_call_emit)
12468 return DI_REG;
12470 /* Reuse static chain register if it isn't used for parameter
12471 passing. */
12472 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
12474 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
12475 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
12476 return CX_REG;
12478 return DI_REG;
12482 /* Handle a "force_align_arg_pointer" attribute. */
12484 static tree
12485 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
12486 tree, int, bool *no_add_attrs)
12488 if (TREE_CODE (*node) != FUNCTION_TYPE
12489 && TREE_CODE (*node) != METHOD_TYPE
12490 && TREE_CODE (*node) != FIELD_DECL
12491 && TREE_CODE (*node) != TYPE_DECL)
12493 warning (OPT_Wattributes, "%qE attribute only applies to functions",
12494 name);
12495 *no_add_attrs = true;
12498 return NULL_TREE;
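/* A typical use of the attribute handled above (illustrative):

	void callback (void) __attribute__ ((force_align_arg_pointer));

   which requests that the incoming stack be realigned on entry to the
   function even when the default incoming boundary is smaller.  */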
12501 /* Return minimum incoming stack alignment. */
12503 static unsigned int
12504 ix86_minimum_incoming_stack_boundary (bool sibcall)
12506 unsigned int incoming_stack_boundary;
12508 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
12509 if (cfun->machine->func_type != TYPE_NORMAL)
12510 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
12511 /* Prefer the one specified at command line. */
12512 else if (ix86_user_incoming_stack_boundary)
12513 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12514 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
12515 if -mstackrealign is used, this isn't a sibcall check, and the
12516 estimated stack alignment is 128 bits. */
12517 else if (!sibcall
12518 && ix86_force_align_arg_pointer
12519 && crtl->stack_alignment_estimated == 128)
12520 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12521 else
12522 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12524 /* Incoming stack alignment can be changed on individual functions
12525 via force_align_arg_pointer attribute. We use the smallest
12526 incoming stack boundary. */
12527 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12528 && lookup_attribute (ix86_force_align_arg_pointer_string,
12529 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12530 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12532 /* The incoming stack frame has to be aligned at least at
12533 parm_stack_boundary. */
12534 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12535 incoming_stack_boundary = crtl->parm_stack_boundary;
12537 /* Stack at entrance of main is aligned by runtime. We use the
12538 smallest incoming stack boundary. */
12539 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12540 && DECL_NAME (current_function_decl)
12541 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12542 && DECL_FILE_SCOPE_P (current_function_decl))
12543 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12545 return incoming_stack_boundary;
12548 /* Update incoming stack boundary and estimated stack alignment. */
12550 static void
12551 ix86_update_stack_boundary (void)
12553 ix86_incoming_stack_boundary
12554 = ix86_minimum_incoming_stack_boundary (false);
12556 /* x86_64 vararg needs 16byte stack alignment for register save
12557 area. */
12558 if (TARGET_64BIT
12559 && cfun->stdarg
12560 && crtl->stack_alignment_estimated < 128)
12561 crtl->stack_alignment_estimated = 128;
12563 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
12564 if (ix86_tls_descriptor_calls_expanded_in_cfun
12565 && crtl->preferred_stack_boundary < 128)
12566 crtl->preferred_stack_boundary = 128;
12569 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12570 needed or an rtx for DRAP otherwise. */
12572 static rtx
12573 ix86_get_drap_rtx (void)
12575 /* We must use DRAP if there are outgoing arguments on stack and
12576 ACCUMULATE_OUTGOING_ARGS is false. */
12577 if (ix86_force_drap
12578 || (cfun->machine->outgoing_args_on_stack
12579 && !ACCUMULATE_OUTGOING_ARGS))
12580 crtl->need_drap = true;
12582 if (stack_realign_drap)
12584 /* Assign DRAP to vDRAP and return vDRAP. */
12585 unsigned int regno = find_drap_reg ();
12586 rtx drap_vreg;
12587 rtx arg_ptr;
12588 rtx_insn *seq, *insn;
12590 arg_ptr = gen_rtx_REG (Pmode, regno);
12591 crtl->drap_reg = arg_ptr;
12593 start_sequence ();
12594 drap_vreg = copy_to_reg (arg_ptr);
12595 seq = get_insns ();
12596 end_sequence ();
12598 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12599 if (!optimize)
12601 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12602 RTX_FRAME_RELATED_P (insn) = 1;
12604 return drap_vreg;
12606 else
12607 return NULL;
12610 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12612 static rtx
12613 ix86_internal_arg_pointer (void)
12615 return virtual_incoming_args_rtx;
12618 struct scratch_reg {
12619 rtx reg;
12620 bool saved;
12623 /* Return a short-lived scratch register for use on function entry.
12624 In 32-bit mode, it is valid only after the registers are saved
12625 in the prologue. This register must be released by means of
12626 release_scratch_register_on_entry once it is dead. */
12628 static void
12629 get_scratch_register_on_entry (struct scratch_reg *sr)
12631 int regno;
12633 sr->saved = false;
12635 if (TARGET_64BIT)
12637 /* We always use R11 in 64-bit mode. */
12638 regno = R11_REG;
12640 else
12642 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12643 bool fastcall_p
12644 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12645 bool thiscall_p
12646 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12647 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12648 int regparm = ix86_function_regparm (fntype, decl);
12649 int drap_regno
12650 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12652 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12653 for the static chain register. */
12654 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12655 && drap_regno != AX_REG)
12656 regno = AX_REG;
12657 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12658 for the static chain register. */
12659 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12660 regno = AX_REG;
12661 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12662 regno = DX_REG;
12663 /* ecx is the static chain register. */
12664 else if (regparm < 3 && !fastcall_p && !thiscall_p
12665 && !static_chain_p
12666 && drap_regno != CX_REG)
12667 regno = CX_REG;
12668 else if (ix86_save_reg (BX_REG, true, false))
12669 regno = BX_REG;
12670 /* esi is the static chain register. */
12671 else if (!(regparm == 3 && static_chain_p)
12672 && ix86_save_reg (SI_REG, true, false))
12673 regno = SI_REG;
12674 else if (ix86_save_reg (DI_REG, true, false))
12675 regno = DI_REG;
12676 else
12678 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12679 sr->saved = true;
12683 sr->reg = gen_rtx_REG (Pmode, regno);
12684 if (sr->saved)
12686 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12687 RTX_FRAME_RELATED_P (insn) = 1;
12691 /* Release a scratch register obtained from the preceding function.
12693 If RELEASE_VIA_POP is true, we just pop the register off the stack
12694 to release it. This is what non-Linux systems use with -fstack-check.
12696 Otherwise we use OFFSET to locate the saved register and the
12697 allocated stack space becomes part of the local frame and is
12698 deallocated by the epilogue. */
12700 static void
12701 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
12702 bool release_via_pop)
12704 if (sr->saved)
12706 if (release_via_pop)
12708 struct machine_function *m = cfun->machine;
12709 rtx x, insn = emit_insn (gen_pop (sr->reg));
12711 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12712 RTX_FRAME_RELATED_P (insn) = 1;
12713 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12714 x = gen_rtx_SET (stack_pointer_rtx, x);
12715 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12716 m->fs.sp_offset -= UNITS_PER_WORD;
12718 else
12720 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
12721 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
12722 emit_insn (x);
12727 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12729 This differs from the next routine in that it tries hard to prevent
12730 attacks that jump the stack guard. Thus it is never allowed to allocate
12731 more than PROBE_INTERVAL bytes of stack space without a suitable
12732 probe.
12734 INT_REGISTERS_SAVED is true if integer registers have already been
12735 pushed on the stack. */
12737 static void
12738 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
12739 const bool int_registers_saved)
12741 struct machine_function *m = cfun->machine;
12743 /* If this function does not statically allocate stack space, then
12744 no probes are needed. */
12745 if (!size)
12747 /* However, the allocation of space via pushes for register
12748 saves could be viewed as allocating space, but without the
12749 need to probe. */
12750 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12751 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12752 else
12753 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12754 return;
12757 /* If we are a noreturn function, then we have to consider the
12758 possibility that we're called via a jump rather than a call.
12760 Thus we don't have the implicit probe generated by saving the
12761 return address into the stack at the call. Thus, the stack
12762 pointer could be anywhere in the guard page. The safe thing
12763 to do is emit a probe now.
12765 The probe can be avoided if we have already emitted any callee
12766 register saves into the stack or have a frame pointer (which will
12767 have been saved as well). Those saves will function as implicit
12768 probes.
12770 ?!? This should be revamped to work like aarch64 and s390 where
12771 we track the offset from the most recent probe. Normally that
12772 offset would be zero. For a noreturn function we would reset
12773 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12774 we just probe when we cross PROBE_INTERVAL. */
12775 if (TREE_THIS_VOLATILE (cfun->decl)
12776 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
12778 /* We can safely use any register here since we're just going to push
12779 its value and immediately pop it back. But we do try and avoid
12780 argument passing registers so as not to introduce dependencies in
12781 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
12782 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12783 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
12784 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
12785 m->fs.sp_offset -= UNITS_PER_WORD;
12786 if (m->fs.cfa_reg == stack_pointer_rtx)
12788 m->fs.cfa_offset -= UNITS_PER_WORD;
12789 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12790 x = gen_rtx_SET (stack_pointer_rtx, x);
12791 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
12792 RTX_FRAME_RELATED_P (insn_push) = 1;
12793 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12794 x = gen_rtx_SET (stack_pointer_rtx, x);
12795 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
12796 RTX_FRAME_RELATED_P (insn_pop) = 1;
12798 emit_insn (gen_blockage ());
12801 /* If we allocate less than the size of the guard statically,
12802 then no probing is necessary, but we do need to allocate
12803 the stack. */
12804 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12806 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12807 GEN_INT (-size), -1,
12808 m->fs.cfa_reg == stack_pointer_rtx);
12809 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12810 return;
12813 /* We're allocating a large enough stack frame that we need to
12814 emit probes. Either emit them inline or in a loop depending
12815 on the size. */
12816 HOST_WIDE_INT probe_interval = get_probe_interval ();
12817 if (size <= 4 * probe_interval)
12819 HOST_WIDE_INT i;
12820 for (i = probe_interval; i <= size; i += probe_interval)
12822 /* Allocate PROBE_INTERVAL bytes. */
12823 rtx insn
12824 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12825 GEN_INT (-probe_interval), -1,
12826 m->fs.cfa_reg == stack_pointer_rtx);
12827 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12829 /* And probe at *sp. */
12830 emit_stack_probe (stack_pointer_rtx);
12831 emit_insn (gen_blockage ());
12834 /* We need to allocate space for the residual, but we do not need
12835 to probe the residual. */
12836 HOST_WIDE_INT residual = (i - probe_interval - size);
12837 if (residual)
12838 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12839 GEN_INT (residual), -1,
12840 m->fs.cfa_reg == stack_pointer_rtx);
12841 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12843 else
12845 /* We expect the GP registers to be saved when probes are used
12846 as the probing sequences might need a scratch register and
12847 the routine to allocate one assumes the integer registers
12848 have already been saved. */
12849 gcc_assert (int_registers_saved);
12851 struct scratch_reg sr;
12852 get_scratch_register_on_entry (&sr);
12854 /* If we needed to save a register, then account for any space
12855 that was pushed (we are not going to pop the register when
12856 we do the restore). */
12857 if (sr.saved)
12858 size -= UNITS_PER_WORD;
12860 /* Step 1: round SIZE down to a multiple of the interval. */
12861 HOST_WIDE_INT rounded_size = size & -probe_interval;
12863 /* Step 2: compute final value of the loop counter. Use lea if
12864 possible. */
12865 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12866 rtx insn;
12867 if (address_no_seg_operand (addr, Pmode))
12868 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12869 else
12871 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12872 insn = emit_insn (gen_rtx_SET (sr.reg,
12873 gen_rtx_PLUS (Pmode, sr.reg,
12874 stack_pointer_rtx)));
12876 if (m->fs.cfa_reg == stack_pointer_rtx)
12878 add_reg_note (insn, REG_CFA_DEF_CFA,
12879 plus_constant (Pmode, sr.reg,
12880 m->fs.cfa_offset + rounded_size));
12881 RTX_FRAME_RELATED_P (insn) = 1;
12884 /* Step 3: the loop. */
12885 rtx size_rtx = GEN_INT (rounded_size);
12886 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12887 size_rtx));
12888 if (m->fs.cfa_reg == stack_pointer_rtx)
12890 m->fs.cfa_offset += rounded_size;
12891 add_reg_note (insn, REG_CFA_DEF_CFA,
12892 plus_constant (Pmode, stack_pointer_rtx,
12893 m->fs.cfa_offset));
12894 RTX_FRAME_RELATED_P (insn) = 1;
12896 m->fs.sp_offset += rounded_size;
12897 emit_insn (gen_blockage ());
12899 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12900 is equal to ROUNDED_SIZE. */
12902 if (size != rounded_size)
12903 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12904 GEN_INT (rounded_size - size), -1,
12905 m->fs.cfa_reg == stack_pointer_rtx);
12906 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12908 /* This does not deallocate the space reserved for the scratch
12909 register. That will be deallocated in the epilogue. */
12910 release_scratch_register_on_entry (&sr, size, false);
12913 /* Make sure nothing is scheduled before we are done. */
12914 emit_insn (gen_blockage ());
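/* A minimal stand-alone sketch (not GCC code; names and values are
   illustrative) of the size bookkeeping used by the loop case above:
   round SIZE down to a multiple of the probe interval, probe once per
   interval, then allocate the unprobed residual.  */
#if 0
#include <stdio.h>

static void
sketch_stack_clash (long long size, long long probe_interval)
{
  /* Step 1: round SIZE down to a multiple of the interval.  */
  long long rounded_size = size & -probe_interval;
  long long residual = size - rounded_size;

  /* Steps 2-3: allocate and probe one interval at a time.  */
  for (long long allocated = probe_interval;
       allocated <= rounded_size;
       allocated += probe_interval)
    printf ("allocate %lld bytes, probe at *sp\n", probe_interval);

  /* Step 4: allocate the residual without probing it.  */
  if (residual)
    printf ("allocate residual %lld bytes\n", residual);
}

int
main (void)
{
  /* With a 4096-byte interval, a 10000-byte frame needs two probed
     intervals plus a 1808-byte residual allocation.  */
  sketch_stack_clash (10000, 4096);
  return 0;
}
#endif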
12917 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12919 INT_REGISTERS_SAVED is true if integer registers have already been
12920 pushed on the stack. */
12922 static void
12923 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
12924 const bool int_registers_saved)
12926 /* We skip the probe for the first interval + a small dope of 4 words and
12927 probe that many bytes past the specified size to maintain a protection
12928 area at the bottom of the stack. */
12929 const int dope = 4 * UNITS_PER_WORD;
12930 rtx size_rtx = GEN_INT (size), last;
12932 /* See if we have a constant small number of probes to generate. If so,
12933 that's the easy case. The run-time loop is made up of 9 insns in the
12934 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12935 for n # of intervals. */
12936 if (size <= 4 * get_probe_interval ())
12938 HOST_WIDE_INT i, adjust;
12939 bool first_probe = true;
12941 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12942 values of N from 1 until it exceeds SIZE. If only one probe is
12943 needed, this will not generate any code. Then adjust and probe
12944 to PROBE_INTERVAL + SIZE. */
12945 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12947 if (first_probe)
12949 adjust = 2 * get_probe_interval () + dope;
12950 first_probe = false;
12952 else
12953 adjust = get_probe_interval ();
12955 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12956 plus_constant (Pmode, stack_pointer_rtx,
12957 -adjust)));
12958 emit_stack_probe (stack_pointer_rtx);
12961 if (first_probe)
12962 adjust = size + get_probe_interval () + dope;
12963 else
12964 adjust = size + get_probe_interval () - i;
12966 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12967 plus_constant (Pmode, stack_pointer_rtx,
12968 -adjust)));
12969 emit_stack_probe (stack_pointer_rtx);
12971 /* Adjust back to account for the additional first interval. */
12972 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12973 plus_constant (Pmode, stack_pointer_rtx,
12974 (get_probe_interval ()
12975 + dope))));
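/* A worked example of the unrolled case above (a sketch, assuming 64-bit,
   where dope = 4 * 8 = 32 bytes, and the default 4 KiB probe interval):
   for SIZE = 8192 the loop body runs once, moving SP down by
   2*4096 + 32 = 8224 bytes and probing, then SP is moved down by another
   8192 + 4096 - 8192 = 4096 bytes and probed again, and finally moved
   back up by 4096 + 32 bytes, for a net allocation of 8192 bytes.  */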
12978 /* Otherwise, do the same as above, but in a loop. Note that we must be
12979 extra careful with variables wrapping around because we might be at
12980 the very top (or the very bottom) of the address space and we have
12981 to be able to handle this case properly; in particular, we use an
12982 equality test for the loop condition. */
12983 else
12985 /* We expect the GP registers to be saved when probes are used
12986 as the probing sequences might need a scratch register and
12987 the routine to allocate one assumes the integer registers
12988 have already been saved. */
12989 gcc_assert (int_registers_saved);
12991 HOST_WIDE_INT rounded_size;
12992 struct scratch_reg sr;
12994 get_scratch_register_on_entry (&sr);
12996 /* If we needed to save a register, then account for any space
12997 that was pushed (we are not going to pop the register when
12998 we do the restore). */
12999 if (sr.saved)
13000 size -= UNITS_PER_WORD;
13002 /* Step 1: round SIZE to the previous multiple of the interval. */
13004 rounded_size = ROUND_DOWN (size, get_probe_interval ());
13007 /* Step 2: compute initial and final value of the loop counter. */
13009 /* SP = SP_0 + PROBE_INTERVAL. */
13010 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13011 plus_constant (Pmode, stack_pointer_rtx,
13012 - (get_probe_interval () + dope))));
13014 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
13015 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
13016 emit_insn (gen_rtx_SET (sr.reg,
13017 plus_constant (Pmode, stack_pointer_rtx,
13018 -rounded_size)));
13019 else
13021 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
13022 emit_insn (gen_rtx_SET (sr.reg,
13023 gen_rtx_PLUS (Pmode, sr.reg,
13024 stack_pointer_rtx)));
13028 /* Step 3: the loop
13032 SP = SP + PROBE_INTERVAL
13033 probe at SP
13035 while (SP != LAST_ADDR)
13037 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
13038 values of N from 1 until it is equal to ROUNDED_SIZE. */
13040 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
13043 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
13044 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
13046 if (size != rounded_size)
13048 emit_insn (gen_rtx_SET (stack_pointer_rtx,
13049 plus_constant (Pmode, stack_pointer_rtx,
13050 rounded_size - size)));
13051 emit_stack_probe (stack_pointer_rtx);
13054 /* Adjust back to account for the additional first interval. */
13055 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
13056 plus_constant (Pmode, stack_pointer_rtx,
13057 (get_probe_interval ()
13058 + dope))));
13060 /* This does not deallocate the space reserved for the scratch
13061 register. That will be deallocated in the epilogue. */
13062 release_scratch_register_on_entry (&sr, size, false);
13065 /* Even if the stack pointer isn't the CFA register, we need to correctly
13066 describe the adjustments made to it, in particular differentiate the
13067 frame-related ones from the frame-unrelated ones. */
13068 if (size > 0)
13070 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
13071 XVECEXP (expr, 0, 0)
13072 = gen_rtx_SET (stack_pointer_rtx,
13073 plus_constant (Pmode, stack_pointer_rtx, -size));
13074 XVECEXP (expr, 0, 1)
13075 = gen_rtx_SET (stack_pointer_rtx,
13076 plus_constant (Pmode, stack_pointer_rtx,
13077 get_probe_interval () + dope + size));
13078 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
13079 RTX_FRAME_RELATED_P (last) = 1;
13081 cfun->machine->fs.sp_offset += size;
13084 /* Make sure nothing is scheduled before we are done. */
13085 emit_insn (gen_blockage ());
13088 /* Adjust the stack pointer up to REG while probing it. */
13090 const char *
13091 output_adjust_stack_and_probe (rtx reg)
13093 static int labelno = 0;
13094 char loop_lab[32];
13095 rtx xops[2];
13097 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13099 /* Loop. */
13100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13102 /* SP = SP + PROBE_INTERVAL. */
13103 xops[0] = stack_pointer_rtx;
13104 xops[1] = GEN_INT (get_probe_interval ());
13105 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13107 /* Probe at SP. */
13108 xops[1] = const0_rtx;
13109 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
13111 /* Test if SP == LAST_ADDR. */
13112 xops[0] = stack_pointer_rtx;
13113 xops[1] = reg;
13114 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13116 /* Branch. */
13117 fputs ("\tjne\t", asm_out_file);
13118 assemble_name_raw (asm_out_file, loop_lab);
13119 fputc ('\n', asm_out_file);
13121 return "";
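/* For illustration (a sketch, assuming 64-bit, the default 4 KiB probe
   interval and %r11 as REG), the loop printed by the templates above
   looks roughly like this in AT&T syntax:

	.LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%r11, %rsp
	jne	.LPSRL0
*/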
13124 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
13125 inclusive. These are offsets from the current stack pointer.
13127 INT_REGISTERS_SAVED is true if integer registers have already been
13128 pushed on the stack. */
13130 static void
13131 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
13132 const bool int_registers_saved)
13134 /* See if we have a constant small number of probes to generate. If so,
13135 that's the easy case. The run-time loop is made up of 6 insns in the
13136 generic case while the compile-time loop is made up of n insns for n #
13137 of intervals. */
13138 if (size <= 6 * get_probe_interval ())
13140 HOST_WIDE_INT i;
13142 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
13143 it exceeds SIZE. If only one probe is needed, this will not
13144 generate any code. Then probe at FIRST + SIZE. */
13145 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
13146 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13147 -(first + i)));
13149 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13150 -(first + size)));
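/* A worked example of the unrolled case above (a sketch, assuming the
   default 4 KiB probe interval): for FIRST = 0x1000 and SIZE = 0x3000
   this emits probes at SP - 0x2000, SP - 0x3000 and finally SP - 0x4000.  */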
13153 /* Otherwise, do the same as above, but in a loop. Note that we must be
13154 extra careful with variables wrapping around because we might be at
13155 the very top (or the very bottom) of the address space and we have
13156 to be able to handle this case properly; in particular, we use an
13157 equality test for the loop condition. */
13158 else
13160 /* We expect the GP registers to be saved when probes are used
13161 as the probing sequences might need a scratch register and
13162 the routine to allocate one assumes the integer registers
13163 have already been saved. */
13164 gcc_assert (int_registers_saved);
13166 HOST_WIDE_INT rounded_size, last;
13167 struct scratch_reg sr;
13169 get_scratch_register_on_entry (&sr);
13172 /* Step 1: round SIZE to the previous multiple of the interval. */
13174 rounded_size = ROUND_DOWN (size, get_probe_interval ());
13177 /* Step 2: compute initial and final value of the loop counter. */
13179 /* TEST_OFFSET = FIRST. */
13180 emit_move_insn (sr.reg, GEN_INT (-first));
13182 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
13183 last = first + rounded_size;
13186 /* Step 3: the loop
13190 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
13191 probe at TEST_ADDR
13193 while (TEST_ADDR != LAST_ADDR)
13195 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
13196 until it is equal to ROUNDED_SIZE. */
13198 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
13201 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
13202 that SIZE is equal to ROUNDED_SIZE. */
13204 if (size != rounded_size)
13205 emit_stack_probe (plus_constant (Pmode,
13206 gen_rtx_PLUS (Pmode,
13207 stack_pointer_rtx,
13208 sr.reg),
13209 rounded_size - size));
13211 release_scratch_register_on_entry (&sr, size, true);
13214 /* Make sure nothing is scheduled before we are done. */
13215 emit_insn (gen_blockage ());
13218 /* Probe a range of stack addresses from REG to END, inclusive. These are
13219 offsets from the current stack pointer. */
13221 const char *
13222 output_probe_stack_range (rtx reg, rtx end)
13224 static int labelno = 0;
13225 char loop_lab[32];
13226 rtx xops[3];
13228 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13230 /* Loop. */
13231 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13233 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
13234 xops[0] = reg;
13235 xops[1] = GEN_INT (get_probe_interval ());
13236 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13238 /* Probe at TEST_ADDR. */
13239 xops[0] = stack_pointer_rtx;
13240 xops[1] = reg;
13241 xops[2] = const0_rtx;
13242 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
13244 /* Test if TEST_ADDR == LAST_ADDR. */
13245 xops[0] = reg;
13246 xops[1] = end;
13247 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13249 /* Branch. */
13250 fputs ("\tjne\t", asm_out_file);
13251 assemble_name_raw (asm_out_file, loop_lab);
13252 fputc ('\n', asm_out_file);
13254 return "";
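/* For illustration (a sketch, assuming 64-bit, a 4 KiB probe interval,
   %r11 as REG and a constant -16384 as END), the loop printed by the
   templates above looks roughly like this in AT&T syntax:

	.LPSRL1:
	subq	$4096, %r11
	orq	$0, (%rsp,%r11)
	cmpq	$-16384, %r11
	jne	.LPSRL1
*/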
13257 /* Return true if stack frame is required. Update STACK_ALIGNMENT
13258 to the largest alignment, in bits, of stack slot used if stack
13259 frame is required and CHECK_STACK_SLOT is true. */
13261 static bool
13262 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
13263 bool check_stack_slot)
13265 HARD_REG_SET set_up_by_prologue, prologue_used;
13266 basic_block bb;
13268 CLEAR_HARD_REG_SET (prologue_used);
13269 CLEAR_HARD_REG_SET (set_up_by_prologue);
13270 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
13271 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
13272 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
13273 HARD_FRAME_POINTER_REGNUM);
13275 /* The preferred stack alignment is the minimum stack alignment. */
13276 if (stack_alignment > crtl->preferred_stack_boundary)
13277 stack_alignment = crtl->preferred_stack_boundary;
13279 bool require_stack_frame = false;
13281 FOR_EACH_BB_FN (bb, cfun)
13283 rtx_insn *insn;
13284 FOR_BB_INSNS (bb, insn)
13285 if (NONDEBUG_INSN_P (insn)
13286 && requires_stack_frame_p (insn, prologue_used,
13287 set_up_by_prologue))
13289 require_stack_frame = true;
13291 if (check_stack_slot)
13293 /* Find the maximum stack alignment. */
13294 subrtx_iterator::array_type array;
13295 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13296 if (MEM_P (*iter)
13297 && (reg_mentioned_p (stack_pointer_rtx,
13298 *iter)
13299 || reg_mentioned_p (frame_pointer_rtx,
13300 *iter)))
13302 unsigned int alignment = MEM_ALIGN (*iter);
13303 if (alignment > stack_alignment)
13304 stack_alignment = alignment;
13310 return require_stack_frame;
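/* For illustration (a sketch): a function whose only stack use is an
   aligned 32-byte vector spill addressed off the stack or frame pointer
   would make this return true and raise STACK_ALIGNMENT to 256 bits,
   whereas a function with no such memory accesses and no insns that need
   the prologue leaves STACK_ALIGNMENT alone and returns false.  */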
13313 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
13314 will guide prologue/epilogue to be generated in correct form. */
13316 static void
13317 ix86_finalize_stack_frame_flags (void)
13319 /* Check if stack realignment is really needed after reload, and
13320 store the result in cfun. */
13321 unsigned int incoming_stack_boundary
13322 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
13323 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
13324 unsigned int stack_alignment
13325 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
13326 ? crtl->max_used_stack_slot_alignment
13327 : crtl->stack_alignment_needed);
13328 unsigned int stack_realign
13329 = (incoming_stack_boundary < stack_alignment);
13330 bool recompute_frame_layout_p = false;
13332 if (crtl->stack_realign_finalized)
13334 /* After stack_realign_needed is finalized, we can no longer
13335 change it. */
13336 gcc_assert (crtl->stack_realign_needed == stack_realign);
13337 return;
13340 /* If the only reason for frame_pointer_needed is that we conservatively
13341 assumed stack realignment might be needed or -fno-omit-frame-pointer
13342 is used, but in the end nothing that needed the stack alignment had
13343 been spilled and there was no stack access, clear frame_pointer_needed
13344 and say we don't need stack realignment. */
13345 if ((stack_realign || !flag_omit_frame_pointer)
13346 && frame_pointer_needed
13347 && crtl->is_leaf
13348 && crtl->sp_is_unchanging
13349 && !ix86_current_function_calls_tls_descriptor
13350 && !crtl->accesses_prior_frames
13351 && !cfun->calls_alloca
13352 && !crtl->calls_eh_return
13353 /* See ira_setup_eliminable_regset for the rationale. */
13354 && !(STACK_CHECK_MOVING_SP
13355 && flag_stack_check
13356 && flag_exceptions
13357 && cfun->can_throw_non_call_exceptions)
13358 && !ix86_frame_pointer_required ()
13359 && get_frame_size () == 0
13360 && ix86_nsaved_sseregs () == 0
13361 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
13363 if (ix86_find_max_used_stack_alignment (stack_alignment,
13364 stack_realign))
13366 /* Stack frame is required. If stack alignment needed is less
13367 than incoming stack boundary, don't realign stack. */
13368 stack_realign = incoming_stack_boundary < stack_alignment;
13369 if (!stack_realign)
13371 crtl->max_used_stack_slot_alignment
13372 = incoming_stack_boundary;
13373 crtl->stack_alignment_needed
13374 = incoming_stack_boundary;
13375 /* Also update preferred_stack_boundary for leaf
13376 functions. */
13377 crtl->preferred_stack_boundary
13378 = incoming_stack_boundary;
13381 else
13383 /* If drap has been set, but it actually isn't live at the
13384 start of the function, there is no reason to set it up. */
13385 if (crtl->drap_reg)
13387 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13388 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
13389 REGNO (crtl->drap_reg)))
13391 crtl->drap_reg = NULL_RTX;
13392 crtl->need_drap = false;
13395 else
13396 cfun->machine->no_drap_save_restore = true;
13398 frame_pointer_needed = false;
13399 stack_realign = false;
13400 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
13401 crtl->stack_alignment_needed = incoming_stack_boundary;
13402 crtl->stack_alignment_estimated = incoming_stack_boundary;
13403 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
13404 crtl->preferred_stack_boundary = incoming_stack_boundary;
13405 df_finish_pass (true);
13406 df_scan_alloc (NULL);
13407 df_scan_blocks ();
13408 df_compute_regs_ever_live (true);
13409 df_analyze ();
13411 if (flag_var_tracking)
13413 /* Since frame pointer is no longer available, replace it with
13414 stack pointer - UNITS_PER_WORD in debug insns. */
13415 df_ref ref, next;
13416 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
13417 ref; ref = next)
13419 next = DF_REF_NEXT_REG (ref);
13420 if (!DF_REF_INSN_INFO (ref))
13421 continue;
13423 /* Make sure the next ref is for a different instruction,
13424 so that we're not affected by the rescan. */
13425 rtx_insn *insn = DF_REF_INSN (ref);
13426 while (next && DF_REF_INSN (next) == insn)
13427 next = DF_REF_NEXT_REG (next);
13429 if (DEBUG_INSN_P (insn))
13431 bool changed = false;
13432 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
13434 rtx *loc = DF_REF_LOC (ref);
13435 if (*loc == hard_frame_pointer_rtx)
13437 *loc = plus_constant (Pmode,
13438 stack_pointer_rtx,
13439 -UNITS_PER_WORD);
13440 changed = true;
13443 if (changed)
13444 df_insn_rescan (insn);
13449 recompute_frame_layout_p = true;
13452 else if (crtl->max_used_stack_slot_alignment
13453 > crtl->preferred_stack_boundary)
13455 /* We don't need to realign stack. But we still need to keep
13456 stack frame properly aligned to satisfy the largest alignment
13457 of stack slots. */
13458 if (ix86_find_max_used_stack_alignment (stack_alignment, true))
13459 cfun->machine->max_used_stack_alignment
13460 = stack_alignment / BITS_PER_UNIT;
13463 if (crtl->stack_realign_needed != stack_realign)
13464 recompute_frame_layout_p = true;
13465 crtl->stack_realign_needed = stack_realign;
13466 crtl->stack_realign_finalized = true;
13467 if (recompute_frame_layout_p)
13468 ix86_compute_frame_layout ();
13471 /* Delete SET_GOT right after entry block if it is allocated to reg. */
13473 static void
13474 ix86_elim_entry_set_got (rtx reg)
13476 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13477 rtx_insn *c_insn = BB_HEAD (bb);
13478 if (!NONDEBUG_INSN_P (c_insn))
13479 c_insn = next_nonnote_nondebug_insn (c_insn);
13480 if (c_insn && NONJUMP_INSN_P (c_insn))
13482 rtx pat = PATTERN (c_insn);
13483 if (GET_CODE (pat) == PARALLEL)
13485 rtx vec = XVECEXP (pat, 0, 0);
13486 if (GET_CODE (vec) == SET
13487 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
13488 && REGNO (XEXP (vec, 0)) == REGNO (reg))
13489 delete_insn (c_insn);
13494 static rtx
13495 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
13497 rtx addr, mem;
13499 if (offset)
13500 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
13501 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
13502 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
13505 static inline rtx
13506 gen_frame_load (rtx reg, rtx frame_reg, int offset)
13508 return gen_frame_set (reg, frame_reg, offset, false);
13511 static inline rtx
13512 gen_frame_store (rtx reg, rtx frame_reg, int offset)
13514 return gen_frame_set (reg, frame_reg, offset, true);
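/* For illustration (a sketch): gen_frame_store (xmm6, rax, -48) builds
	(set (mem:V4SF (plus:DI (reg:DI ax) (const_int -48))) (reg:V4SF xmm6))
   on 64-bit, while gen_frame_load with the same arguments builds the
   mirror-image SET that loads the register back from the frame slot.  */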
13517 static void
13518 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
13520 struct machine_function *m = cfun->machine;
13521 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13522 + m->call_ms2sysv_extra_regs;
13523 rtvec v = rtvec_alloc (ncregs + 1);
13524 unsigned int align, i, vi = 0;
13525 rtx_insn *insn;
13526 rtx sym, addr;
13527 rtx rax = gen_rtx_REG (word_mode, AX_REG);
13528 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13530 /* AL should only be live with sysv_abi. */
13531 gcc_assert (!ix86_eax_live_at_start_p ());
13532 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
13534 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
13535 regardless of whether we've actually realigned the stack or not. */
13536 align = GET_MODE_ALIGNMENT (V4SFmode);
13537 addr = choose_baseaddr (frame.stack_realign_offset
13538 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
13539 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13541 emit_insn (gen_rtx_SET (rax, addr));
13543 /* Get the stub symbol. */
13544 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
13545 : XLOGUE_STUB_SAVE);
13546 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13548 for (i = 0; i < ncregs; ++i)
13550 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13551 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
13552 r.regno);
13553 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
13556 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
13558 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
13559 RTX_FRAME_RELATED_P (insn) = true;
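/* For illustration (a sketch, with placeholder offsets and stub name):
   the PARALLEL emitted above has the shape

	(parallel [(use (symbol_ref <save stub>))
		   (set (mem:V4SF (plus (reg rax) (const_int -N))) (reg xmm6))
		   ...
		   (set (mem:DI (plus (reg rax) (const_int -M))) (reg rsi))])

   i.e. one USE of the stub symbol followed by one frame store per
   clobbered register, all addressed relative to RAX.  */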
13562 /* Expand the prologue into a bunch of separate insns. */
13564 void
13565 ix86_expand_prologue (void)
13567 struct machine_function *m = cfun->machine;
13568 rtx insn, t;
13569 HOST_WIDE_INT allocate;
13570 bool int_registers_saved;
13571 bool sse_registers_saved;
13572 bool save_stub_call_needed;
13573 rtx static_chain = NULL_RTX;
13575 if (ix86_function_naked (current_function_decl))
13576 return;
13578 ix86_finalize_stack_frame_flags ();
13580 /* DRAP should not coexist with stack_realign_fp */
13581 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
13583 memset (&m->fs, 0, sizeof (m->fs));
13585 /* Initialize CFA state for before the prologue. */
13586 m->fs.cfa_reg = stack_pointer_rtx;
13587 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
13589 /* Track SP offset to the CFA. We continue tracking this after we've
13590 swapped the CFA register away from SP. In the case of re-alignment
13591 this is fudged; we're interested in offsets within the local frame. */
13592 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13593 m->fs.sp_valid = true;
13594 m->fs.sp_realigned = false;
13596 const struct ix86_frame &frame = cfun->machine->frame;
13598 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
13600 /* We should have already generated an error for any use of
13601 ms_hook on a nested function. */
13602 gcc_checking_assert (!ix86_static_chain_on_stack);
13604 /* Check if profiling is active and we shall use profiling before
13605 prologue variant. If so sorry. */
13606 if (crtl->profile && flag_fentry != 0)
13607 sorry ("ms_hook_prologue attribute isn%'t compatible "
13608 "with -mfentry for 32-bit");
13610 /* In ix86_asm_output_function_label we emitted:
13611 8b ff movl.s %edi,%edi
13612 55 push %ebp
13613 8b ec movl.s %esp,%ebp
13615 This matches the hookable function prologue in Win32 API
13616 functions in Microsoft Windows XP Service Pack 2 and newer.
13617 Wine uses this to enable Windows apps to hook the Win32 API
13618 functions provided by Wine.
13620 What that means is that we've already set up the frame pointer. */
13622 if (frame_pointer_needed
13623 && !(crtl->drap_reg && crtl->stack_realign_needed))
13625 rtx push, mov;
13627 /* We've decided to use the frame pointer already set up.
13628 Describe this to the unwinder by pretending that both
13629 push and mov insns happen right here.
13631 Putting the unwind info here at the end of the ms_hook
13632 is done so that we can make absolutely certain we get
13633 the required byte sequence at the start of the function,
13634 rather than relying on an assembler that can produce
13635 the exact encoding required.
13637 However it does mean (in the unpatched case) that we have
13638 a 1 insn window where the asynchronous unwind info is
13639 incorrect. However, if we placed the unwind info at
13640 its correct location we would have incorrect unwind info
13641 in the patched case. Which is probably all moot since
13642 I don't expect Wine generates dwarf2 unwind info for the
13643 system libraries that use this feature. */
13645 insn = emit_insn (gen_blockage ());
13647 push = gen_push (hard_frame_pointer_rtx);
13648 mov = gen_rtx_SET (hard_frame_pointer_rtx,
13649 stack_pointer_rtx);
13650 RTX_FRAME_RELATED_P (push) = 1;
13651 RTX_FRAME_RELATED_P (mov) = 1;
13653 RTX_FRAME_RELATED_P (insn) = 1;
13654 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13655 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13657 /* Note that gen_push incremented m->fs.cfa_offset, even
13658 though we didn't emit the push insn here. */
13659 m->fs.cfa_reg = hard_frame_pointer_rtx;
13660 m->fs.fp_offset = m->fs.cfa_offset;
13661 m->fs.fp_valid = true;
13663 else
13665 /* The frame pointer is not needed so pop %ebp again.
13666 This leaves us with a pristine state. */
13667 emit_insn (gen_pop (hard_frame_pointer_rtx));
13671 /* The first insn of a function that accepts its static chain on the
13672 stack is to push the register that would be filled in by a direct
13673 call. This insn will be skipped by the trampoline. */
13674 else if (ix86_static_chain_on_stack)
13676 static_chain = ix86_static_chain (cfun->decl, false);
13677 insn = emit_insn (gen_push (static_chain));
13678 emit_insn (gen_blockage ());
13680 /* We don't want to interpret this push insn as a register save,
13681 only as a stack adjustment. The real copy of the register as
13682 a save will be done later, if needed. */
13683 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13684 t = gen_rtx_SET (stack_pointer_rtx, t);
13685 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13686 RTX_FRAME_RELATED_P (insn) = 1;
13689 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
13690 DRAP is needed and stack realignment is really needed after reload. */
13691 if (stack_realign_drap)
13693 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13695 /* Can't use DRAP in interrupt function. */
13696 if (cfun->machine->func_type != TYPE_NORMAL)
13697 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13698 "in interrupt service routine. This may be worked "
13699 "around by avoiding functions with aggregate return.");
13701 /* Only need to push the parameter pointer reg if it is call-saved. */
13702 if (!call_used_regs[REGNO (crtl->drap_reg)])
13704 /* Push arg pointer reg */
13705 insn = emit_insn (gen_push (crtl->drap_reg));
13706 RTX_FRAME_RELATED_P (insn) = 1;
13709 /* Grab the argument pointer. */
13710 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13711 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13712 RTX_FRAME_RELATED_P (insn) = 1;
13713 m->fs.cfa_reg = crtl->drap_reg;
13714 m->fs.cfa_offset = 0;
13716 /* Align the stack. */
13717 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13718 stack_pointer_rtx,
13719 GEN_INT (-align_bytes)));
13720 RTX_FRAME_RELATED_P (insn) = 1;
13722 /* Replicate the return address on the stack so that return
13723 address can be reached via (argp - 1) slot. This is needed
13724 to implement macro RETURN_ADDR_RTX and intrinsic function
13725 expand_builtin_return_addr etc. */
13726 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13727 t = gen_frame_mem (word_mode, t);
13728 insn = emit_insn (gen_push (t));
13729 RTX_FRAME_RELATED_P (insn) = 1;
13731 /* For the purposes of frame and register save area addressing,
13732 we've started over with a new frame. */
13733 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13734 m->fs.realigned = true;
13736 if (static_chain)
13738 /* Replicate static chain on the stack so that static chain
13739 can be reached via (argp - 2) slot. This is needed for
13740 nested function with stack realignment. */
13741 insn = emit_insn (gen_push (static_chain));
13742 RTX_FRAME_RELATED_P (insn) = 1;
13746 int_registers_saved = (frame.nregs == 0);
13747 sse_registers_saved = (frame.nsseregs == 0);
13748 save_stub_call_needed = (m->call_ms2sysv);
13749 gcc_assert (sse_registers_saved || !save_stub_call_needed);
13751 if (frame_pointer_needed && !m->fs.fp_valid)
13753 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13754 slower on all targets. Also sdb didn't like it. */
13755 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13756 RTX_FRAME_RELATED_P (insn) = 1;
13758 /* Push registers now, before setting the frame pointer
13759 on SEH target. */
13760 if (!int_registers_saved
13761 && TARGET_SEH
13762 && !frame.save_regs_using_mov)
13764 ix86_emit_save_regs ();
13765 int_registers_saved = true;
13766 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13769 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13771 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13772 RTX_FRAME_RELATED_P (insn) = 1;
13774 if (m->fs.cfa_reg == stack_pointer_rtx)
13775 m->fs.cfa_reg = hard_frame_pointer_rtx;
13776 m->fs.fp_offset = m->fs.sp_offset;
13777 m->fs.fp_valid = true;
13781 if (!int_registers_saved)
13783 /* If saving registers via PUSH, do so now. */
13784 if (!frame.save_regs_using_mov)
13786 ix86_emit_save_regs ();
13787 int_registers_saved = true;
13788 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13791 /* When using red zone we may start register saving before allocating
13792 the stack frame saving one cycle of the prologue. However, avoid
13793 doing this if we have to probe the stack; at least on x86_64 the
13794 stack probe can turn into a call that clobbers a red zone location. */
13795 else if (ix86_using_red_zone ()
13796 && (! TARGET_STACK_PROBE
13797 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13799 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13800 int_registers_saved = true;
13804 if (stack_realign_fp)
13806 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13807 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13809 /* Record last valid frame pointer offset. */
13810 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13812 /* The computation of the size of the re-aligned stack frame means
13813 that we must allocate the size of the register save area before
13814 performing the actual alignment. Otherwise we cannot guarantee
13815 that there's enough storage above the realignment point. */
13816 allocate = frame.reg_save_offset - m->fs.sp_offset
13817 + frame.stack_realign_allocate;
13818 if (allocate)
13819 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13820 GEN_INT (-allocate), -1, false);
13822 /* Align the stack. */
13823 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13824 stack_pointer_rtx,
13825 GEN_INT (-align_bytes)));
13826 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13827 m->fs.sp_realigned_offset = m->fs.sp_offset
13828 - frame.stack_realign_allocate;
13829 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13830 Beyond this point, stack access should be done via choose_baseaddr or
13831 by using sp_valid_at and fp_valid_at to determine the correct base
13832 register. Henceforth, any CFA offset should be thought of as logical
13833 and not physical. */
13834 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13835 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13836 m->fs.sp_realigned = true;
13838 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13839 is needed to describe where a register is saved using a realigned
13840 stack pointer, so we need to invalidate the stack pointer for that
13841 target. */
13842 if (TARGET_SEH)
13843 m->fs.sp_valid = false;
13845 /* If SP offset is non-immediate after allocation of the stack frame,
13846 then emit SSE saves or stub call prior to allocating the rest of the
13847 stack frame. This is less efficient for the out-of-line stub because
13848 we can't combine allocations across the call barrier, but it's better
13849 than using a scratch register. */
13850 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13851 - m->fs.sp_realigned_offset),
13852 Pmode))
13854 if (!sse_registers_saved)
13856 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13857 sse_registers_saved = true;
13859 else if (save_stub_call_needed)
13861 ix86_emit_outlined_ms2sysv_save (frame);
13862 save_stub_call_needed = false;
13867 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13869 if (flag_stack_usage_info)
13871 /* We start to count from ARG_POINTER. */
13872 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13874 /* If it was realigned, take into account the fake frame. */
13875 if (stack_realign_drap)
13877 if (ix86_static_chain_on_stack)
13878 stack_size += UNITS_PER_WORD;
13880 if (!call_used_regs[REGNO (crtl->drap_reg)])
13881 stack_size += UNITS_PER_WORD;
13883 /* This over-estimates by 1 minimal-stack-alignment-unit but
13884 mitigates that by counting in the new return address slot. */
13885 current_function_dynamic_stack_size
13886 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13889 current_function_static_stack_size = stack_size;
13892 /* On SEH target with very large frame size, allocate an area to save
13893 SSE registers (as the very large allocation won't be described). */
13894 if (TARGET_SEH
13895 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13896 && !sse_registers_saved)
13898 HOST_WIDE_INT sse_size =
13899 frame.sse_reg_save_offset - frame.reg_save_offset;
13901 gcc_assert (int_registers_saved);
13903 /* No need to do stack checking as the area will be immediately
13904 written. */
13905 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13906 GEN_INT (-sse_size), -1,
13907 m->fs.cfa_reg == stack_pointer_rtx);
13908 allocate -= sse_size;
13909 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13910 sse_registers_saved = true;
13913 /* The stack has already been decremented by the instruction calling us
13914 so probe if the size is non-negative to preserve the protection area. */
13915 if (allocate >= 0
13916 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13917 || flag_stack_clash_protection))
13919 if (flag_stack_clash_protection)
13921 ix86_adjust_stack_and_probe_stack_clash (allocate,
13922 int_registers_saved);
13923 allocate = 0;
13925 else if (STACK_CHECK_MOVING_SP)
13927 if (!(crtl->is_leaf && !cfun->calls_alloca
13928 && allocate <= get_probe_interval ()))
13930 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
13931 allocate = 0;
13934 else
13936 HOST_WIDE_INT size = allocate;
13938 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13939 size = 0x80000000 - get_stack_check_protect () - 1;
13941 if (TARGET_STACK_PROBE)
13943 if (crtl->is_leaf && !cfun->calls_alloca)
13945 if (size > get_probe_interval ())
13946 ix86_emit_probe_stack_range (0, size, int_registers_saved);
13948 else
13949 ix86_emit_probe_stack_range (0,
13950 size + get_stack_check_protect (),
13951 int_registers_saved);
13953 else
13955 if (crtl->is_leaf && !cfun->calls_alloca)
13957 if (size > get_probe_interval ()
13958 && size > get_stack_check_protect ())
13959 ix86_emit_probe_stack_range (get_stack_check_protect (),
13960 (size
13961 - get_stack_check_protect ()),
13962 int_registers_saved);
13964 else
13965 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
13966 int_registers_saved);
13971 if (allocate == 0)
13973 else if (!ix86_target_stack_probe ()
13974 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13976 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13977 GEN_INT (-allocate), -1,
13978 m->fs.cfa_reg == stack_pointer_rtx);
13980 else
13982 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13983 rtx r10 = NULL;
13984 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13985 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13986 bool eax_live = ix86_eax_live_at_start_p ();
13987 bool r10_live = false;
13989 if (TARGET_64BIT)
13990 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13992 if (eax_live)
13994 insn = emit_insn (gen_push (eax));
13995 allocate -= UNITS_PER_WORD;
13996 /* Note that SEH directives need to continue tracking the stack
13997 pointer even after the frame pointer has been set up. */
13998 if (sp_is_cfa_reg || TARGET_SEH)
14000 if (sp_is_cfa_reg)
14001 m->fs.cfa_offset += UNITS_PER_WORD;
14002 RTX_FRAME_RELATED_P (insn) = 1;
14003 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14004 gen_rtx_SET (stack_pointer_rtx,
14005 plus_constant (Pmode, stack_pointer_rtx,
14006 -UNITS_PER_WORD)));
14010 if (r10_live)
14012 r10 = gen_rtx_REG (Pmode, R10_REG);
14013 insn = emit_insn (gen_push (r10));
14014 allocate -= UNITS_PER_WORD;
14015 if (sp_is_cfa_reg || TARGET_SEH)
14017 if (sp_is_cfa_reg)
14018 m->fs.cfa_offset += UNITS_PER_WORD;
14019 RTX_FRAME_RELATED_P (insn) = 1;
14020 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14021 gen_rtx_SET (stack_pointer_rtx,
14022 plus_constant (Pmode, stack_pointer_rtx,
14023 -UNITS_PER_WORD)));
14027 emit_move_insn (eax, GEN_INT (allocate));
14028 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
14030 /* Use the fact that AX still contains ALLOCATE. */
14031 adjust_stack_insn = (Pmode == DImode
14032 ? gen_pro_epilogue_adjust_stack_di_sub
14033 : gen_pro_epilogue_adjust_stack_si_sub);
14035 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
14036 stack_pointer_rtx, eax));
14038 if (sp_is_cfa_reg || TARGET_SEH)
14040 if (sp_is_cfa_reg)
14041 m->fs.cfa_offset += allocate;
14042 RTX_FRAME_RELATED_P (insn) = 1;
14043 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14044 gen_rtx_SET (stack_pointer_rtx,
14045 plus_constant (Pmode, stack_pointer_rtx,
14046 -allocate)));
14048 m->fs.sp_offset += allocate;
14050 /* Use stack_pointer_rtx for relative addressing so that code
14051 works for realigned stack, too. */
14052 if (r10_live && eax_live)
14054 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14055 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14056 gen_frame_mem (word_mode, t));
14057 t = plus_constant (Pmode, t, UNITS_PER_WORD);
14058 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
14059 gen_frame_mem (word_mode, t));
14061 else if (eax_live || r10_live)
14063 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14064 emit_move_insn (gen_rtx_REG (word_mode,
14065 (eax_live ? AX_REG : R10_REG)),
14066 gen_frame_mem (word_mode, t));
14069 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
14071 /* If we haven't already set up the frame pointer, do so now. */
14072 if (frame_pointer_needed && !m->fs.fp_valid)
14074 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
14075 GEN_INT (frame.stack_pointer_offset
14076 - frame.hard_frame_pointer_offset));
14077 insn = emit_insn (insn);
14078 RTX_FRAME_RELATED_P (insn) = 1;
14079 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
14081 if (m->fs.cfa_reg == stack_pointer_rtx)
14082 m->fs.cfa_reg = hard_frame_pointer_rtx;
14083 m->fs.fp_offset = frame.hard_frame_pointer_offset;
14084 m->fs.fp_valid = true;
14087 if (!int_registers_saved)
14088 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
14089 if (!sse_registers_saved)
14090 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
14091 else if (save_stub_call_needed)
14092 ix86_emit_outlined_ms2sysv_save (frame);
14094 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
14095 in PROLOGUE. */
14096 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
14098 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
14099 insn = emit_insn (gen_set_got (pic));
14100 RTX_FRAME_RELATED_P (insn) = 1;
14101 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
14102 emit_insn (gen_prologue_use (pic));
14103 /* Delete an already emitted SET_GOT if it exists and is allocated to
14104 REAL_PIC_OFFSET_TABLE_REGNUM. */
14105 ix86_elim_entry_set_got (pic);
14108 if (crtl->drap_reg && !crtl->stack_realign_needed)
14110 /* vDRAP is set up, but after reload it turns out stack realignment
14111 isn't necessary; here we emit prologue code to set up DRAP
14112 without the stack realignment adjustment. */
14113 t = choose_baseaddr (0, NULL);
14114 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
14117 /* Prevent instructions from being scheduled into register save push
14118 sequence when access to the redzone area is done through frame pointer.
14119 The offset between the frame pointer and the stack pointer is calculated
14120 relative to the value of the stack pointer at the end of the function
14121 prologue, and moving instructions that access redzone area via frame
14122 pointer inside push sequence violates this assumption. */
14123 if (frame_pointer_needed && frame.red_zone_size)
14124 emit_insn (gen_memory_blockage ());
14126 /* SEH requires that the prologue end within 256 bytes of the start of
14127 the function. Prevent instruction schedules that would extend that.
14128 Further, prevent alloca modifications to the stack pointer from being
14129 combined with prologue modifications. */
14130 if (TARGET_SEH)
14131 emit_insn (gen_prologue_use (stack_pointer_rtx));
14134 /* Emit code to restore REG using a POP insn. */
14136 static void
14137 ix86_emit_restore_reg_using_pop (rtx reg)
14139 struct machine_function *m = cfun->machine;
14140 rtx_insn *insn = emit_insn (gen_pop (reg));
14142 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
14143 m->fs.sp_offset -= UNITS_PER_WORD;
14145 if (m->fs.cfa_reg == crtl->drap_reg
14146 && REGNO (reg) == REGNO (crtl->drap_reg))
14148 /* Previously we'd represented the CFA as an expression
14149 like *(%ebp - 8). We've just popped that value from
14150 the stack, which means we need to reset the CFA to
14151 the drap register. This will remain until we restore
14152 the stack pointer. */
14153 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14154 RTX_FRAME_RELATED_P (insn) = 1;
14156 /* This means that the DRAP register is valid for addressing too. */
14157 m->fs.drap_valid = true;
14158 return;
14161 if (m->fs.cfa_reg == stack_pointer_rtx)
14163 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14164 x = gen_rtx_SET (stack_pointer_rtx, x);
14165 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14166 RTX_FRAME_RELATED_P (insn) = 1;
14168 m->fs.cfa_offset -= UNITS_PER_WORD;
14171 /* When the frame pointer is the CFA, and we pop it, we are
14172 swapping back to the stack pointer as the CFA. This happens
14173 for stack frames that don't allocate other data, so we assume
14174 the stack pointer is now pointing at the return address, i.e.
14175 the function entry state, which makes the offset be 1 word. */
14176 if (reg == hard_frame_pointer_rtx)
14178 m->fs.fp_valid = false;
14179 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14181 m->fs.cfa_reg = stack_pointer_rtx;
14182 m->fs.cfa_offset -= UNITS_PER_WORD;
14184 add_reg_note (insn, REG_CFA_DEF_CFA,
14185 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14186 GEN_INT (m->fs.cfa_offset)));
14187 RTX_FRAME_RELATED_P (insn) = 1;
14192 /* Emit code to restore saved registers using POP insns. */
14194 static void
14195 ix86_emit_restore_regs_using_pop (void)
14197 unsigned int regno;
14199 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14200 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
14201 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
14204 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
14205 omits the emit and only attaches the notes. */
14207 static void
14208 ix86_emit_leave (rtx_insn *insn)
14210 struct machine_function *m = cfun->machine;
14211 if (!insn)
14212 insn = emit_insn (ix86_gen_leave ());
14214 ix86_add_queued_cfa_restore_notes (insn);
14216 gcc_assert (m->fs.fp_valid);
14217 m->fs.sp_valid = true;
14218 m->fs.sp_realigned = false;
14219 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
14220 m->fs.fp_valid = false;
14222 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14224 m->fs.cfa_reg = stack_pointer_rtx;
14225 m->fs.cfa_offset = m->fs.sp_offset;
14227 add_reg_note (insn, REG_CFA_DEF_CFA,
14228 plus_constant (Pmode, stack_pointer_rtx,
14229 m->fs.sp_offset));
14230 RTX_FRAME_RELATED_P (insn) = 1;
14232 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
14233 m->fs.fp_offset);
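/* Note: on x86, LEAVE is shorthand for "movq %rbp, %rsp; popq %rbp", which
   is why the stack pointer becomes valid again at fp_offset - UNITS_PER_WORD,
   the frame pointer stops being valid, and the CFA (when it was the frame
   pointer) moves back to the stack pointer above.  */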
14236 /* Emit code to restore saved registers using MOV insns.
14237 First register is restored from CFA - CFA_OFFSET. */
14238 static void
14239 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
14240 bool maybe_eh_return)
14242 struct machine_function *m = cfun->machine;
14243 unsigned int regno;
14245 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14246 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14248 rtx reg = gen_rtx_REG (word_mode, regno);
14249 rtx mem;
14250 rtx_insn *insn;
14252 mem = choose_baseaddr (cfa_offset, NULL);
14253 mem = gen_frame_mem (word_mode, mem);
14254 insn = emit_move_insn (reg, mem);
14256 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
14258 /* Previously we'd represented the CFA as an expression
14259 like *(%ebp - 8). We've just popped that value from
14260 the stack, which means we need to reset the CFA to
14261 the drap register. This will remain until we restore
14262 the stack pointer. */
14263 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14264 RTX_FRAME_RELATED_P (insn) = 1;
14266 /* This means that the DRAP register is valid for addressing. */
14267 m->fs.drap_valid = true;
14269 else
14270 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14272 cfa_offset -= UNITS_PER_WORD;
14276 /* Emit code to restore saved registers using MOV insns.
14277 First register is restored from CFA - CFA_OFFSET. */
14278 static void
14279 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
14280 bool maybe_eh_return)
14282 unsigned int regno;
14284 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14285 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14287 rtx reg = gen_rtx_REG (V4SFmode, regno);
14288 rtx mem;
14289 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
14291 mem = choose_baseaddr (cfa_offset, &align);
14292 mem = gen_rtx_MEM (V4SFmode, mem);
14294 /* The location alignment depends upon the base register. */
14295 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
14296 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
14297 set_mem_align (mem, align);
14298 emit_insn (gen_rtx_SET (reg, mem));
14300 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14302 cfa_offset -= GET_MODE_SIZE (V4SFmode);
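/* Note: each SSE register is reloaded through a V4SF (16-byte) MEM whose
   recorded alignment is capped by the base register's known alignment, so
   the move patterns are presumably free to pick an unaligned load when the
   base address cannot be proven 16-byte aligned.  */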
14306 static void
14307 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
14308 bool use_call, int style)
14310 struct machine_function *m = cfun->machine;
14311 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
14312 + m->call_ms2sysv_extra_regs;
14313 rtvec v;
14314 unsigned int elems_needed, align, i, vi = 0;
14315 rtx_insn *insn;
14316 rtx sym, tmp;
14317 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
14318 rtx r10 = NULL_RTX;
14319 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
14320 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
14321 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
14322 rtx rsi_frame_load = NULL_RTX;
14323 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
14324 enum xlogue_stub stub;
14326 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
14328 /* If using a realigned stack, we should never start with padding. */
14329 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
14331 /* Setup RSI as the stub's base pointer. */
14332 align = GET_MODE_ALIGNMENT (V4SFmode);
14333 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
14334 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
14336 emit_insn (gen_rtx_SET (rsi, tmp));
14338 /* Get a symbol for the stub. */
14339 if (frame_pointer_needed)
14340 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
14341 : XLOGUE_STUB_RESTORE_HFP_TAIL;
14342 else
14343 stub = use_call ? XLOGUE_STUB_RESTORE
14344 : XLOGUE_STUB_RESTORE_TAIL;
14345 sym = xlogue.get_stub_rtx (stub);
14347 elems_needed = ncregs;
14348 if (use_call)
14349 elems_needed += 1;
14350 else
14351 elems_needed += frame_pointer_needed ? 5 : 3;
14352 v = rtvec_alloc (elems_needed);
14354 /* We call the epilogue stub when we need to pop incoming args or we are
14355 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
14356 epilogue stub and it is the tail-call. */
14357 if (use_call)
14358 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14359 else
14361 RTVEC_ELT (v, vi++) = ret_rtx;
14362 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14363 if (frame_pointer_needed)
14365 rtx rbp = gen_rtx_REG (DImode, BP_REG);
14366 gcc_assert (m->fs.fp_valid);
14367 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
14369 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
14370 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
14371 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
14372 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
14373 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
14375 else
14377 /* If no hard frame pointer, we set R10 to the SP restore value. */
14378 gcc_assert (!m->fs.fp_valid);
14379 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14380 gcc_assert (m->fs.sp_valid);
14382 r10 = gen_rtx_REG (DImode, R10_REG);
14383 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
14384 emit_insn (gen_rtx_SET (r10, tmp));
14386 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
14390 /* Generate frame load insns and restore notes. */
14391 for (i = 0; i < ncregs; ++i)
14393 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
14394 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
14395 rtx reg, frame_load;
14397 reg = gen_rtx_REG (mode, r.regno);
14398 frame_load = gen_frame_load (reg, rsi, r.offset);
14400 /* Save RSI frame load insn & note to add last. */
14401 if (r.regno == SI_REG)
14403 gcc_assert (!rsi_frame_load);
14404 rsi_frame_load = frame_load;
14405 rsi_restore_offset = r.offset;
14407 else
14409 RTVEC_ELT (v, vi++) = frame_load;
14410 ix86_add_cfa_restore_note (NULL, reg, r.offset);
14414 /* Add RSI frame load & restore note at the end. */
14415 gcc_assert (rsi_frame_load);
14416 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
14417 RTVEC_ELT (v, vi++) = rsi_frame_load;
14418 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
14419 rsi_restore_offset);
14421 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
14422 if (!use_call && !frame_pointer_needed)
14424 gcc_assert (m->fs.sp_valid);
14425 gcc_assert (!m->fs.sp_realigned);
14427 /* At this point, R10 should point to frame.stack_realign_offset. */
14428 if (m->fs.cfa_reg == stack_pointer_rtx)
14429 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
14430 m->fs.sp_offset = frame.stack_realign_offset;
14433 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
14434 tmp = gen_rtx_PARALLEL (VOIDmode, v);
14435 if (use_call)
14436 insn = emit_insn (tmp);
14437 else
14439 insn = emit_jump_insn (tmp);
14440 JUMP_LABEL (insn) = ret_rtx;
14442 if (frame_pointer_needed)
14443 ix86_emit_leave (insn);
14444 else
14446 /* Need CFA adjust note. */
14447 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
14448 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
14452 RTX_FRAME_RELATED_P (insn) = true;
14453 ix86_add_queued_cfa_restore_notes (insn);
14455 /* If we're not doing a tail-call, we need to adjust the stack. */
14456 if (use_call && m->fs.sp_valid)
14458 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
14459 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14460 GEN_INT (dealloc), style,
14461 m->fs.cfa_reg == stack_pointer_rtx);
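/* For illustration (a sketch): in the tail-call form the PARALLEL emitted
   above looks roughly like

	(parallel [(return)
		   (use (symbol_ref <restore stub>))
		   (set (reg sp) ...)
		   (set (reg bp) (mem (reg bp)))	;; only with a frame pointer
		   ...register frame loads...])

   while the call form drops the RETURN and the stack-pointer and frame-pointer
   sets, keeping just the USE of the stub symbol plus the frame loads.  */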
14465 /* Restore function stack, frame, and registers. */
14467 void
14468 ix86_expand_epilogue (int style)
14470 struct machine_function *m = cfun->machine;
14471 struct machine_frame_state frame_state_save = m->fs;
14472 bool restore_regs_via_mov;
14473 bool using_drap;
14474 bool restore_stub_is_tail = false;
14476 if (ix86_function_naked (current_function_decl))
14478 /* The program should not reach this point. */
14479 emit_insn (gen_ud2 ());
14480 return;
14483 ix86_finalize_stack_frame_flags ();
14484 const struct ix86_frame &frame = cfun->machine->frame;
14486 m->fs.sp_realigned = stack_realign_fp;
14487 m->fs.sp_valid = stack_realign_fp
14488 || !frame_pointer_needed
14489 || crtl->sp_is_unchanging;
14490 gcc_assert (!m->fs.sp_valid
14491 || m->fs.sp_offset == frame.stack_pointer_offset);
14493 /* The FP must be valid if the frame pointer is present. */
14494 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
14495 gcc_assert (!m->fs.fp_valid
14496 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
14498 /* We must have *some* valid pointer to the stack frame. */
14499 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
14501 /* The DRAP is never valid at this point. */
14502 gcc_assert (!m->fs.drap_valid);
14504 /* See the comment about red zone and frame
14505 pointer usage in ix86_expand_prologue. */
14506 if (frame_pointer_needed && frame.red_zone_size)
14507 emit_insn (gen_memory_blockage ());
14509 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
14510 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
14512 /* Determine the CFA offset of the end of the red-zone. */
14513 m->fs.red_zone_offset = 0;
14514 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
14516 /* The red-zone begins below the return address and the error code in
14517 an exception handler. */
14518 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
14520 /* When the register save area is in the aligned portion of
14521 the stack, determine the maximum runtime displacement that
14522 matches up with the aligned frame. */
14523 if (stack_realign_drap)
14524 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
14525 + UNITS_PER_WORD);
14528 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
14530 /* Special care must be taken for the normal return case of a function
14531 using eh_return: the eax and edx registers are marked as saved, but
14532 not restored along this path. Adjust the save location to match. */
14533 if (crtl->calls_eh_return && style != 2)
14534 reg_save_offset -= 2 * UNITS_PER_WORD;
14536 /* EH_RETURN requires the use of moves to function properly. */
14537 if (crtl->calls_eh_return)
14538 restore_regs_via_mov = true;
14539 /* SEH requires the use of pops to identify the epilogue. */
14540 else if (TARGET_SEH)
14541 restore_regs_via_mov = false;
14542 /* If we're only restoring one register and sp cannot be used then
14543 use a move instruction to restore the register, since it's
14544 less work than reloading sp and popping the register. */
14545 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
14546 restore_regs_via_mov = true;
14547 else if (TARGET_EPILOGUE_USING_MOVE
14548 && cfun->machine->use_fast_prologue_epilogue
14549 && (frame.nregs > 1
14550 || m->fs.sp_offset != reg_save_offset))
14551 restore_regs_via_mov = true;
14552 else if (frame_pointer_needed
14553 && !frame.nregs
14554 && m->fs.sp_offset != reg_save_offset)
14555 restore_regs_via_mov = true;
14556 else if (frame_pointer_needed
14557 && TARGET_USE_LEAVE
14558 && cfun->machine->use_fast_prologue_epilogue
14559 && frame.nregs == 1)
14560 restore_regs_via_mov = true;
14561 else
14562 restore_regs_via_mov = false;
14564 if (restore_regs_via_mov || frame.nsseregs)
14566 /* Ensure that the entire register save area is addressable via
14567 the stack pointer, if we will restore SSE regs via sp. */
14568 if (TARGET_64BIT
14569 && m->fs.sp_offset > 0x7fffffff
14570 && sp_valid_at (frame.stack_realign_offset + 1)
14571 && (frame.nsseregs + frame.nregs) != 0)
14573 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14574 GEN_INT (m->fs.sp_offset
14575 - frame.sse_reg_save_offset),
14576 style,
14577 m->fs.cfa_reg == stack_pointer_rtx);
14581 /* If there are any SSE registers to restore, then we have to do it
14582 via moves, since there's obviously no pop for SSE regs. */
14583 if (frame.nsseregs)
14584 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
14585 style == 2);
14587 if (m->call_ms2sysv)
14589 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
14591 /* We cannot use a tail-call for the stub if:
14592 1. We have to pop incoming args,
14593 2. We have additional int regs to restore, or
14594 3. A sibling call will be the tail-call, or
14595 4. We are emitting an eh_return_internal epilogue.
14597 TODO: Item 4 has not yet been tested!
14599 If any of the above are true, we will call the stub rather than
14600 jump to it. */
14601 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
14602 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
14605 /* If using an out-of-line stub that is a tail-call, then... */
14606 if (m->call_ms2sysv && restore_stub_is_tail)
14608 /* TODO: paranoid tests. (remove eventually) */
14609 gcc_assert (m->fs.sp_valid);
14610 gcc_assert (!m->fs.sp_realigned);
14611 gcc_assert (!m->fs.fp_valid);
14612 gcc_assert (!m->fs.realigned);
14613 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
14614 gcc_assert (!crtl->drap_reg);
14615 gcc_assert (!frame.nregs);
14617 else if (restore_regs_via_mov)
14619 rtx t;
14621 if (frame.nregs)
14622 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
14624 /* eh_return epilogues need %ecx added to the stack pointer. */
14625 if (style == 2)
14627 rtx sa = EH_RETURN_STACKADJ_RTX;
14628 rtx_insn *insn;
14630 /* %ecx can't be used for both DRAP register and eh_return. */
14631 if (crtl->drap_reg)
14632 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14634 /* regparm nested functions don't work with eh_return. */
14635 gcc_assert (!ix86_static_chain_on_stack);
14637 if (frame_pointer_needed)
14639 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14640 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14641 emit_insn (gen_rtx_SET (sa, t));
14643 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14644 insn = emit_move_insn (hard_frame_pointer_rtx, t);
14646 /* Note that we use SA as a temporary CFA, as the return
14647 address is at the proper place relative to it. We
14648 pretend this happens at the FP restore insn because
14649 prior to this insn the FP would be stored at the wrong
14650 offset relative to SA, and after this insn we have no
14651 other reasonable register to use for the CFA. We don't
14652 bother resetting the CFA to the SP for the duration of
14653 the return insn, unless the control flow instrumentation
14654 is done. In this case the SP is used later and we have
14655 to reset CFA to SP. */
14656 add_reg_note (insn, REG_CFA_DEF_CFA,
14657 plus_constant (Pmode, sa, UNITS_PER_WORD));
14658 ix86_add_queued_cfa_restore_notes (insn);
14659 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14660 RTX_FRAME_RELATED_P (insn) = 1;
14662 m->fs.cfa_reg = sa;
14663 m->fs.cfa_offset = UNITS_PER_WORD;
14664 m->fs.fp_valid = false;
14666 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14667 const0_rtx, style,
14668 flag_cf_protection);
14670 else
14672 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14673 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14674 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14675 ix86_add_queued_cfa_restore_notes (insn);
14677 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14678 if (m->fs.cfa_offset != UNITS_PER_WORD)
14680 m->fs.cfa_offset = UNITS_PER_WORD;
14681 add_reg_note (insn, REG_CFA_DEF_CFA,
14682 plus_constant (Pmode, stack_pointer_rtx,
14683 UNITS_PER_WORD));
14684 RTX_FRAME_RELATED_P (insn) = 1;
14687 m->fs.sp_offset = UNITS_PER_WORD;
14688 m->fs.sp_valid = true;
14689 m->fs.sp_realigned = false;
14692 else
14694 /* SEH requires that the function end with (1) a stack adjustment
14695 if necessary, (2) a sequence of pops, and (3) a return or
14696 jump instruction. Prevent insns from the function body from
14697 being scheduled into this sequence. */
14698 if (TARGET_SEH)
14700 /* Prevent a catch region from being adjacent to the standard
14701 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
14702 nor several other flags that would be interesting to test are
14703 set up yet. */
14704 if (flag_non_call_exceptions)
14705 emit_insn (gen_nops (const1_rtx));
14706 else
14707 emit_insn (gen_blockage ());
14710 /* The first step is to deallocate the stack frame so that we can
14711 pop the registers. If the stack pointer was realigned, it needs
14712 to be restored now. Also do it on SEH targets for a very large
14713 frame, as the emitted instructions aren't allowed by the ABI
14714 in epilogues. */
14715 if (!m->fs.sp_valid || m->fs.sp_realigned
14716 || (TARGET_SEH
14717 && (m->fs.sp_offset - reg_save_offset
14718 >= SEH_MAX_FRAME_SIZE)))
14720 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14721 GEN_INT (m->fs.fp_offset
14722 - reg_save_offset),
14723 style, false);
14725 else if (m->fs.sp_offset != reg_save_offset)
14727 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14728 GEN_INT (m->fs.sp_offset
14729 - reg_save_offset),
14730 style,
14731 m->fs.cfa_reg == stack_pointer_rtx);
14734 ix86_emit_restore_regs_using_pop ();
14737 /* If we used a frame pointer and haven't already got rid of it,
14738 then do so now. */
14739 if (m->fs.fp_valid)
14741 /* If the stack pointer is valid and pointing at the frame
14742 pointer store address, then we only need a pop. */
14743 if (sp_valid_at (frame.hfp_save_offset)
14744 && m->fs.sp_offset == frame.hfp_save_offset)
14745 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14746 /* Leave results in shorter dependency chains on CPUs that are
14747 able to grok it fast. */
14748 else if (TARGET_USE_LEAVE
14749 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14750 || !cfun->machine->use_fast_prologue_epilogue)
14751 ix86_emit_leave (NULL);
14752 else
14754 pro_epilogue_adjust_stack (stack_pointer_rtx,
14755 hard_frame_pointer_rtx,
14756 const0_rtx, style, !using_drap);
14757 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14761 if (using_drap)
14763 int param_ptr_offset = UNITS_PER_WORD;
14764 rtx_insn *insn;
14766 gcc_assert (stack_realign_drap);
14768 if (ix86_static_chain_on_stack)
14769 param_ptr_offset += UNITS_PER_WORD;
14770 if (!call_used_regs[REGNO (crtl->drap_reg)])
14771 param_ptr_offset += UNITS_PER_WORD;
14773 insn = emit_insn (gen_rtx_SET
14774 (stack_pointer_rtx,
14775 gen_rtx_PLUS (Pmode,
14776 crtl->drap_reg,
14777 GEN_INT (-param_ptr_offset))));
14778 m->fs.cfa_reg = stack_pointer_rtx;
14779 m->fs.cfa_offset = param_ptr_offset;
14780 m->fs.sp_offset = param_ptr_offset;
14781 m->fs.realigned = false;
14783 add_reg_note (insn, REG_CFA_DEF_CFA,
14784 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14785 GEN_INT (param_ptr_offset)));
14786 RTX_FRAME_RELATED_P (insn) = 1;
14788 if (!call_used_regs[REGNO (crtl->drap_reg)])
14789 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14792 /* At this point the stack pointer must be valid, and we must have
14793 restored all of the registers. We may not have deallocated the
14794 entire stack frame. We've delayed this until now because it may
14795 be possible to merge the local stack deallocation with the
14796 deallocation forced by ix86_static_chain_on_stack. */
14797 gcc_assert (m->fs.sp_valid);
14798 gcc_assert (!m->fs.sp_realigned);
14799 gcc_assert (!m->fs.fp_valid);
14800 gcc_assert (!m->fs.realigned);
14801 if (m->fs.sp_offset != UNITS_PER_WORD)
14803 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14804 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14805 style, true);
14807 else
14808 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14810 /* Sibcall epilogues don't want a return instruction. */
14811 if (style == 0)
14813 m->fs = frame_state_save;
14814 return;
14817 if (cfun->machine->func_type != TYPE_NORMAL)
14818 emit_jump_insn (gen_interrupt_return ());
14819 else if (crtl->args.pops_args && crtl->args.size)
14821 rtx popc = GEN_INT (crtl->args.pops_args);
14823 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
14824 address, do an explicit add, and jump indirectly to the caller. */
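/* For illustration (added example, using an arbitrary pop amount):
   "ret imm16" only encodes a 16-bit immediate, so a pop amount such
   as 0x12345 bytes cannot be expressed directly.  The fallback below
   is roughly equivalent to

       pop  %ecx              # fetch the return address
       add  $0x12345, %esp    # drop the incoming arguments
       jmp  *%ecx             # return to the caller

   with the pop, the stack adjustment and the indirect jump emitted by
   gen_pop, pro_epilogue_adjust_stack and
   gen_simple_return_indirect_internal respectively.  */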
14826 if (crtl->args.pops_args >= 65536)
14828 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14829 rtx_insn *insn;
14831 /* There is no "pascal" calling convention in any 64bit ABI. */
14832 gcc_assert (!TARGET_64BIT);
14834 insn = emit_insn (gen_pop (ecx));
14835 m->fs.cfa_offset -= UNITS_PER_WORD;
14836 m->fs.sp_offset -= UNITS_PER_WORD;
14838 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14839 x = gen_rtx_SET (stack_pointer_rtx, x);
14840 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14841 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14842 RTX_FRAME_RELATED_P (insn) = 1;
14844 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14845 popc, -1, true);
14846 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14848 else
14849 emit_jump_insn (gen_simple_return_pop_internal (popc));
14851 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14853 /* In case of a return from EH, a simple return cannot be used,
14854 as the return address will be compared with the shadow stack
14855 return address. Use an indirect jump instead. */
14856 if (style == 2 && flag_cf_protection)
14858 /* The register used in the indirect jump must be in word_mode. But
14859 Pmode may not be the same as word_mode for x32. */
14860 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
14861 rtx_insn *insn;
14863 insn = emit_insn (gen_pop (ecx));
14864 m->fs.cfa_offset -= UNITS_PER_WORD;
14865 m->fs.sp_offset -= UNITS_PER_WORD;
14867 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14868 x = gen_rtx_SET (stack_pointer_rtx, x);
14869 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14870 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14871 RTX_FRAME_RELATED_P (insn) = 1;
14873 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14875 else
14876 emit_jump_insn (gen_simple_return_internal ());
14879 /* Restore the state back to the state from the prologue,
14880 so that it's correct for the next epilogue. */
14881 m->fs = frame_state_save;
14884 /* Reset from the function's potential modifications. */
14886 static void
14887 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14889 if (pic_offset_table_rtx
14890 && !ix86_use_pseudo_pic_reg ())
14891 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14893 if (TARGET_MACHO)
14895 rtx_insn *insn = get_last_insn ();
14896 rtx_insn *deleted_debug_label = NULL;
14898 /* Mach-O doesn't support labels at the end of objects, so if
14899 it looks like we might want one, take special action.
14900 First, collect any sequence of deleted debug labels. */
14901 while (insn
14902 && NOTE_P (insn)
14903 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14905 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert
14906 a nop; instead set their CODE_LABEL_NUMBER to -1,
14907 otherwise there would be code generation differences
14908 between -g and -g0. */
14909 if (NOTE_P (insn) && NOTE_KIND (insn)
14910 == NOTE_INSN_DELETED_DEBUG_LABEL)
14911 deleted_debug_label = insn;
14912 insn = PREV_INSN (insn);
14915 /* If we have:
14916 label:
14917 barrier
14918 then this needs to be detected, so skip past the barrier. */
14920 if (insn && BARRIER_P (insn))
14921 insn = PREV_INSN (insn);
14923 /* Up to now we've only seen notes or barriers. */
14924 if (insn)
14926 if (LABEL_P (insn)
14927 || (NOTE_P (insn)
14928 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14929 /* Trailing label. */
14930 fputs ("\tnop\n", file);
14931 else if (cfun && ! cfun->is_thunk)
14933 /* See if we have a completely empty function body, skipping
14934 the special case of the picbase thunk emitted as asm. */
14935 while (insn && ! INSN_P (insn))
14936 insn = PREV_INSN (insn);
14937 /* If we don't find any insns, we've got an empty function body;
14938 i.e. completely empty, without a return or branch. This is
14939 taken as the case where a function body has been removed
14940 because it contains an inline __builtin_unreachable(). GCC
14941 declares that reaching __builtin_unreachable() means UB so
14942 we're not obliged to do anything special; however, we want
14943 non-zero-sized function bodies. To meet this, and help the
14944 user out, let's trap the case. */
14945 if (insn == NULL)
14946 fputs ("\tud2\n", file);
14949 else if (deleted_debug_label)
14950 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14951 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14952 CODE_LABEL_NUMBER (insn) = -1;
14956 /* Return a scratch register to use in the split stack prologue. The
14957 split stack prologue is used for -fsplit-stack. These are the first
14958 instructions in the function, even before the regular prologue.
14959 The scratch register can be any caller-saved register which is not
14960 used for parameters or for the static chain. */
14962 static unsigned int
14963 split_stack_prologue_scratch_regno (void)
14965 if (TARGET_64BIT)
14966 return R11_REG;
14967 else
14969 bool is_fastcall, is_thiscall;
14970 int regparm;
14972 is_fastcall = (lookup_attribute ("fastcall",
14973 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14974 != NULL);
14975 is_thiscall = (lookup_attribute ("thiscall",
14976 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14977 != NULL);
14978 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14980 if (is_fastcall)
14982 if (DECL_STATIC_CHAIN (cfun->decl))
14984 sorry ("-fsplit-stack does not support fastcall with "
14985 "nested function");
14986 return INVALID_REGNUM;
14988 return AX_REG;
14990 else if (is_thiscall)
14992 if (!DECL_STATIC_CHAIN (cfun->decl))
14993 return DX_REG;
14994 return AX_REG;
14996 else if (regparm < 3)
14998 if (!DECL_STATIC_CHAIN (cfun->decl))
14999 return CX_REG;
15000 else
15002 if (regparm >= 2)
15004 sorry ("-fsplit-stack does not support 2 register "
15005 "parameters for a nested function");
15006 return INVALID_REGNUM;
15008 return DX_REG;
15011 else
15013 /* FIXME: We could make this work by pushing a register
15014 around the addition and comparison. */
15015 sorry ("-fsplit-stack does not support 3 register parameters");
15016 return INVALID_REGNUM;
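/* Illustrative summary of the choices above (no new logic, just the
   cases spelled out):

     TARGET_64BIT                        -> %r11
     fastcall, no static chain           -> %eax
     thiscall, no static chain           -> %edx
     thiscall, static chain              -> %eax
     regparm < 3, no static chain        -> %ecx
     regparm < 2, static chain           -> %edx
     anything else                       -> sorry () and INVALID_REGNUM  */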
15021 /* A SYMBOL_REF for the function which allocates new stackspace for
15022 -fsplit-stack. */
15024 static GTY(()) rtx split_stack_fn;
15026 /* A SYMBOL_REF for the more stack function when using the large
15027 model. */
15029 static GTY(()) rtx split_stack_fn_large;
15031 /* Return location of the stack guard value in the TLS block. */
15034 ix86_split_stack_guard (void)
15036 int offset;
15037 addr_space_t as = DEFAULT_TLS_SEG_REG;
15038 rtx r;
15040 gcc_assert (flag_split_stack);
15042 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15043 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15044 #else
15045 gcc_unreachable ();
15046 #endif
15048 r = GEN_INT (offset);
15049 r = gen_const_mem (Pmode, r);
15050 set_mem_addr_space (r, as);
15052 return r;
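/* Example of what the MEM above denotes (assuming a glibc target,
   where the OS headers define TARGET_THREAD_SPLIT_STACK_OFFSET): a
   slot in the thread control block addressed through the TLS segment
   register, so the comparison in ix86_expand_split_stack_prologue
   ends up as something like "cmp %fs:OFFSET, %reg" in 64-bit code or
   "cmp %gs:OFFSET, %reg" in 32-bit code.  */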
15055 /* Handle -fsplit-stack. These are the first instructions in the
15056 function, even before the regular prologue. */
15058 void
15059 ix86_expand_split_stack_prologue (void)
15061 HOST_WIDE_INT allocate;
15062 unsigned HOST_WIDE_INT args_size;
15063 rtx_code_label *label;
15064 rtx limit, current, allocate_rtx, call_insn, call_fusage;
15065 rtx scratch_reg = NULL_RTX;
15066 rtx_code_label *varargs_label = NULL;
15067 rtx fn;
15069 gcc_assert (flag_split_stack && reload_completed);
15071 ix86_finalize_stack_frame_flags ();
15072 struct ix86_frame &frame = cfun->machine->frame;
15073 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
15075 /* This is the label we will branch to if we have enough stack
15076 space. We expect the basic block reordering pass to reverse this
15077 branch if optimizing, so that we branch in the unlikely case. */
15078 label = gen_label_rtx ();
15080 /* We need to compare the stack pointer minus the frame size with
15081 the stack boundary in the TCB. The stack boundary always gives
15082 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
15083 can compare directly. Otherwise we need to do an addition. */
15085 limit = ix86_split_stack_guard ();
15087 if (allocate < SPLIT_STACK_AVAILABLE)
15088 current = stack_pointer_rtx;
15089 else
15091 unsigned int scratch_regno;
15092 rtx offset;
15094 /* We need a scratch register to hold the stack pointer minus
15095 the required frame size. Since this is the very start of the
15096 function, the scratch register can be any caller-saved
15097 register which is not used for parameters. */
15098 offset = GEN_INT (- allocate);
15099 scratch_regno = split_stack_prologue_scratch_regno ();
15100 if (scratch_regno == INVALID_REGNUM)
15101 return;
15102 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15103 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
15105 /* We don't use ix86_gen_add3 in this case because it will
15106 want to split to lea, but when not optimizing the insn
15107 will not be split after this point. */
15108 emit_insn (gen_rtx_SET (scratch_reg,
15109 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15110 offset)));
15112 else
15114 emit_move_insn (scratch_reg, offset);
15115 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
15116 stack_pointer_rtx));
15118 current = scratch_reg;
15121 ix86_expand_branch (GEU, current, limit, label);
15122 rtx_insn *jump_insn = get_last_insn ();
15123 JUMP_LABEL (jump_insn) = label;
15125 /* Mark the jump as very likely to be taken. */
15126 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
15128 if (split_stack_fn == NULL_RTX)
15130 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
15131 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
15133 fn = split_stack_fn;
15135 /* Get more stack space. We pass in the desired stack space and the
15136 size of the arguments to copy to the new stack. In 32-bit mode
15137 we push the parameters; __morestack will return on a new stack
15138 anyhow. In 64-bit mode we pass the parameters in r10 and
15139 r11. */
15140 allocate_rtx = GEN_INT (allocate);
15141 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
15142 call_fusage = NULL_RTX;
15143 rtx pop = NULL_RTX;
15144 if (TARGET_64BIT)
15146 rtx reg10, reg11;
15148 reg10 = gen_rtx_REG (Pmode, R10_REG);
15149 reg11 = gen_rtx_REG (Pmode, R11_REG);
15151 /* If this function uses a static chain, it will be in %r10.
15152 Preserve it across the call to __morestack. */
15153 if (DECL_STATIC_CHAIN (cfun->decl))
15155 rtx rax;
15157 rax = gen_rtx_REG (word_mode, AX_REG);
15158 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
15159 use_reg (&call_fusage, rax);
15162 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
15163 && !TARGET_PECOFF)
15165 HOST_WIDE_INT argval;
15167 gcc_assert (Pmode == DImode);
15168 /* When using the large model we need to load the address
15169 into a register, and we've run out of registers. So we
15170 switch to a different calling convention, and we call a
15171 different function: __morestack_large_model. We pass the
15172 argument size in the upper 32 bits of r10 and pass the
15173 frame size in the lower 32 bits. */
15174 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
15175 gcc_assert ((args_size & 0xffffffff) == args_size);
15177 if (split_stack_fn_large == NULL_RTX)
15179 split_stack_fn_large =
15180 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
15181 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
15183 if (ix86_cmodel == CM_LARGE_PIC)
15185 rtx_code_label *label;
15186 rtx x;
15188 label = gen_label_rtx ();
15189 emit_label (label);
15190 LABEL_PRESERVE_P (label) = 1;
15191 emit_insn (gen_set_rip_rex64 (reg10, label));
15192 emit_insn (gen_set_got_offset_rex64 (reg11, label));
15193 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
15194 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
15195 UNSPEC_GOT);
15196 x = gen_rtx_CONST (Pmode, x);
15197 emit_move_insn (reg11, x);
15198 x = gen_rtx_PLUS (Pmode, reg10, reg11);
15199 x = gen_const_mem (Pmode, x);
15200 emit_move_insn (reg11, x);
15202 else
15203 emit_move_insn (reg11, split_stack_fn_large);
15205 fn = reg11;
15207 argval = ((args_size << 16) << 16) + allocate;
15208 emit_move_insn (reg10, GEN_INT (argval));
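/* Worked example of the packing above (illustrative values only):
   with args_size == 0x18 and allocate == 0x4000,
     argval = (0x18 << 32) + 0x4000 = 0x0000001800004000,
   so %r10 carries the argument size in its upper 32 bits and the
   frame size in its lower 32 bits, as the __morestack_large_model
   convention described above requires.  */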
15210 else
15212 emit_move_insn (reg10, allocate_rtx);
15213 emit_move_insn (reg11, GEN_INT (args_size));
15214 use_reg (&call_fusage, reg11);
15217 use_reg (&call_fusage, reg10);
15219 else
15221 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
15222 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
15223 insn = emit_insn (gen_push (allocate_rtx));
15224 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
15225 pop = GEN_INT (2 * UNITS_PER_WORD);
15227 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
15228 GEN_INT (UNITS_PER_WORD), constm1_rtx,
15229 pop, false);
15230 add_function_usage_to (call_insn, call_fusage);
15231 if (!TARGET_64BIT)
15232 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
15233 /* Indicate that this function can't jump to non-local gotos. */
15234 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
15236 /* In order to make call/return prediction work right, we now need
15237 to execute a return instruction. See
15238 libgcc/config/i386/morestack.S for the details on how this works.
15240 For flow purposes gcc must not see this as a return
15241 instruction--we need control flow to continue at the subsequent
15242 label. Therefore, we use an unspec. */
15243 gcc_assert (crtl->args.pops_args < 65536);
15244 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
15246 /* If we are in 64-bit mode and this function uses a static chain,
15247 we saved %r10 in %rax before calling __morestack. */
15248 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
15249 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
15250 gen_rtx_REG (word_mode, AX_REG));
15252 /* If this function calls va_start, we need to store a pointer to
15253 the arguments on the old stack, because they may not have been
15254 all copied to the new stack. At this point the old stack can be
15255 found at the frame pointer value used by __morestack, because
15256 __morestack has set that up before calling back to us. Here we
15257 store that pointer in a scratch register, and in
15258 ix86_expand_prologue we store the scratch register in a stack
15259 slot. */
15260 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15262 unsigned int scratch_regno;
15263 rtx frame_reg;
15264 int words;
15266 scratch_regno = split_stack_prologue_scratch_regno ();
15267 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15268 frame_reg = gen_rtx_REG (Pmode, BP_REG);
15270 /* 64-bit:
15271 fp -> old fp value
15272 return address within this function
15273 return address of caller of this function
15274 stack arguments
15275 So we add three words to get to the stack arguments.
15277 32-bit:
15278 fp -> old fp value
15279 return address within this function
15280 first argument to __morestack
15281 second argument to __morestack
15282 return address of caller of this function
15283 stack arguments
15284 So we add five words to get to the stack arguments.
15286 words = TARGET_64BIT ? 3 : 5;
15287 emit_insn (gen_rtx_SET (scratch_reg,
15288 gen_rtx_PLUS (Pmode, frame_reg,
15289 GEN_INT (words * UNITS_PER_WORD))));
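/* Worked example (illustrative): in 64-bit code words is 3 and
   UNITS_PER_WORD is 8, so the insn above computes
   scratch_reg = %rbp + 24, the address of the first stack argument
   per the layout comment; the 32-bit case computes %ebp + 20
   (5 words of 4 bytes).  */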
15291 varargs_label = gen_label_rtx ();
15292 emit_jump_insn (gen_jump (varargs_label));
15293 JUMP_LABEL (get_last_insn ()) = varargs_label;
15295 emit_barrier ();
15298 emit_label (label);
15299 LABEL_NUSES (label) = 1;
15301 /* If this function calls va_start, we now have to set the scratch
15302 register for the case where we do not call __morestack. In this
15303 case we need to set it based on the stack pointer. */
15304 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15306 emit_insn (gen_rtx_SET (scratch_reg,
15307 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15308 GEN_INT (UNITS_PER_WORD))));
15310 emit_label (varargs_label);
15311 LABEL_NUSES (varargs_label) = 1;
15315 /* We may have to tell the dataflow pass that the split stack prologue
15316 is initializing a scratch register. */
15318 static void
15319 ix86_live_on_entry (bitmap regs)
15321 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15323 gcc_assert (flag_split_stack);
15324 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
15328 /* Extract the parts of an RTL expression that is a valid memory address
15329 for an instruction. Return 0 if the structure of the address is
15330 grossly off. Return -1 if the address contains ASHIFT, so it is not
15331 strictly valid, but is still used for computing the length of an lea instruction. */
15334 ix86_decompose_address (rtx addr, struct ix86_address *out)
15336 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
15337 rtx base_reg, index_reg;
15338 HOST_WIDE_INT scale = 1;
15339 rtx scale_rtx = NULL_RTX;
15340 rtx tmp;
15341 int retval = 1;
15342 addr_space_t seg = ADDR_SPACE_GENERIC;
15344 /* Allow zero-extended SImode addresses,
15345 they will be emitted with addr32 prefix. */
15346 if (TARGET_64BIT && GET_MODE (addr) == DImode)
15348 if (GET_CODE (addr) == ZERO_EXTEND
15349 && GET_MODE (XEXP (addr, 0)) == SImode)
15351 addr = XEXP (addr, 0);
15352 if (CONST_INT_P (addr))
15353 return 0;
15355 else if (GET_CODE (addr) == AND
15356 && const_32bit_mask (XEXP (addr, 1), DImode))
15358 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
15359 if (addr == NULL_RTX)
15360 return 0;
15362 if (CONST_INT_P (addr))
15363 return 0;
15367 /* Allow SImode subregs of DImode addresses,
15368 they will be emitted with addr32 prefix. */
15369 if (TARGET_64BIT && GET_MODE (addr) == SImode)
15371 if (SUBREG_P (addr)
15372 && GET_MODE (SUBREG_REG (addr)) == DImode)
15374 addr = SUBREG_REG (addr);
15375 if (CONST_INT_P (addr))
15376 return 0;
15380 if (REG_P (addr))
15381 base = addr;
15382 else if (SUBREG_P (addr))
15384 if (REG_P (SUBREG_REG (addr)))
15385 base = addr;
15386 else
15387 return 0;
15389 else if (GET_CODE (addr) == PLUS)
15391 rtx addends[4], op;
15392 int n = 0, i;
15394 op = addr;
15397 if (n >= 4)
15398 return 0;
15399 addends[n++] = XEXP (op, 1);
15400 op = XEXP (op, 0);
15402 while (GET_CODE (op) == PLUS);
15403 if (n >= 4)
15404 return 0;
15405 addends[n] = op;
15407 for (i = n; i >= 0; --i)
15409 op = addends[i];
15410 switch (GET_CODE (op))
15412 case MULT:
15413 if (index)
15414 return 0;
15415 index = XEXP (op, 0);
15416 scale_rtx = XEXP (op, 1);
15417 break;
15419 case ASHIFT:
15420 if (index)
15421 return 0;
15422 index = XEXP (op, 0);
15423 tmp = XEXP (op, 1);
15424 if (!CONST_INT_P (tmp))
15425 return 0;
15426 scale = INTVAL (tmp);
15427 if ((unsigned HOST_WIDE_INT) scale > 3)
15428 return 0;
15429 scale = 1 << scale;
15430 break;
15432 case ZERO_EXTEND:
15433 op = XEXP (op, 0);
15434 if (GET_CODE (op) != UNSPEC)
15435 return 0;
15436 /* FALLTHRU */
15438 case UNSPEC:
15439 if (XINT (op, 1) == UNSPEC_TP
15440 && TARGET_TLS_DIRECT_SEG_REFS
15441 && seg == ADDR_SPACE_GENERIC)
15442 seg = DEFAULT_TLS_SEG_REG;
15443 else
15444 return 0;
15445 break;
15447 case SUBREG:
15448 if (!REG_P (SUBREG_REG (op)))
15449 return 0;
15450 /* FALLTHRU */
15452 case REG:
15453 if (!base)
15454 base = op;
15455 else if (!index)
15456 index = op;
15457 else
15458 return 0;
15459 break;
15461 case CONST:
15462 case CONST_INT:
15463 case SYMBOL_REF:
15464 case LABEL_REF:
15465 if (disp)
15466 return 0;
15467 disp = op;
15468 break;
15470 default:
15471 return 0;
15475 else if (GET_CODE (addr) == MULT)
15477 index = XEXP (addr, 0); /* index*scale */
15478 scale_rtx = XEXP (addr, 1);
15480 else if (GET_CODE (addr) == ASHIFT)
15482 /* We're called for lea too, which implements ashift on occasion. */
15483 index = XEXP (addr, 0);
15484 tmp = XEXP (addr, 1);
15485 if (!CONST_INT_P (tmp))
15486 return 0;
15487 scale = INTVAL (tmp);
15488 if ((unsigned HOST_WIDE_INT) scale > 3)
15489 return 0;
15490 scale = 1 << scale;
15491 retval = -1;
15493 else
15494 disp = addr; /* displacement */
15496 if (index)
15498 if (REG_P (index))
15500 else if (SUBREG_P (index)
15501 && REG_P (SUBREG_REG (index)))
15503 else
15504 return 0;
15507 /* Extract the integral value of scale. */
15508 if (scale_rtx)
15510 if (!CONST_INT_P (scale_rtx))
15511 return 0;
15512 scale = INTVAL (scale_rtx);
15515 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
15516 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
15518 /* Avoid useless 0 displacement. */
15519 if (disp == const0_rtx && (base || index))
15520 disp = NULL_RTX;
15522 /* Allow arg pointer and stack pointer as index if there is no scaling. */
15523 if (base_reg && index_reg && scale == 1
15524 && (REGNO (index_reg) == ARG_POINTER_REGNUM
15525 || REGNO (index_reg) == FRAME_POINTER_REGNUM
15526 || REGNO (index_reg) == SP_REG))
15528 std::swap (base, index);
15529 std::swap (base_reg, index_reg);
15532 /* Special case: %ebp cannot be encoded as a base without a displacement.
15533 Similarly %r13. */
15534 if (!disp && base_reg
15535 && (REGNO (base_reg) == ARG_POINTER_REGNUM
15536 || REGNO (base_reg) == FRAME_POINTER_REGNUM
15537 || REGNO (base_reg) == BP_REG
15538 || REGNO (base_reg) == R13_REG))
15539 disp = const0_rtx;
15541 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
15542 Avoid this by transforming it to [%esi+0].
15543 Reload calls address legitimization without cfun defined, so we need
15544 to test cfun for being non-NULL. */
15545 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
15546 && base_reg && !index_reg && !disp
15547 && REGNO (base_reg) == SI_REG)
15548 disp = const0_rtx;
15550 /* Special case: encode reg+reg instead of reg*2. */
15551 if (!base && index && scale == 2)
15552 base = index, base_reg = index_reg, scale = 1;
15554 /* Special case: scaling cannot be encoded without base or displacement. */
15555 if (!base && !disp && index && scale != 1)
15556 disp = const0_rtx;
15558 out->base = base;
15559 out->index = index;
15560 out->disp = disp;
15561 out->scale = scale;
15562 out->seg = seg;
15564 return retval;
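/* Example of the decomposition above (added illustration, not part of
   the original logic): for the address

     (plus:DI (plus:DI (mult:DI (reg:DI %rsi) (const_int 4))
                       (reg:DI %rdi))
              (const_int 8))

   OUT is filled with base = %rdi, index = %rsi, scale = 4 and
   disp = (const_int 8), and 1 is returned -- the operand that prints
   as 8(%rdi,%rsi,4).  A bare (ashift:DI (reg:DI %rsi) (const_int 2))
   address yields the same index and scale but returns -1, since it is
   only useful for computing the length of an lea.  */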
15567 /* Return cost of the memory address x.
15568 For i386, it is better to use a complex address than let gcc copy
15569 the address into a reg and make a new pseudo. But not if the address
15570 requires two regs - that would mean more pseudos with longer
15571 lifetimes. */
15572 static int
15573 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
15575 struct ix86_address parts;
15576 int cost = 1;
15577 int ok = ix86_decompose_address (x, &parts);
15579 gcc_assert (ok);
15581 if (parts.base && SUBREG_P (parts.base))
15582 parts.base = SUBREG_REG (parts.base);
15583 if (parts.index && SUBREG_P (parts.index))
15584 parts.index = SUBREG_REG (parts.index);
15586 /* Attempt to minimize the number of registers in the address by increasing
15587 the address cost for each register used. We don't increase the address cost
15588 for "pic_offset_table_rtx". When a memory operand using "pic_offset_table_rtx"
15589 is not invariant itself, it most likely means that the base or index is not
15590 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
15591 which is not profitable for x86. */
15592 if (parts.base
15593 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
15594 && (current_pass->type == GIMPLE_PASS
15595 || !pic_offset_table_rtx
15596 || !REG_P (parts.base)
15597 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
15598 cost++;
15600 if (parts.index
15601 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
15602 && (current_pass->type == GIMPLE_PASS
15603 || !pic_offset_table_rtx
15604 || !REG_P (parts.index)
15605 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
15606 cost++;
15608 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
15609 since its predecode logic can't detect the length of instructions
15610 and decoding degenerates to vector decoding. Increase the cost of such
15611 addresses here. The penalty is at least 2 cycles. It may be worthwhile
15612 to split such addresses or even refuse such addresses at all.
15614 The following addressing modes are affected:
15615 [base+scale*index]
15616 [scale*index+disp]
15617 [base+index]
15619 The first and last case may be avoidable by explicitly coding the zero in
15620 the memory address, but I don't have an AMD-K6 machine handy to check this
15621 theory. */
15623 if (TARGET_K6
15624 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
15625 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
15626 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
15627 cost += 10;
15629 return cost;
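/* Cost example under the rules above (illustrative): an address like
   (plus (reg pseudo1) (mult (reg pseudo2) (const_int 4))) costs
   1 + 1 + 1 = 3 because both base and index are pseudos, whereas
   4(%esp) costs just 1 since hard registers below
   FIRST_PSEUDO_REGISTER add nothing; on K6 the affected forms listed
   above cost a further 10.  */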
15632 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
15633 this is used to form addresses to local data when -fPIC is in
15634 use. */
15636 static bool
15637 darwin_local_data_pic (rtx disp)
15639 return (GET_CODE (disp) == UNSPEC
15640 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15643 /* True if operand X should be loaded from GOT. */
15645 bool
15646 ix86_force_load_from_GOT_p (rtx x)
15648 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15649 && !TARGET_PECOFF && !TARGET_MACHO
15650 && !flag_plt && !flag_pic
15651 && ix86_cmodel != CM_LARGE
15652 && GET_CODE (x) == SYMBOL_REF
15653 && SYMBOL_REF_FUNCTION_P (x)
15654 && !SYMBOL_REF_LOCAL_P (x));
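/* Illustrative consequence of the predicate above: when it is true
   (e.g. 64-bit, -fno-plt, non-PIC, a non-local function symbol), a
   call to an external function foo is emitted as an indirect call
   through its GOT slot, roughly

       call  *foo@GOTPCREL(%rip)

   rather than through a PLT entry.  */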
15657 /* Determine if a given RTX is a valid constant. We already know this
15658 satisfies CONSTANT_P. */
15660 static bool
15661 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15663 /* Pointer bounds constants are not valid. */
15664 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
15665 return false;
15667 switch (GET_CODE (x))
15669 case CONST:
15670 x = XEXP (x, 0);
15672 if (GET_CODE (x) == PLUS)
15674 if (!CONST_INT_P (XEXP (x, 1)))
15675 return false;
15676 x = XEXP (x, 0);
15679 if (TARGET_MACHO && darwin_local_data_pic (x))
15680 return true;
15682 /* Only some unspecs are valid as "constants". */
15683 if (GET_CODE (x) == UNSPEC)
15684 switch (XINT (x, 1))
15686 case UNSPEC_GOT:
15687 case UNSPEC_GOTOFF:
15688 case UNSPEC_PLTOFF:
15689 return TARGET_64BIT;
15690 case UNSPEC_TPOFF:
15691 case UNSPEC_NTPOFF:
15692 x = XVECEXP (x, 0, 0);
15693 return (GET_CODE (x) == SYMBOL_REF
15694 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15695 case UNSPEC_DTPOFF:
15696 x = XVECEXP (x, 0, 0);
15697 return (GET_CODE (x) == SYMBOL_REF
15698 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15699 default:
15700 return false;
15703 /* We must have drilled down to a symbol. */
15704 if (GET_CODE (x) == LABEL_REF)
15705 return true;
15706 if (GET_CODE (x) != SYMBOL_REF)
15707 return false;
15708 /* FALLTHRU */
15710 case SYMBOL_REF:
15711 /* TLS symbols are never valid. */
15712 if (SYMBOL_REF_TLS_MODEL (x))
15713 return false;
15715 /* DLLIMPORT symbols are never valid. */
15716 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15717 && SYMBOL_REF_DLLIMPORT_P (x))
15718 return false;
15720 #if TARGET_MACHO
15721 /* mdynamic-no-pic */
15722 if (MACHO_DYNAMIC_NO_PIC_P)
15723 return machopic_symbol_defined_p (x);
15724 #endif
15726 /* External function address should be loaded
15727 via the GOT slot to avoid PLT. */
15728 if (ix86_force_load_from_GOT_p (x))
15729 return false;
15731 break;
15733 CASE_CONST_SCALAR_INT:
15734 switch (mode)
15736 case E_TImode:
15737 if (TARGET_64BIT)
15738 return true;
15739 /* FALLTHRU */
15740 case E_OImode:
15741 case E_XImode:
15742 if (!standard_sse_constant_p (x, mode))
15743 return false;
15744 default:
15745 break;
15747 break;
15749 case CONST_VECTOR:
15750 if (!standard_sse_constant_p (x, mode))
15751 return false;
15753 default:
15754 break;
15757 /* Otherwise we handle everything else in the move patterns. */
15758 return true;
15761 /* Determine if it's legal to put X into the constant pool. This
15762 is not possible for the address of thread-local symbols, which
15763 is checked above. */
15765 static bool
15766 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15768 /* We can put any immediate constant in memory. */
15769 switch (GET_CODE (x))
15771 CASE_CONST_ANY:
15772 return false;
15774 default:
15775 break;
15778 return !ix86_legitimate_constant_p (mode, x);
15781 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
15782 otherwise zero. */
15784 static bool
15785 is_imported_p (rtx x)
15787 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15788 || GET_CODE (x) != SYMBOL_REF)
15789 return false;
15791 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15795 /* Nonzero if the constant value X is a legitimate general operand
15796 when generating PIC code. It is given that flag_pic is on and
15797 that X satisfies CONSTANT_P. */
15799 bool
15800 legitimate_pic_operand_p (rtx x)
15802 rtx inner;
15804 switch (GET_CODE (x))
15806 case CONST:
15807 inner = XEXP (x, 0);
15808 if (GET_CODE (inner) == PLUS
15809 && CONST_INT_P (XEXP (inner, 1)))
15810 inner = XEXP (inner, 0);
15812 /* Only some unspecs are valid as "constants". */
15813 if (GET_CODE (inner) == UNSPEC)
15814 switch (XINT (inner, 1))
15816 case UNSPEC_GOT:
15817 case UNSPEC_GOTOFF:
15818 case UNSPEC_PLTOFF:
15819 return TARGET_64BIT;
15820 case UNSPEC_TPOFF:
15821 x = XVECEXP (inner, 0, 0);
15822 return (GET_CODE (x) == SYMBOL_REF
15823 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15824 case UNSPEC_MACHOPIC_OFFSET:
15825 return legitimate_pic_address_disp_p (x);
15826 default:
15827 return false;
15829 /* FALLTHRU */
15831 case SYMBOL_REF:
15832 case LABEL_REF:
15833 return legitimate_pic_address_disp_p (x);
15835 default:
15836 return true;
15840 /* Determine if a given CONST RTX is a valid memory displacement
15841 in PIC mode. */
15843 bool
15844 legitimate_pic_address_disp_p (rtx disp)
15846 bool saw_plus;
15848 /* In 64bit mode we can allow direct addresses of symbols and labels
15849 when they are not dynamic symbols. */
15850 if (TARGET_64BIT)
15852 rtx op0 = disp, op1;
15854 switch (GET_CODE (disp))
15856 case LABEL_REF:
15857 return true;
15859 case CONST:
15860 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15861 break;
15862 op0 = XEXP (XEXP (disp, 0), 0);
15863 op1 = XEXP (XEXP (disp, 0), 1);
15864 if (!CONST_INT_P (op1))
15865 break;
15866 if (GET_CODE (op0) == UNSPEC
15867 && (XINT (op0, 1) == UNSPEC_DTPOFF
15868 || XINT (op0, 1) == UNSPEC_NTPOFF)
15869 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15870 return true;
15871 if (INTVAL (op1) >= 16*1024*1024
15872 || INTVAL (op1) < -16*1024*1024)
15873 break;
15874 if (GET_CODE (op0) == LABEL_REF)
15875 return true;
15876 if (GET_CODE (op0) == CONST
15877 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15878 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15879 return true;
15880 if (GET_CODE (op0) == UNSPEC
15881 && XINT (op0, 1) == UNSPEC_PCREL)
15882 return true;
15883 if (GET_CODE (op0) != SYMBOL_REF)
15884 break;
15885 /* FALLTHRU */
15887 case SYMBOL_REF:
15888 /* TLS references should always be enclosed in UNSPEC.
15889 A dllimported symbol always needs to be resolved. */
15890 if (SYMBOL_REF_TLS_MODEL (op0)
15891 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15892 return false;
15894 if (TARGET_PECOFF)
15896 if (is_imported_p (op0))
15897 return true;
15899 if (SYMBOL_REF_FAR_ADDR_P (op0)
15900 || !SYMBOL_REF_LOCAL_P (op0))
15901 break;
15903 /* Function symbols need to be resolved only for
15904 the large model.
15905 For the small model we don't need to resolve anything
15906 here. */
15907 if ((ix86_cmodel != CM_LARGE_PIC
15908 && SYMBOL_REF_FUNCTION_P (op0))
15909 || ix86_cmodel == CM_SMALL_PIC)
15910 return true;
15911 /* Non-external symbols don't need to be resolved for
15912 the large and medium models. */
15913 if ((ix86_cmodel == CM_LARGE_PIC
15914 || ix86_cmodel == CM_MEDIUM_PIC)
15915 && !SYMBOL_REF_EXTERNAL_P (op0))
15916 return true;
15918 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15919 && (SYMBOL_REF_LOCAL_P (op0)
15920 || (HAVE_LD_PIE_COPYRELOC
15921 && flag_pie
15922 && !SYMBOL_REF_WEAK (op0)
15923 && !SYMBOL_REF_FUNCTION_P (op0)))
15924 && ix86_cmodel != CM_LARGE_PIC)
15925 return true;
15926 break;
15928 default:
15929 break;
15932 if (GET_CODE (disp) != CONST)
15933 return false;
15934 disp = XEXP (disp, 0);
15936 if (TARGET_64BIT)
15938 /* It is not safe to allow PLUS expressions; this would limit the allowed
15939 distance of GOT tables. We should not need these anyway. */
15940 if (GET_CODE (disp) != UNSPEC
15941 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15942 && XINT (disp, 1) != UNSPEC_GOTOFF
15943 && XINT (disp, 1) != UNSPEC_PCREL
15944 && XINT (disp, 1) != UNSPEC_PLTOFF))
15945 return false;
15947 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15948 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15949 return false;
15950 return true;
15953 saw_plus = false;
15954 if (GET_CODE (disp) == PLUS)
15956 if (!CONST_INT_P (XEXP (disp, 1)))
15957 return false;
15958 disp = XEXP (disp, 0);
15959 saw_plus = true;
15962 if (TARGET_MACHO && darwin_local_data_pic (disp))
15963 return true;
15965 if (GET_CODE (disp) != UNSPEC)
15966 return false;
15968 switch (XINT (disp, 1))
15970 case UNSPEC_GOT:
15971 if (saw_plus)
15972 return false;
15973 /* We need to check for both symbols and labels because VxWorks loads
15974 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15975 details. */
15976 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15977 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15978 case UNSPEC_GOTOFF:
15979 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15980 While the ABI also specifies a 32bit relocation, we don't produce it in
15981 the small PIC model at all. */
15982 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15983 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15984 && !TARGET_64BIT)
15985 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15986 return false;
15987 case UNSPEC_GOTTPOFF:
15988 case UNSPEC_GOTNTPOFF:
15989 case UNSPEC_INDNTPOFF:
15990 if (saw_plus)
15991 return false;
15992 disp = XVECEXP (disp, 0, 0);
15993 return (GET_CODE (disp) == SYMBOL_REF
15994 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15995 case UNSPEC_NTPOFF:
15996 disp = XVECEXP (disp, 0, 0);
15997 return (GET_CODE (disp) == SYMBOL_REF
15998 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15999 case UNSPEC_DTPOFF:
16000 disp = XVECEXP (disp, 0, 0);
16001 return (GET_CODE (disp) == SYMBOL_REF
16002 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
16005 return false;
16008 /* Determine if op is a suitable RTX for an address register.
16009 Return the naked register if a register or a register subreg is
16010 found, otherwise return NULL_RTX. */
16012 static rtx
16013 ix86_validate_address_register (rtx op)
16015 machine_mode mode = GET_MODE (op);
16017 /* Only SImode or DImode registers can form the address. */
16018 if (mode != SImode && mode != DImode)
16019 return NULL_RTX;
16021 if (REG_P (op))
16022 return op;
16023 else if (SUBREG_P (op))
16025 rtx reg = SUBREG_REG (op);
16027 if (!REG_P (reg))
16028 return NULL_RTX;
16030 mode = GET_MODE (reg);
16032 /* Don't allow SUBREGs that span more than a word. It can
16033 lead to spill failures when the register is one word out
16034 of a two word structure. */
16035 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16036 return NULL_RTX;
16038 /* Allow only SUBREGs of non-eliminable hard registers. */
16039 if (register_no_elim_operand (reg, mode))
16040 return reg;
16043 /* Op is not a register. */
16044 return NULL_RTX;
16047 /* Recognizes RTL expressions that are valid memory addresses for an
16048 instruction. The MODE argument is the machine mode for the MEM
16049 expression that wants to use this address.
16051 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
16052 convert common non-canonical forms to canonical form so that they will
16053 be recognized. */
16055 static bool
16056 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
16058 struct ix86_address parts;
16059 rtx base, index, disp;
16060 HOST_WIDE_INT scale;
16061 addr_space_t seg;
16063 if (ix86_decompose_address (addr, &parts) <= 0)
16064 /* Decomposition failed. */
16065 return false;
16067 base = parts.base;
16068 index = parts.index;
16069 disp = parts.disp;
16070 scale = parts.scale;
16071 seg = parts.seg;
16073 /* Validate base register. */
16074 if (base)
16076 rtx reg = ix86_validate_address_register (base);
16078 if (reg == NULL_RTX)
16079 return false;
16081 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
16082 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
16083 /* Base is not valid. */
16084 return false;
16087 /* Validate index register. */
16088 if (index)
16090 rtx reg = ix86_validate_address_register (index);
16092 if (reg == NULL_RTX)
16093 return false;
16095 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
16096 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
16097 /* Index is not valid. */
16098 return false;
16101 /* Index and base should have the same mode. */
16102 if (base && index
16103 && GET_MODE (base) != GET_MODE (index))
16104 return false;
16106 /* Address override works only on the (%reg) part of %fs:(%reg). */
16107 if (seg != ADDR_SPACE_GENERIC
16108 && ((base && GET_MODE (base) != word_mode)
16109 || (index && GET_MODE (index) != word_mode)))
16110 return false;
16112 /* Validate scale factor. */
16113 if (scale != 1)
16115 if (!index)
16116 /* Scale without index. */
16117 return false;
16119 if (scale != 2 && scale != 4 && scale != 8)
16120 /* Scale is not a valid multiplier. */
16121 return false;
16124 /* Validate displacement. */
16125 if (disp)
16127 if (GET_CODE (disp) == CONST
16128 && GET_CODE (XEXP (disp, 0)) == UNSPEC
16129 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
16130 switch (XINT (XEXP (disp, 0), 1))
16132 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
16133 when used. While the ABI also specifies 32bit relocations, we
16134 don't produce them at all and use IP-relative addressing instead.
16135 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
16136 should be loaded via the GOT. */
16137 case UNSPEC_GOT:
16138 if (!TARGET_64BIT
16139 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16140 goto is_legitimate_pic;
16141 /* FALLTHRU */
16142 case UNSPEC_GOTOFF:
16143 gcc_assert (flag_pic);
16144 if (!TARGET_64BIT)
16145 goto is_legitimate_pic;
16147 /* 64bit address unspec. */
16148 return false;
16150 case UNSPEC_GOTPCREL:
16151 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16152 goto is_legitimate_pic;
16153 /* FALLTHRU */
16154 case UNSPEC_PCREL:
16155 gcc_assert (flag_pic);
16156 goto is_legitimate_pic;
16158 case UNSPEC_GOTTPOFF:
16159 case UNSPEC_GOTNTPOFF:
16160 case UNSPEC_INDNTPOFF:
16161 case UNSPEC_NTPOFF:
16162 case UNSPEC_DTPOFF:
16163 break;
16165 default:
16166 /* Invalid address unspec. */
16167 return false;
16170 else if (SYMBOLIC_CONST (disp)
16171 && (flag_pic
16172 || (TARGET_MACHO
16173 #if TARGET_MACHO
16174 && MACHOPIC_INDIRECT
16175 && !machopic_operand_p (disp)
16176 #endif
16180 is_legitimate_pic:
16181 if (TARGET_64BIT && (index || base))
16183 /* foo@dtpoff(%rX) is ok. */
16184 if (GET_CODE (disp) != CONST
16185 || GET_CODE (XEXP (disp, 0)) != PLUS
16186 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
16187 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
16188 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
16189 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
16190 /* Non-constant pic memory reference. */
16191 return false;
16193 else if ((!TARGET_MACHO || flag_pic)
16194 && ! legitimate_pic_address_disp_p (disp))
16195 /* Displacement is an invalid pic construct. */
16196 return false;
16197 #if TARGET_MACHO
16198 else if (MACHO_DYNAMIC_NO_PIC_P
16199 && !ix86_legitimate_constant_p (Pmode, disp))
16200 /* displacement must be referenced via non_lazy_pointer */
16201 return false;
16202 #endif
16204 /* This code used to verify that a symbolic pic displacement
16205 includes the pic_offset_table_rtx register.
16207 While this is a good idea, unfortunately these constructs may
16208 be created by the "adds using lea" optimization for incorrect
16209 code like:
16211 int a;
16212 int foo(int i)
16214 return *(&a+i);
16217 This code is nonsensical, but results in addressing the
16218 GOT table with a pic_offset_table_rtx base. We can't
16219 easily refuse it, since it gets matched by the
16220 "addsi3" pattern, which later gets split to lea when the
16221 output register differs from the input. While this
16222 could be handled by a separate addsi pattern for this case
16223 that never results in lea, disabling this test seems to be
16224 the easier and correct fix for the crash. */
16226 else if (GET_CODE (disp) != LABEL_REF
16227 && !CONST_INT_P (disp)
16228 && (GET_CODE (disp) != CONST
16229 || !ix86_legitimate_constant_p (Pmode, disp))
16230 && (GET_CODE (disp) != SYMBOL_REF
16231 || !ix86_legitimate_constant_p (Pmode, disp)))
16232 /* Displacement is not constant. */
16233 return false;
16234 else if (TARGET_64BIT
16235 && !x86_64_immediate_operand (disp, VOIDmode))
16236 /* Displacement is out of range. */
16237 return false;
16238 /* In x32 mode, constant addresses are sign extended to 64bit, so
16239 we have to prevent addresses from 0x80000000 to 0xffffffff. */
16240 else if (TARGET_X32 && !(index || base)
16241 && CONST_INT_P (disp)
16242 && val_signbit_known_set_p (SImode, INTVAL (disp)))
16243 return false;
16246 /* Everything looks valid. */
16247 return true;
16250 /* Determine if a given RTX is a valid constant address. */
16252 bool
16253 constant_address_p (rtx x)
16255 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
16258 /* Return a unique alias set for the GOT. */
16260 static alias_set_type
16261 ix86_GOT_alias_set (void)
16263 static alias_set_type set = -1;
16264 if (set == -1)
16265 set = new_alias_set ();
16266 return set;
16269 /* Return a legitimate reference for ORIG (an address) using the
16270 register REG. If REG is 0, a new pseudo is generated.
16272 There are two types of references that must be handled:
16274 1. Global data references must load the address from the GOT, via
16275 the PIC reg. An insn is emitted to do this load, and the reg is
16276 returned.
16278 2. Static data references, constant pool addresses, and code labels
16279 compute the address as an offset from the GOT, whose base is in
16280 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
16281 differentiate them from global data objects. The returned
16282 address is the PIC reg + an unspec constant.
16284 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
16285 reg also appears in the address. */
16287 static rtx
16288 legitimize_pic_address (rtx orig, rtx reg)
16290 rtx addr = orig;
16291 rtx new_rtx = orig;
16293 #if TARGET_MACHO
16294 if (TARGET_MACHO && !TARGET_64BIT)
16296 if (reg == 0)
16297 reg = gen_reg_rtx (Pmode);
16298 /* Use the generic Mach-O PIC machinery. */
16299 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
16301 #endif
16303 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16305 rtx tmp = legitimize_pe_coff_symbol (addr, true);
16306 if (tmp)
16307 return tmp;
16310 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
16311 new_rtx = addr;
16312 else if ((!TARGET_64BIT
16313 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
16314 && !TARGET_PECOFF
16315 && gotoff_operand (addr, Pmode))
16317 /* This symbol may be referenced via a displacement
16318 from the PIC base address (@GOTOFF). */
16319 if (GET_CODE (addr) == CONST)
16320 addr = XEXP (addr, 0);
16322 if (GET_CODE (addr) == PLUS)
16324 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
16325 UNSPEC_GOTOFF);
16326 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
16328 else
16329 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
16331 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16333 if (TARGET_64BIT)
16334 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16336 if (reg != 0)
16338 gcc_assert (REG_P (reg));
16339 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
16340 new_rtx, reg, 1, OPTAB_DIRECT);
16342 else
16343 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16345 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
16346 /* We can't use @GOTOFF for text labels
16347 on VxWorks, see gotoff_operand. */
16348 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
16350 rtx tmp = legitimize_pe_coff_symbol (addr, true);
16351 if (tmp)
16352 return tmp;
16354 /* For x64 PE-COFF there is no GOT table,
16355 so we use address directly. */
16356 if (TARGET_64BIT && TARGET_PECOFF)
16358 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
16359 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16361 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
16363 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
16364 UNSPEC_GOTPCREL);
16365 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16366 new_rtx = gen_const_mem (Pmode, new_rtx);
16367 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16369 else
16371 /* This symbol must be referenced via a load
16372 from the Global Offset Table (@GOT). */
16373 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
16374 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16375 if (TARGET_64BIT)
16376 new_rtx = force_reg (Pmode, new_rtx);
16377 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16378 new_rtx = gen_const_mem (Pmode, new_rtx);
16379 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16382 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16384 else
16386 if (CONST_INT_P (addr)
16387 && !x86_64_immediate_operand (addr, VOIDmode))
16388 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
16389 else if (GET_CODE (addr) == CONST)
16391 addr = XEXP (addr, 0);
16393 /* We must match stuff we generate before. Assume the only
16394 unspecs that can get here are ours. Not that we could do
16395 anything with them anyway.... */
16396 if (GET_CODE (addr) == UNSPEC
16397 || (GET_CODE (addr) == PLUS
16398 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
16399 return orig;
16400 gcc_assert (GET_CODE (addr) == PLUS);
16403 if (GET_CODE (addr) == PLUS)
16405 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
16407 /* Check first to see if this is a constant
16408 offset from a @GOTOFF symbol reference. */
16409 if (!TARGET_PECOFF
16410 && gotoff_operand (op0, Pmode)
16411 && CONST_INT_P (op1))
16413 if (!TARGET_64BIT)
16415 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
16416 UNSPEC_GOTOFF);
16417 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
16418 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16420 if (reg != 0)
16422 gcc_assert (REG_P (reg));
16423 new_rtx = expand_simple_binop (Pmode, PLUS,
16424 pic_offset_table_rtx,
16425 new_rtx, reg, 1,
16426 OPTAB_DIRECT);
16428 else
16429 new_rtx
16430 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16432 else
16434 if (INTVAL (op1) < -16*1024*1024
16435 || INTVAL (op1) >= 16*1024*1024)
16437 if (!x86_64_immediate_operand (op1, Pmode))
16438 op1 = force_reg (Pmode, op1);
16440 new_rtx
16441 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
16445 else
16447 rtx base = legitimize_pic_address (op0, reg);
16448 machine_mode mode = GET_MODE (base);
16449 new_rtx
16450 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
16452 if (CONST_INT_P (new_rtx))
16454 if (INTVAL (new_rtx) < -16*1024*1024
16455 || INTVAL (new_rtx) >= 16*1024*1024)
16457 if (!x86_64_immediate_operand (new_rtx, mode))
16458 new_rtx = force_reg (mode, new_rtx);
16460 new_rtx
16461 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
16463 else
16464 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
16466 else
16468 /* For %rip addressing, we have to use
16469 just disp32, not base nor index. */
16470 if (TARGET_64BIT
16471 && (GET_CODE (base) == SYMBOL_REF
16472 || GET_CODE (base) == LABEL_REF))
16473 base = force_reg (mode, base);
16474 if (GET_CODE (new_rtx) == PLUS
16475 && CONSTANT_P (XEXP (new_rtx, 1)))
16477 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
16478 new_rtx = XEXP (new_rtx, 1);
16480 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
16485 return new_rtx;
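/* Example of the legitimization above (illustrative, with the PIC
   register conventionally in %ebx for 32-bit code): a reference to a
   local static variable lvar becomes

     (plus (reg pic_offset_table) (const (unspec [lvar] UNSPEC_GOTOFF)))

   i.e. lvar@GOTOFF(%ebx), while a preemptible global gvar becomes a
   load from its GOT slot,

     (mem (plus (reg pic_offset_table) (const (unspec [gvar] UNSPEC_GOT))))

   i.e. gvar@GOT(%ebx).  */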
16488 /* Load the thread pointer. If TO_REG is true, force it into a register. */
16490 static rtx
16491 get_thread_pointer (machine_mode tp_mode, bool to_reg)
16493 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
16495 if (GET_MODE (tp) != tp_mode)
16497 gcc_assert (GET_MODE (tp) == SImode);
16498 gcc_assert (tp_mode == DImode);
16500 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
16503 if (to_reg)
16504 tp = copy_to_mode_reg (tp_mode, tp);
16506 return tp;
16509 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16511 static GTY(()) rtx ix86_tls_symbol;
16513 static rtx
16514 ix86_tls_get_addr (void)
16516 if (!ix86_tls_symbol)
16518 const char *sym
16519 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
16520 ? "___tls_get_addr" : "__tls_get_addr");
16522 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
16525 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
16527 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
16528 UNSPEC_PLTOFF);
16529 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
16530 gen_rtx_CONST (Pmode, unspec));
16533 return ix86_tls_symbol;
16536 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16538 static GTY(()) rtx ix86_tls_module_base_symbol;
16541 ix86_tls_module_base (void)
16543 if (!ix86_tls_module_base_symbol)
16545 ix86_tls_module_base_symbol
16546 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
16548 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16549 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16552 return ix86_tls_module_base_symbol;
16555 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
16556 false if we expect this to be used for a memory address and true if
16557 we expect to load the address into a register. */
16559 static rtx
16560 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
16562 rtx dest, base, off;
16563 rtx pic = NULL_RTX, tp = NULL_RTX;
16564 machine_mode tp_mode = Pmode;
16565 int type;
16567 /* Fall back to the global dynamic model if the toolchain cannot support
16568 local dynamic. */
16569 if (TARGET_SUN_TLS && !TARGET_64BIT
16570 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
16571 && model == TLS_MODEL_LOCAL_DYNAMIC)
16572 model = TLS_MODEL_GLOBAL_DYNAMIC;
16574 switch (model)
16576 case TLS_MODEL_GLOBAL_DYNAMIC:
16577 dest = gen_reg_rtx (Pmode);
16579 if (!TARGET_64BIT)
16581 if (flag_pic && !TARGET_PECOFF)
16582 pic = pic_offset_table_rtx;
16583 else
16585 pic = gen_reg_rtx (Pmode);
16586 emit_insn (gen_set_got (pic));
16590 if (TARGET_GNU2_TLS)
16592 if (TARGET_64BIT)
16593 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
16594 else
16595 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
16597 tp = get_thread_pointer (Pmode, true);
16598 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
16600 if (GET_MODE (x) != Pmode)
16601 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16603 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16605 else
16607 rtx caddr = ix86_tls_get_addr ();
16609 if (TARGET_64BIT)
16611 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16612 rtx_insn *insns;
16614 start_sequence ();
16615 emit_call_insn
16616 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
16617 insns = get_insns ();
16618 end_sequence ();
16620 if (GET_MODE (x) != Pmode)
16621 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16623 RTL_CONST_CALL_P (insns) = 1;
16624 emit_libcall_block (insns, dest, rax, x);
16626 else
16627 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
16629 break;
16631 case TLS_MODEL_LOCAL_DYNAMIC:
16632 base = gen_reg_rtx (Pmode);
16634 if (!TARGET_64BIT)
16636 if (flag_pic)
16637 pic = pic_offset_table_rtx;
16638 else
16640 pic = gen_reg_rtx (Pmode);
16641 emit_insn (gen_set_got (pic));
16645 if (TARGET_GNU2_TLS)
16647 rtx tmp = ix86_tls_module_base ();
16649 if (TARGET_64BIT)
16650 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16651 else
16652 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16654 tp = get_thread_pointer (Pmode, true);
16655 set_unique_reg_note (get_last_insn (), REG_EQUAL,
16656 gen_rtx_MINUS (Pmode, tmp, tp));
16658 else
16660 rtx caddr = ix86_tls_get_addr ();
16662 if (TARGET_64BIT)
16664 rtx rax = gen_rtx_REG (Pmode, AX_REG);
16665 rtx_insn *insns;
16666 rtx eqv;
16668 start_sequence ();
16669 emit_call_insn
16670 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16671 insns = get_insns ();
16672 end_sequence ();
16674 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16675 share the LD_BASE result with other LD model accesses. */
16676 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16677 UNSPEC_TLS_LD_BASE);
16679 RTL_CONST_CALL_P (insns) = 1;
16680 emit_libcall_block (insns, base, rax, eqv);
16682 else
16683 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16686 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16687 off = gen_rtx_CONST (Pmode, off);
16689 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16691 if (TARGET_GNU2_TLS)
16693 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16695 if (GET_MODE (x) != Pmode)
16696 x = gen_rtx_ZERO_EXTEND (Pmode, x);
16698 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16700 break;
16702 case TLS_MODEL_INITIAL_EXEC:
16703 if (TARGET_64BIT)
16705 if (TARGET_SUN_TLS && !TARGET_X32)
16707 /* The Sun linker took the AMD64 TLS spec literally
16708 and can only handle %rax as destination of the
16709 initial executable code sequence. */
16711 dest = gen_reg_rtx (DImode);
16712 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16713 return dest;
16716 /* Generate DImode references to avoid %fs:(%reg32)
16717 problems and the linker IE->LE relaxation bug. */
16718 tp_mode = DImode;
16719 pic = NULL;
16720 type = UNSPEC_GOTNTPOFF;
16722 else if (flag_pic)
16724 pic = pic_offset_table_rtx;
16725 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16727 else if (!TARGET_ANY_GNU_TLS)
16729 pic = gen_reg_rtx (Pmode);
16730 emit_insn (gen_set_got (pic));
16731 type = UNSPEC_GOTTPOFF;
16733 else
16735 pic = NULL;
16736 type = UNSPEC_INDNTPOFF;
16739 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16740 off = gen_rtx_CONST (tp_mode, off);
16741 if (pic)
16742 off = gen_rtx_PLUS (tp_mode, pic, off);
16743 off = gen_const_mem (tp_mode, off);
16744 set_mem_alias_set (off, ix86_GOT_alias_set ());
16746 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16748 base = get_thread_pointer (tp_mode,
16749 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16750 off = force_reg (tp_mode, off);
16751 dest = gen_rtx_PLUS (tp_mode, base, off);
16752 if (tp_mode != Pmode)
16753 dest = convert_to_mode (Pmode, dest, 1);
16755 else
16757 base = get_thread_pointer (Pmode, true);
16758 dest = gen_reg_rtx (Pmode);
16759 emit_insn (ix86_gen_sub3 (dest, base, off));
16761 break;
16763 case TLS_MODEL_LOCAL_EXEC:
16764 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16765 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16766 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16767 off = gen_rtx_CONST (Pmode, off);
16769 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16771 base = get_thread_pointer (Pmode,
16772 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16773 return gen_rtx_PLUS (Pmode, base, off);
16775 else
16777 base = get_thread_pointer (Pmode, true);
16778 dest = gen_reg_rtx (Pmode);
16779 emit_insn (ix86_gen_sub3 (dest, base, off));
16781 break;
16783 default:
16784 gcc_unreachable ();
16787 return dest;
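/* A minimal, standalone sketch of the source constructs that reach
   legitimize_tls_address.  The models suggested in the comments assume a
   typical x86-64 GNU/Linux configuration with -fpic; the compiler may still
   choose or relax the model differently, so treat them as illustrative only.  */
#if 0
#include <stdio.h>

__thread int tls_counter;		/* usually global-dynamic with -fpic */
static __thread int tls_local;		/* usually local-dynamic with -fpic */

__attribute__((tls_model ("initial-exec")))
__thread int tls_ie;			/* forces the initial-exec model */

__attribute__((tls_model ("local-exec")))
static __thread int tls_le;		/* forces the local-exec model */

int
main (void)
{
  tls_counter++;
  tls_local += 2;
  tls_ie = tls_counter;
  tls_le = tls_local;
  printf ("%d %d %d %d\n", tls_counter, tls_local, tls_ie, tls_le);
  return 0;
}
#endif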
16790 /* Return true if OP refers to a TLS address. */
16791 bool
16792 ix86_tls_address_pattern_p (rtx op)
16794 subrtx_var_iterator::array_type array;
16795 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16797 rtx op = *iter;
16798 if (MEM_P (op))
16800 rtx *x = &XEXP (op, 0);
16801 while (GET_CODE (*x) == PLUS)
16803 int i;
16804 for (i = 0; i < 2; i++)
16806 rtx u = XEXP (*x, i);
16807 if (GET_CODE (u) == ZERO_EXTEND)
16808 u = XEXP (u, 0);
16809 if (GET_CODE (u) == UNSPEC
16810 && XINT (u, 1) == UNSPEC_TP)
16811 return true;
16813 x = &XEXP (*x, 0);
16816 iter.skip_subrtxes ();
16820 return false;
16823 /* Rewrite *LOC so that it refers to a default TLS address space. */
16824 void
16825 ix86_rewrite_tls_address_1 (rtx *loc)
16827 subrtx_ptr_iterator::array_type array;
16828 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16830 rtx *loc = *iter;
16831 if (MEM_P (*loc))
16833 rtx addr = XEXP (*loc, 0);
16834 rtx *x = &addr;
16835 while (GET_CODE (*x) == PLUS)
16837 int i;
16838 for (i = 0; i < 2; i++)
16840 rtx u = XEXP (*x, i);
16841 if (GET_CODE (u) == ZERO_EXTEND)
16842 u = XEXP (u, 0);
16843 if (GET_CODE (u) == UNSPEC
16844 && XINT (u, 1) == UNSPEC_TP)
16846 addr_space_t as = DEFAULT_TLS_SEG_REG;
16848 *x = XEXP (*x, 1 - i);
16850 *loc = replace_equiv_address_nv (*loc, addr, true);
16851 set_mem_addr_space (*loc, as);
16852 return;
16855 x = &XEXP (*x, 0);
16858 iter.skip_subrtxes ();
16863 /* Rewrite instruction pattern involving TLS address
16864 so that it refers to a default TLS address space. */
16866 ix86_rewrite_tls_address (rtx pattern)
16868 pattern = copy_insn (pattern);
16869 ix86_rewrite_tls_address_1 (&pattern);
16870 return pattern;
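/* A minimal, standalone sketch of the kind of access the two helpers above
   recognize and rewrite: once the UNSPEC_TP term is stripped, the remaining
   address lives in the %fs-based TLS address space.  The assembly noted in
   the comment is what a typical x86-64 GNU/Linux build emits at -O2; it is
   illustrative, not guaranteed.  */
#if 0
static __thread long le_var;

long
read_le (void)
{
  return le_var;	/* commonly: movq %fs:le_var@tpoff, %rax */
}
#endif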
16873 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16874 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16875 unique refptr-DECL symbol corresponding to symbol DECL. */
16877 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16879 static inline hashval_t hash (tree_map *m) { return m->hash; }
16880 static inline bool
16881 equal (tree_map *a, tree_map *b)
16883 return a->base.from == b->base.from;
16886 static int
16887 keep_cache_entry (tree_map *&m)
16889 return ggc_marked_p (m->base.from);
16893 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16895 static tree
16896 get_dllimport_decl (tree decl, bool beimport)
16898 struct tree_map *h, in;
16899 const char *name;
16900 const char *prefix;
16901 size_t namelen, prefixlen;
16902 char *imp_name;
16903 tree to;
16904 rtx rtl;
16906 if (!dllimport_map)
16907 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16909 in.hash = htab_hash_pointer (decl);
16910 in.base.from = decl;
16911 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16912 h = *loc;
16913 if (h)
16914 return h->to;
16916 *loc = h = ggc_alloc<tree_map> ();
16917 h->hash = in.hash;
16918 h->base.from = decl;
16919 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16920 VAR_DECL, NULL, ptr_type_node);
16921 DECL_ARTIFICIAL (to) = 1;
16922 DECL_IGNORED_P (to) = 1;
16923 DECL_EXTERNAL (to) = 1;
16924 TREE_READONLY (to) = 1;
16926 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16927 name = targetm.strip_name_encoding (name);
16928 if (beimport)
16929 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16930 ? "*__imp_" : "*__imp__";
16931 else
16932 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16933 namelen = strlen (name);
16934 prefixlen = strlen (prefix);
16935 imp_name = (char *) alloca (namelen + prefixlen + 1);
16936 memcpy (imp_name, prefix, prefixlen);
16937 memcpy (imp_name + prefixlen, name, namelen + 1);
16939 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16940 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16941 SET_SYMBOL_REF_DECL (rtl, to);
16942 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16943 if (!beimport)
16945 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16946 #ifdef SUB_TARGET_RECORD_STUB
16947 SUB_TARGET_RECORD_STUB (name);
16948 #endif
16951 rtl = gen_const_mem (Pmode, rtl);
16952 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16954 SET_DECL_RTL (to, rtl);
16955 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16957 return to;
16960 /* Expand SYMBOL into its corresponding far-address symbol.
16961 WANT_REG is true if we require the result be a register. */
16963 static rtx
16964 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16966 tree imp_decl;
16967 rtx x;
16969 gcc_assert (SYMBOL_REF_DECL (symbol));
16970 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16972 x = DECL_RTL (imp_decl);
16973 if (want_reg)
16974 x = force_reg (Pmode, x);
16975 return x;
16978 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16979 true if we require the result be a register. */
16981 static rtx
16982 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16984 tree imp_decl;
16985 rtx x;
16987 gcc_assert (SYMBOL_REF_DECL (symbol));
16988 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16990 x = DECL_RTL (imp_decl);
16991 if (want_reg)
16992 x = force_reg (Pmode, x);
16993 return x;
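/* A minimal, standalone sketch for a PE target (e.g. mingw): accesses to a
   dllimport'ed object go through the __imp_ pointer built by
   get_dllimport_decl above.  Whether the emitted name is "__imp_foo" or
   "__imp__foo" depends on the user label prefix, exactly as handled there.  */
#if 0
__declspec(dllimport) extern int imported_counter;

int
read_imported (void)
{
  /* Typically compiled as a load of the __imp_ pointer followed by a load
     through that pointer.  */
  return imported_counter;
}
#endif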
16996 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16997 is true if we require the result be a register. */
16999 static rtx
17000 legitimize_pe_coff_symbol (rtx addr, bool inreg)
17002 if (!TARGET_PECOFF)
17003 return NULL_RTX;
17005 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17007 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
17008 return legitimize_dllimport_symbol (addr, inreg);
17009 if (GET_CODE (addr) == CONST
17010 && GET_CODE (XEXP (addr, 0)) == PLUS
17011 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17012 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
17014 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
17015 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17019 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
17020 return NULL_RTX;
17021 if (GET_CODE (addr) == SYMBOL_REF
17022 && !is_imported_p (addr)
17023 && SYMBOL_REF_EXTERNAL_P (addr)
17024 && SYMBOL_REF_DECL (addr))
17025 return legitimize_pe_coff_extern_decl (addr, inreg);
17027 if (GET_CODE (addr) == CONST
17028 && GET_CODE (XEXP (addr, 0)) == PLUS
17029 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17030 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
17031 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
17032 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
17034 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
17035 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17037 return NULL_RTX;
17040 /* Try machine-dependent ways of modifying an illegitimate address
17041 to be legitimate. If we find one, return the new, valid address.
17042 This macro is used in only one place: `memory_address' in explow.c.
17044 OLDX is the address as it was before break_out_memory_refs was called.
17045 In some cases it is useful to look at this to decide what needs to be done.
17047 It is always safe for this macro to do nothing. It exists to recognize
17048 opportunities to optimize the output.
17050 For the 80386, we handle X+REG by loading X into a register R and
17051 using R+REG. R will go in a general reg and indexing will be used.
17052 However, if REG is a broken-out memory address or multiplication,
17053 nothing needs to be done because REG can certainly go in a general reg.
17055 When -fpic is used, special handling is needed for symbolic references.
17056 See comments by legitimize_pic_address in i386.c for details. */
17058 static rtx
17059 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
17061 bool changed = false;
17062 unsigned log;
17064 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
17065 if (log)
17066 return legitimize_tls_address (x, (enum tls_model) log, false);
17067 if (GET_CODE (x) == CONST
17068 && GET_CODE (XEXP (x, 0)) == PLUS
17069 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
17070 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
17072 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
17073 (enum tls_model) log, false);
17074 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
17077 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17079 rtx tmp = legitimize_pe_coff_symbol (x, true);
17080 if (tmp)
17081 return tmp;
17084 if (flag_pic && SYMBOLIC_CONST (x))
17085 return legitimize_pic_address (x, 0);
17087 #if TARGET_MACHO
17088 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
17089 return machopic_indirect_data_reference (x, 0);
17090 #endif
17092 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
17093 if (GET_CODE (x) == ASHIFT
17094 && CONST_INT_P (XEXP (x, 1))
17095 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
17097 changed = true;
17098 log = INTVAL (XEXP (x, 1));
17099 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
17100 GEN_INT (1 << log));
17103 if (GET_CODE (x) == PLUS)
17105 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
17107 if (GET_CODE (XEXP (x, 0)) == ASHIFT
17108 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17109 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
17111 changed = true;
17112 log = INTVAL (XEXP (XEXP (x, 0), 1));
17113 XEXP (x, 0) = gen_rtx_MULT (Pmode,
17114 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
17115 GEN_INT (1 << log));
17118 if (GET_CODE (XEXP (x, 1)) == ASHIFT
17119 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
17120 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
17122 changed = true;
17123 log = INTVAL (XEXP (XEXP (x, 1), 1));
17124 XEXP (x, 1) = gen_rtx_MULT (Pmode,
17125 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
17126 GEN_INT (1 << log));
17129 /* Put multiply first if it isn't already. */
17130 if (GET_CODE (XEXP (x, 1)) == MULT)
17132 std::swap (XEXP (x, 0), XEXP (x, 1));
17133 changed = true;
17136 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
17137 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
17138 created by virtual register instantiation, register elimination, and
17139 similar optimizations. */
17140 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
17142 changed = true;
17143 x = gen_rtx_PLUS (Pmode,
17144 gen_rtx_PLUS (Pmode, XEXP (x, 0),
17145 XEXP (XEXP (x, 1), 0)),
17146 XEXP (XEXP (x, 1), 1));
17149 /* Canonicalize
17150 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
17151 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
17152 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
17153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17154 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
17155 && CONSTANT_P (XEXP (x, 1)))
17157 rtx constant;
17158 rtx other = NULL_RTX;
17160 if (CONST_INT_P (XEXP (x, 1)))
17162 constant = XEXP (x, 1);
17163 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
17165 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
17167 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
17168 other = XEXP (x, 1);
17170 else
17171 constant = 0;
17173 if (constant)
17175 changed = true;
17176 x = gen_rtx_PLUS (Pmode,
17177 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
17178 XEXP (XEXP (XEXP (x, 0), 1), 0)),
17179 plus_constant (Pmode, other,
17180 INTVAL (constant)));
17184 if (changed && ix86_legitimate_address_p (mode, x, false))
17185 return x;
17187 if (GET_CODE (XEXP (x, 0)) == MULT)
17189 changed = true;
17190 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
17193 if (GET_CODE (XEXP (x, 1)) == MULT)
17195 changed = true;
17196 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
17199 if (changed
17200 && REG_P (XEXP (x, 1))
17201 && REG_P (XEXP (x, 0)))
17202 return x;
17204 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
17206 changed = true;
17207 x = legitimize_pic_address (x, 0);
17210 if (changed && ix86_legitimate_address_p (mode, x, false))
17211 return x;
17213 if (REG_P (XEXP (x, 0)))
17215 rtx temp = gen_reg_rtx (Pmode);
17216 rtx val = force_operand (XEXP (x, 1), temp);
17217 if (val != temp)
17219 val = convert_to_mode (Pmode, val, 1);
17220 emit_move_insn (temp, val);
17223 XEXP (x, 1) = temp;
17224 return x;
17227 else if (REG_P (XEXP (x, 1)))
17229 rtx temp = gen_reg_rtx (Pmode);
17230 rtx val = force_operand (XEXP (x, 0), temp);
17231 if (val != temp)
17233 val = convert_to_mode (Pmode, val, 1);
17234 emit_move_insn (temp, val);
17237 XEXP (x, 0) = temp;
17238 return x;
17242 return x;
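/* A minimal, standalone sketch of an address that exercises the
   ASHIFT -> MULT canonicalization above: base + (index << 2) from the array
   access below is what ends up as a scaled-index address.  The operand shown
   in the comment assumes x86-64 at -O2 and is illustrative only.  */
#if 0
int
pick (int *base, long i)
{
  return base[i];	/* commonly: movl (%rdi,%rsi,4), %eax */
}
#endif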
17245 /* Print an integer constant expression in assembler syntax. Addition
17246 and subtraction are the only arithmetic that may appear in these
17247 expressions. FILE is the stdio stream to write to, X is the rtx, and
17248 CODE is the operand print code from the output string. */
17250 static void
17251 output_pic_addr_const (FILE *file, rtx x, int code)
17253 char buf[256];
17255 switch (GET_CODE (x))
17257 case PC:
17258 gcc_assert (flag_pic);
17259 putc ('.', file);
17260 break;
17262 case SYMBOL_REF:
17263 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
17264 output_addr_const (file, x);
17265 else
17267 const char *name = XSTR (x, 0);
17269 /* Mark the decl as referenced so that cgraph will
17270 output the function. */
17271 if (SYMBOL_REF_DECL (x))
17272 mark_decl_referenced (SYMBOL_REF_DECL (x));
17274 #if TARGET_MACHO
17275 if (MACHOPIC_INDIRECT
17276 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
17277 name = machopic_indirection_name (x, /*stub_p=*/true);
17278 #endif
17279 assemble_name (file, name);
17281 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
17282 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
17283 fputs ("@PLT", file);
17284 break;
17286 case LABEL_REF:
17287 x = XEXP (x, 0);
17288 /* FALLTHRU */
17289 case CODE_LABEL:
17290 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
17291 assemble_name (asm_out_file, buf);
17292 break;
17294 case CONST_INT:
17295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17296 break;
17298 case CONST:
17299 /* This used to output parentheses around the expression,
17300 but that does not work on the 386 (either ATT or BSD assembler). */
17301 output_pic_addr_const (file, XEXP (x, 0), code);
17302 break;
17304 case CONST_DOUBLE:
17305 /* We can't handle floating point constants;
17306 TARGET_PRINT_OPERAND must handle them. */
17307 output_operand_lossage ("floating constant misused");
17308 break;
17310 case PLUS:
17311 /* Some assemblers need integer constants to appear first. */
17312 if (CONST_INT_P (XEXP (x, 0)))
17314 output_pic_addr_const (file, XEXP (x, 0), code);
17315 putc ('+', file);
17316 output_pic_addr_const (file, XEXP (x, 1), code);
17318 else
17320 gcc_assert (CONST_INT_P (XEXP (x, 1)));
17321 output_pic_addr_const (file, XEXP (x, 1), code);
17322 putc ('+', file);
17323 output_pic_addr_const (file, XEXP (x, 0), code);
17325 break;
17327 case MINUS:
17328 if (!TARGET_MACHO)
17329 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
17330 output_pic_addr_const (file, XEXP (x, 0), code);
17331 putc ('-', file);
17332 output_pic_addr_const (file, XEXP (x, 1), code);
17333 if (!TARGET_MACHO)
17334 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
17335 break;
17337 case UNSPEC:
17338 gcc_assert (XVECLEN (x, 0) == 1);
17339 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
17340 switch (XINT (x, 1))
17342 case UNSPEC_GOT:
17343 fputs ("@GOT", file);
17344 break;
17345 case UNSPEC_GOTOFF:
17346 fputs ("@GOTOFF", file);
17347 break;
17348 case UNSPEC_PLTOFF:
17349 fputs ("@PLTOFF", file);
17350 break;
17351 case UNSPEC_PCREL:
17352 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17353 "(%rip)" : "[rip]", file);
17354 break;
17355 case UNSPEC_GOTPCREL:
17356 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17357 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
17358 break;
17359 case UNSPEC_GOTTPOFF:
17360 /* FIXME: This might be @TPOFF in Sun ld too. */
17361 fputs ("@gottpoff", file);
17362 break;
17363 case UNSPEC_TPOFF:
17364 fputs ("@tpoff", file);
17365 break;
17366 case UNSPEC_NTPOFF:
17367 if (TARGET_64BIT)
17368 fputs ("@tpoff", file);
17369 else
17370 fputs ("@ntpoff", file);
17371 break;
17372 case UNSPEC_DTPOFF:
17373 fputs ("@dtpoff", file);
17374 break;
17375 case UNSPEC_GOTNTPOFF:
17376 if (TARGET_64BIT)
17377 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17378 "@gottpoff(%rip)": "@gottpoff[rip]", file);
17379 else
17380 fputs ("@gotntpoff", file);
17381 break;
17382 case UNSPEC_INDNTPOFF:
17383 fputs ("@indntpoff", file);
17384 break;
17385 #if TARGET_MACHO
17386 case UNSPEC_MACHOPIC_OFFSET:
17387 putc ('-', file);
17388 machopic_output_function_base_name (file);
17389 break;
17390 #endif
17391 default:
17392 output_operand_lossage ("invalid UNSPEC as operand");
17393 break;
17395 break;
17397 default:
17398 output_operand_lossage ("invalid expression as operand");
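/* A minimal, standalone sketch of code whose operands are printed through
   output_pic_addr_const: with -fpic on 32-bit x86, extern data is normally
   addressed as foo@GOT(%ebx) and file-local data as bar@GOTOFF(%ebx), while
   calls to extern functions get the @PLT suffix.  The exact sequences depend
   on target and optimization level, so treat them as illustrative.  */
#if 0
extern int foo;
static int bar;
extern int baz (int);

int
sum (void)
{
  return baz (foo + bar);
}
#endif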
17402 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
17403 We need to emit DTP-relative relocations. */
17405 static void ATTRIBUTE_UNUSED
17406 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
17408 fputs (ASM_LONG, file);
17409 output_addr_const (file, x);
17410 fputs ("@dtpoff", file);
17411 switch (size)
17413 case 4:
17414 break;
17415 case 8:
17416 fputs (", 0", file);
17417 break;
17418 default:
17419 gcc_unreachable ();
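/* A minimal, standalone sketch of what the @dtpoff value emitted above
   describes: the DWARF location of a TLS variable.  Compiling the snippet
   with -g and dumping .debug_info typically shows a location expression
   (DW_OP_GNU_push_tls_address or DW_OP_form_tls_address) applied to such a
   DTP-relative offset; the exact form depends on the toolchain.  */
#if 0
__thread int traced;

int
bump (void)
{
  return ++traced;
}
#endif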
17423 /* Return true if X is a representation of the PIC register. This copes
17424 with calls from ix86_find_base_term, where the register might have
17425 been replaced by a cselib value. */
17427 static bool
17428 ix86_pic_register_p (rtx x)
17430 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
17431 return (pic_offset_table_rtx
17432 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
17433 else if (!REG_P (x))
17434 return false;
17435 else if (pic_offset_table_rtx)
17437 if (REGNO (x) == REGNO (pic_offset_table_rtx))
17438 return true;
17439 if (HARD_REGISTER_P (x)
17440 && !HARD_REGISTER_P (pic_offset_table_rtx)
17441 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
17442 return true;
17443 return false;
17445 else
17446 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
17449 /* Helper function for ix86_delegitimize_address.
17450 Attempt to delegitimize TLS local-exec accesses. */
17452 static rtx
17453 ix86_delegitimize_tls_address (rtx orig_x)
17455 rtx x = orig_x, unspec;
17456 struct ix86_address addr;
17458 if (!TARGET_TLS_DIRECT_SEG_REFS)
17459 return orig_x;
17460 if (MEM_P (x))
17461 x = XEXP (x, 0);
17462 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
17463 return orig_x;
17464 if (ix86_decompose_address (x, &addr) == 0
17465 || addr.seg != DEFAULT_TLS_SEG_REG
17466 || addr.disp == NULL_RTX
17467 || GET_CODE (addr.disp) != CONST)
17468 return orig_x;
17469 unspec = XEXP (addr.disp, 0);
17470 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
17471 unspec = XEXP (unspec, 0);
17472 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
17473 return orig_x;
17474 x = XVECEXP (unspec, 0, 0);
17475 gcc_assert (GET_CODE (x) == SYMBOL_REF);
17476 if (unspec != XEXP (addr.disp, 0))
17477 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
17478 if (addr.index)
17480 rtx idx = addr.index;
17481 if (addr.scale != 1)
17482 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
17483 x = gen_rtx_PLUS (Pmode, idx, x);
17485 if (addr.base)
17486 x = gen_rtx_PLUS (Pmode, addr.base, x);
17487 if (MEM_P (orig_x))
17488 x = replace_equiv_address_nv (orig_x, x);
17489 return x;
17492 /* In the name of slightly smaller debug output, and to cater to
17493 general assembler lossage, recognize PIC+GOTOFF and turn it back
17494 into a direct symbol reference.
17496 On Darwin, this is necessary to avoid a crash, because Darwin
17497 has a different PIC label for each routine but the DWARF debugging
17498 information is not associated with any particular routine, so it's
17499 necessary to remove references to the PIC label from RTL stored by
17500 the DWARF output code.
17502 This helper is used in the normal ix86_delegitimize_address
17503 entrypoint (e.g. used in the target delegitimization hook) and
17504 in ix86_find_base_term. As a compile-time memory optimization, we
17505 avoid allocating rtxes that will not change the outcome
17506 of the callers (find_base_value and find_base_term). */
17508 static inline rtx
17509 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
17511 rtx orig_x = delegitimize_mem_from_attrs (x);
17512 /* addend is NULL or some rtx if x is something+GOTOFF where
17513 something doesn't include the PIC register. */
17514 rtx addend = NULL_RTX;
17515 /* reg_addend is NULL or a multiple of some register. */
17516 rtx reg_addend = NULL_RTX;
17517 /* const_addend is NULL or a const_int. */
17518 rtx const_addend = NULL_RTX;
17519 /* This is the result, or NULL. */
17520 rtx result = NULL_RTX;
17522 x = orig_x;
17524 if (MEM_P (x))
17525 x = XEXP (x, 0);
17527 if (TARGET_64BIT)
17529 if (GET_CODE (x) == CONST
17530 && GET_CODE (XEXP (x, 0)) == PLUS
17531 && GET_MODE (XEXP (x, 0)) == Pmode
17532 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17533 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
17534 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
17536 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
17537 base. A CONST can't be arg_pointer_rtx based. */
17538 if (base_term_p && MEM_P (orig_x))
17539 return orig_x;
17540 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
17541 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
17542 if (MEM_P (orig_x))
17543 x = replace_equiv_address_nv (orig_x, x);
17544 return x;
17547 if (GET_CODE (x) == CONST
17548 && GET_CODE (XEXP (x, 0)) == UNSPEC
17549 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
17550 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
17551 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
17553 x = XVECEXP (XEXP (x, 0), 0, 0);
17554 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
17556 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
17557 if (x == NULL_RTX)
17558 return orig_x;
17560 return x;
17563 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
17564 return ix86_delegitimize_tls_address (orig_x);
17566 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
17567 and -mcmodel=medium -fpic. */
17570 if (GET_CODE (x) != PLUS
17571 || GET_CODE (XEXP (x, 1)) != CONST)
17572 return ix86_delegitimize_tls_address (orig_x);
17574 if (ix86_pic_register_p (XEXP (x, 0)))
17575 /* %ebx + GOT/GOTOFF */
17577 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17579 /* %ebx + %reg * scale + GOT/GOTOFF */
17580 reg_addend = XEXP (x, 0);
17581 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
17582 reg_addend = XEXP (reg_addend, 1);
17583 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
17584 reg_addend = XEXP (reg_addend, 0);
17585 else
17587 reg_addend = NULL_RTX;
17588 addend = XEXP (x, 0);
17591 else
17592 addend = XEXP (x, 0);
17594 x = XEXP (XEXP (x, 1), 0);
17595 if (GET_CODE (x) == PLUS
17596 && CONST_INT_P (XEXP (x, 1)))
17598 const_addend = XEXP (x, 1);
17599 x = XEXP (x, 0);
17602 if (GET_CODE (x) == UNSPEC
17603 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
17604 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
17605 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
17606 && !MEM_P (orig_x) && !addend)))
17607 result = XVECEXP (x, 0, 0);
17609 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
17610 && !MEM_P (orig_x))
17611 result = XVECEXP (x, 0, 0);
17613 if (! result)
17614 return ix86_delegitimize_tls_address (orig_x);
17616 /* For (PLUS something CONST_INT) both find_base_{value,term} just
17617 recurse on the first operand. */
17618 if (const_addend && !base_term_p)
17619 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
17620 if (reg_addend)
17621 result = gen_rtx_PLUS (Pmode, reg_addend, result);
17622 if (addend)
17624 /* If the rest of original X doesn't involve the PIC register, add
17625 addend and subtract pic_offset_table_rtx. This can happen e.g.
17626 for code like:
17627 leal (%ebx, %ecx, 4), %ecx
17629 movl foo@GOTOFF(%ecx), %edx
17630 in which case we return (%ecx - %ebx) + foo
17631 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17632 and reload has completed. Don't do the latter for debug,
17633 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
17634 if (pic_offset_table_rtx
17635 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17636 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17637 pic_offset_table_rtx),
17638 result);
17639 else if (base_term_p
17640 && pic_offset_table_rtx
17641 && !TARGET_MACHO
17642 && !TARGET_VXWORKS_RTP)
17644 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17645 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17646 result = gen_rtx_PLUS (Pmode, tmp, result);
17648 else
17649 return orig_x;
17651 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17653 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17654 if (result == NULL_RTX)
17655 return orig_x;
17657 return result;
17660 /* The normal instantiation of the above template. */
17662 static rtx
17663 ix86_delegitimize_address (rtx x)
17665 return ix86_delegitimize_address_1 (x, false);
17668 /* If X is a machine specific address (i.e. a symbol or label being
17669 referenced as a displacement from the GOT implemented using an
17670 UNSPEC), then return the base term. Otherwise return X. */
17673 ix86_find_base_term (rtx x)
17675 rtx term;
17677 if (TARGET_64BIT)
17679 if (GET_CODE (x) != CONST)
17680 return x;
17681 term = XEXP (x, 0);
17682 if (GET_CODE (term) == PLUS
17683 && CONST_INT_P (XEXP (term, 1)))
17684 term = XEXP (term, 0);
17685 if (GET_CODE (term) != UNSPEC
17686 || (XINT (term, 1) != UNSPEC_GOTPCREL
17687 && XINT (term, 1) != UNSPEC_PCREL))
17688 return x;
17690 return XVECEXP (term, 0, 0);
17693 return ix86_delegitimize_address_1 (x, true);
17696 /* Return true if X shouldn't be emitted into the debug info.
17697 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
17698 symbol easily into the .debug_info section, so we don't need to
17699 delegitimize, but instead assemble as @gotoff.
17700 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
17701 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
17703 static bool
17704 ix86_const_not_ok_for_debug_p (rtx x)
17706 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
17707 return true;
17709 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
17710 return true;
17712 return false;
17715 static void
17716 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17717 bool fp, FILE *file)
17719 const char *suffix;
17721 if (mode == CCFPmode)
17723 code = ix86_fp_compare_code_to_integer (code);
17724 mode = CCmode;
17726 if (reverse)
17727 code = reverse_condition (code);
17729 switch (code)
17731 case EQ:
17732 gcc_assert (mode != CCGZmode);
17733 switch (mode)
17735 case E_CCAmode:
17736 suffix = "a";
17737 break;
17738 case E_CCCmode:
17739 suffix = "c";
17740 break;
17741 case E_CCOmode:
17742 suffix = "o";
17743 break;
17744 case E_CCPmode:
17745 suffix = "p";
17746 break;
17747 case E_CCSmode:
17748 suffix = "s";
17749 break;
17750 default:
17751 suffix = "e";
17752 break;
17754 break;
17755 case NE:
17756 gcc_assert (mode != CCGZmode);
17757 switch (mode)
17759 case E_CCAmode:
17760 suffix = "na";
17761 break;
17762 case E_CCCmode:
17763 suffix = "nc";
17764 break;
17765 case E_CCOmode:
17766 suffix = "no";
17767 break;
17768 case E_CCPmode:
17769 suffix = "np";
17770 break;
17771 case E_CCSmode:
17772 suffix = "ns";
17773 break;
17774 default:
17775 suffix = "ne";
17776 break;
17778 break;
17779 case GT:
17780 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17781 suffix = "g";
17782 break;
17783 case GTU:
17784 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17785 Those same assemblers have the same but opposite lossage on cmov. */
17786 if (mode == CCmode)
17787 suffix = fp ? "nbe" : "a";
17788 else
17789 gcc_unreachable ();
17790 break;
17791 case LT:
17792 switch (mode)
17794 case E_CCNOmode:
17795 case E_CCGOCmode:
17796 suffix = "s";
17797 break;
17799 case E_CCmode:
17800 case E_CCGCmode:
17801 case E_CCGZmode:
17802 suffix = "l";
17803 break;
17805 default:
17806 gcc_unreachable ();
17808 break;
17809 case LTU:
17810 if (mode == CCmode || mode == CCGZmode)
17811 suffix = "b";
17812 else if (mode == CCCmode)
17813 suffix = fp ? "b" : "c";
17814 else
17815 gcc_unreachable ();
17816 break;
17817 case GE:
17818 switch (mode)
17820 case E_CCNOmode:
17821 case E_CCGOCmode:
17822 suffix = "ns";
17823 break;
17825 case E_CCmode:
17826 case E_CCGCmode:
17827 case E_CCGZmode:
17828 suffix = "ge";
17829 break;
17831 default:
17832 gcc_unreachable ();
17834 break;
17835 case GEU:
17836 if (mode == CCmode || mode == CCGZmode)
17837 suffix = "nb";
17838 else if (mode == CCCmode)
17839 suffix = fp ? "nb" : "nc";
17840 else
17841 gcc_unreachable ();
17842 break;
17843 case LE:
17844 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17845 suffix = "le";
17846 break;
17847 case LEU:
17848 if (mode == CCmode)
17849 suffix = "be";
17850 else
17851 gcc_unreachable ();
17852 break;
17853 case UNORDERED:
17854 suffix = fp ? "u" : "p";
17855 break;
17856 case ORDERED:
17857 suffix = fp ? "nu" : "np";
17858 break;
17859 default:
17860 gcc_unreachable ();
17862 fputs (suffix, file);
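/* A minimal, standalone sketch of how the suffixes chosen above surface in
   the output: an unsigned greater-than compare is normally emitted with the
   "a" suffix and the signed form with "g".  The instructions noted in the
   comments assume x86-64 at -O2 and are illustrative only.  */
#if 0
int
ugt (unsigned a, unsigned b)
{
  return a > b;		/* commonly: cmpl %esi, %edi; seta %al */
}

int
sgt (int a, int b)
{
  return a > b;		/* commonly: cmpl %esi, %edi; setg %al */
}
#endif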
17865 /* Print the name of register X to FILE based on its machine mode and number.
17866 If CODE is 'w', pretend the mode is HImode.
17867 If CODE is 'b', pretend the mode is QImode.
17868 If CODE is 'k', pretend the mode is SImode.
17869 If CODE is 'q', pretend the mode is DImode.
17870 If CODE is 'x', pretend the mode is V4SFmode.
17871 If CODE is 't', pretend the mode is V8SFmode.
17872 If CODE is 'g', pretend the mode is V16SFmode.
17873 If CODE is 'h', pretend the reg is the 'high' byte register.
17874 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
17875 If CODE is 'd', duplicate the operand for an AVX instruction.
17876 If CODE is 'V', print naked full integer register name without %.
17879 void
17880 print_reg (rtx x, int code, FILE *file)
17882 const char *reg;
17883 int msize;
17884 unsigned int regno;
17885 bool duplicated;
17887 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
17888 putc ('%', file);
17890 if (x == pc_rtx)
17892 gcc_assert (TARGET_64BIT);
17893 fputs ("rip", file);
17894 return;
17897 if (code == 'y' && STACK_TOP_P (x))
17899 fputs ("st(0)", file);
17900 return;
17903 if (code == 'w')
17904 msize = 2;
17905 else if (code == 'b')
17906 msize = 1;
17907 else if (code == 'k')
17908 msize = 4;
17909 else if (code == 'q')
17910 msize = 8;
17911 else if (code == 'h')
17912 msize = 0;
17913 else if (code == 'x')
17914 msize = 16;
17915 else if (code == 't')
17916 msize = 32;
17917 else if (code == 'g')
17918 msize = 64;
17919 else
17920 msize = GET_MODE_SIZE (GET_MODE (x));
17922 regno = REGNO (x);
17924 if (regno == ARG_POINTER_REGNUM
17925 || regno == FRAME_POINTER_REGNUM
17926 || regno == FPSR_REG
17927 || regno == FPCR_REG)
17929 output_operand_lossage
17930 ("invalid use of register '%s'", reg_names[regno]);
17931 return;
17933 else if (regno == FLAGS_REG)
17935 output_operand_lossage ("invalid use of asm flag output");
17936 return;
17939 if (code == 'V')
17941 if (GENERAL_REGNO_P (regno))
17942 msize = GET_MODE_SIZE (word_mode);
17943 else
17944 error ("'V' modifier on non-integer register");
17947 duplicated = code == 'd' && TARGET_AVX;
17949 switch (msize)
17951 case 16:
17952 case 12:
17953 case 8:
17954 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17955 warning (0, "unsupported size for integer register");
17956 /* FALLTHRU */
17957 case 4:
17958 if (LEGACY_INT_REGNO_P (regno))
17959 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17960 /* FALLTHRU */
17961 case 2:
17962 normal:
17963 reg = hi_reg_name[regno];
17964 break;
17965 case 1:
17966 if (regno >= ARRAY_SIZE (qi_reg_name))
17967 goto normal;
17968 if (!ANY_QI_REGNO_P (regno))
17969 error ("unsupported size for integer register");
17970 reg = qi_reg_name[regno];
17971 break;
17972 case 0:
17973 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17974 goto normal;
17975 reg = qi_high_reg_name[regno];
17976 break;
17977 case 32:
17978 case 64:
17979 if (SSE_REGNO_P (regno))
17981 gcc_assert (!duplicated);
17982 putc (msize == 32 ? 'y' : 'z', file);
17983 reg = hi_reg_name[regno] + 1;
17984 break;
17986 goto normal;
17987 default:
17988 gcc_unreachable ();
17991 fputs (reg, file);
17993 /* Irritatingly, AMD extended registers use a
17994 different naming convention: "r%d[bwd]". */
17995 if (REX_INT_REGNO_P (regno))
17997 gcc_assert (TARGET_64BIT);
17998 switch (msize)
18000 case 0:
18001 error ("extended registers have no high halves");
18002 break;
18003 case 1:
18004 putc ('b', file);
18005 break;
18006 case 2:
18007 putc ('w', file);
18008 break;
18009 case 4:
18010 putc ('d', file);
18011 break;
18012 case 8:
18013 /* no suffix */
18014 break;
18015 default:
18016 error ("unsupported operand size for extended register");
18017 break;
18019 return;
18022 if (duplicated)
18024 if (ASSEMBLER_DIALECT == ASM_ATT)
18025 fprintf (file, ", %%%s", reg);
18026 else
18027 fprintf (file, ", %s", reg);
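/* A minimal, standalone sketch: the size overrides handled above are also
   reachable from user inline asm, where "%k0", "%w0", "%b0" and "%q0" select
   the 32-, 16-, 8- and 64-bit names of the same register.  */
#if 0
unsigned
low_byte (unsigned x)
{
  unsigned r;
  /* Zero-extend the low byte of X; prints e.g. "movzbl %al, %eax".  */
  __asm__ ("movzbl %b1, %k0" : "=r" (r) : "q" (x));
  return r;
}
#endif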
18031 /* Meaning of CODE:
18032 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
18033 C -- print opcode suffix for set/cmov insn.
18034 c -- like C, but print reversed condition
18035 F,f -- likewise, but for floating-point.
18036 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
18037 otherwise nothing
18038 R -- print embedded rounding and sae.
18039 r -- print only sae.
18040 z -- print the opcode suffix for the size of the current operand.
18041 Z -- likewise, with special suffixes for x87 instructions.
18042 * -- print a star (in certain assembler syntax)
18043 A -- print an absolute memory reference.
18044 E -- print address with DImode register names if TARGET_64BIT.
18045 w -- print the operand as if it's a "word" (HImode) even if it isn't.
18046 s -- print a shift double count, followed by the assembler's argument
18047 delimiter.
18048 b -- print the QImode name of the register for the indicated operand.
18049 %b0 would print %al if operands[0] is reg 0.
18050 w -- likewise, print the HImode name of the register.
18051 k -- likewise, print the SImode name of the register.
18052 q -- likewise, print the DImode name of the register.
18053 x -- likewise, print the V4SFmode name of the register.
18054 t -- likewise, print the V8SFmode name of the register.
18055 g -- likewise, print the V16SFmode name of the register.
18056 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
18057 y -- print "st(0)" instead of "st" as a register.
18058 d -- print duplicated register operand for AVX instruction.
18059 D -- print condition for SSE cmp instruction.
18060 P -- if PIC, print an @PLT suffix.
18061 p -- print raw symbol name.
18062 X -- don't print any sort of PIC '@' suffix for a symbol.
18063 & -- print some in-use local-dynamic symbol name.
18064 H -- print a memory address offset by 8; used for sse high-parts
18065 Y -- print condition for XOP pcom* instruction.
18066 V -- print naked full integer register name without %.
18067 + -- print a branch hint as 'cs' or 'ds' prefix
18068 ; -- print a semicolon (after prefixes due to bug in older gas).
18069 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
18070 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
18071 ! -- print MPX prefix for jxx/call/ret instructions if required.
18074 void
18075 ix86_print_operand (FILE *file, rtx x, int code)
18077 if (code)
18079 switch (code)
18081 case 'A':
18082 switch (ASSEMBLER_DIALECT)
18084 case ASM_ATT:
18085 putc ('*', file);
18086 break;
18088 case ASM_INTEL:
18089 /* Intel syntax. For absolute addresses, registers should not
18090 be surrounded by brackets. */
18091 if (!REG_P (x))
18093 putc ('[', file);
18094 ix86_print_operand (file, x, 0);
18095 putc (']', file);
18096 return;
18098 break;
18100 default:
18101 gcc_unreachable ();
18104 ix86_print_operand (file, x, 0);
18105 return;
18107 case 'E':
18108 /* Wrap address in an UNSPEC to declare special handling. */
18109 if (TARGET_64BIT)
18110 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
18112 output_address (VOIDmode, x);
18113 return;
18115 case 'L':
18116 if (ASSEMBLER_DIALECT == ASM_ATT)
18117 putc ('l', file);
18118 return;
18120 case 'W':
18121 if (ASSEMBLER_DIALECT == ASM_ATT)
18122 putc ('w', file);
18123 return;
18125 case 'B':
18126 if (ASSEMBLER_DIALECT == ASM_ATT)
18127 putc ('b', file);
18128 return;
18130 case 'Q':
18131 if (ASSEMBLER_DIALECT == ASM_ATT)
18132 putc ('l', file);
18133 return;
18135 case 'S':
18136 if (ASSEMBLER_DIALECT == ASM_ATT)
18137 putc ('s', file);
18138 return;
18140 case 'T':
18141 if (ASSEMBLER_DIALECT == ASM_ATT)
18142 putc ('t', file);
18143 return;
18145 case 'O':
18146 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18147 if (ASSEMBLER_DIALECT != ASM_ATT)
18148 return;
18150 switch (GET_MODE_SIZE (GET_MODE (x)))
18152 case 2:
18153 putc ('w', file);
18154 break;
18156 case 4:
18157 putc ('l', file);
18158 break;
18160 case 8:
18161 putc ('q', file);
18162 break;
18164 default:
18165 output_operand_lossage ("invalid operand size for operand "
18166 "code 'O'");
18167 return;
18170 putc ('.', file);
18171 #endif
18172 return;
18174 case 'z':
18175 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18177 /* Opcodes don't get size suffixes when using Intel syntax. */
18178 if (ASSEMBLER_DIALECT == ASM_INTEL)
18179 return;
18181 switch (GET_MODE_SIZE (GET_MODE (x)))
18183 case 1:
18184 putc ('b', file);
18185 return;
18187 case 2:
18188 putc ('w', file);
18189 return;
18191 case 4:
18192 putc ('l', file);
18193 return;
18195 case 8:
18196 putc ('q', file);
18197 return;
18199 default:
18200 output_operand_lossage ("invalid operand size for operand "
18201 "code 'z'");
18202 return;
18206 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18207 warning (0, "non-integer operand used with operand code 'z'");
18208 /* FALLTHRU */
18210 case 'Z':
18211 /* 387 opcodes don't get size suffixes when using Intel syntax. */
18212 if (ASSEMBLER_DIALECT == ASM_INTEL)
18213 return;
18215 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18217 switch (GET_MODE_SIZE (GET_MODE (x)))
18219 case 2:
18220 #ifdef HAVE_AS_IX86_FILDS
18221 putc ('s', file);
18222 #endif
18223 return;
18225 case 4:
18226 putc ('l', file);
18227 return;
18229 case 8:
18230 #ifdef HAVE_AS_IX86_FILDQ
18231 putc ('q', file);
18232 #else
18233 fputs ("ll", file);
18234 #endif
18235 return;
18237 default:
18238 break;
18241 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18243 /* 387 opcodes don't get size suffixes
18244 if the operands are registers. */
18245 if (STACK_REG_P (x))
18246 return;
18248 switch (GET_MODE_SIZE (GET_MODE (x)))
18250 case 4:
18251 putc ('s', file);
18252 return;
18254 case 8:
18255 putc ('l', file);
18256 return;
18258 case 12:
18259 case 16:
18260 putc ('t', file);
18261 return;
18263 default:
18264 break;
18267 else
18269 output_operand_lossage ("invalid operand type used with "
18270 "operand code 'Z'");
18271 return;
18274 output_operand_lossage ("invalid operand size for operand code 'Z'");
18275 return;
18277 case 'd':
18278 case 'b':
18279 case 'w':
18280 case 'k':
18281 case 'q':
18282 case 'h':
18283 case 't':
18284 case 'g':
18285 case 'y':
18286 case 'x':
18287 case 'X':
18288 case 'P':
18289 case 'p':
18290 case 'V':
18291 break;
18293 case 's':
18294 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
18296 ix86_print_operand (file, x, 0);
18297 fputs (", ", file);
18299 return;
18301 case 'Y':
18302 switch (GET_CODE (x))
18304 case NE:
18305 fputs ("neq", file);
18306 break;
18307 case EQ:
18308 fputs ("eq", file);
18309 break;
18310 case GE:
18311 case GEU:
18312 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
18313 break;
18314 case GT:
18315 case GTU:
18316 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
18317 break;
18318 case LE:
18319 case LEU:
18320 fputs ("le", file);
18321 break;
18322 case LT:
18323 case LTU:
18324 fputs ("lt", file);
18325 break;
18326 case UNORDERED:
18327 fputs ("unord", file);
18328 break;
18329 case ORDERED:
18330 fputs ("ord", file);
18331 break;
18332 case UNEQ:
18333 fputs ("ueq", file);
18334 break;
18335 case UNGE:
18336 fputs ("nlt", file);
18337 break;
18338 case UNGT:
18339 fputs ("nle", file);
18340 break;
18341 case UNLE:
18342 fputs ("ule", file);
18343 break;
18344 case UNLT:
18345 fputs ("ult", file);
18346 break;
18347 case LTGT:
18348 fputs ("une", file);
18349 break;
18350 default:
18351 output_operand_lossage ("operand is not a condition code, "
18352 "invalid operand code 'Y'");
18353 return;
18355 return;
18357 case 'D':
18358 /* Little bit of braindamage here. The SSE compare instructions
18359 use completely different names for the comparisons than the
18360 fp conditional moves do. */
18361 switch (GET_CODE (x))
18363 case UNEQ:
18364 if (TARGET_AVX)
18366 fputs ("eq_us", file);
18367 break;
18369 /* FALLTHRU */
18370 case EQ:
18371 fputs ("eq", file);
18372 break;
18373 case UNLT:
18374 if (TARGET_AVX)
18376 fputs ("nge", file);
18377 break;
18379 /* FALLTHRU */
18380 case LT:
18381 fputs ("lt", file);
18382 break;
18383 case UNLE:
18384 if (TARGET_AVX)
18386 fputs ("ngt", file);
18387 break;
18389 /* FALLTHRU */
18390 case LE:
18391 fputs ("le", file);
18392 break;
18393 case UNORDERED:
18394 fputs ("unord", file);
18395 break;
18396 case LTGT:
18397 if (TARGET_AVX)
18399 fputs ("neq_oq", file);
18400 break;
18402 /* FALLTHRU */
18403 case NE:
18404 fputs ("neq", file);
18405 break;
18406 case GE:
18407 if (TARGET_AVX)
18409 fputs ("ge", file);
18410 break;
18412 /* FALLTHRU */
18413 case UNGE:
18414 fputs ("nlt", file);
18415 break;
18416 case GT:
18417 if (TARGET_AVX)
18419 fputs ("gt", file);
18420 break;
18422 /* FALLTHRU */
18423 case UNGT:
18424 fputs ("nle", file);
18425 break;
18426 case ORDERED:
18427 fputs ("ord", file);
18428 break;
18429 default:
18430 output_operand_lossage ("operand is not a condition code, "
18431 "invalid operand code 'D'");
18432 return;
18434 return;
18436 case 'F':
18437 case 'f':
18438 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18439 if (ASSEMBLER_DIALECT == ASM_ATT)
18440 putc ('.', file);
18441 gcc_fallthrough ();
18442 #endif
18444 case 'C':
18445 case 'c':
18446 if (!COMPARISON_P (x))
18448 output_operand_lossage ("operand is not a condition code, "
18449 "invalid operand code '%c'", code);
18450 return;
18452 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
18453 code == 'c' || code == 'f',
18454 code == 'F' || code == 'f',
18455 file);
18456 return;
18458 case 'H':
18459 if (!offsettable_memref_p (x))
18461 output_operand_lossage ("operand is not an offsettable memory "
18462 "reference, invalid operand code 'H'");
18463 return;
18465 /* It doesn't actually matter what mode we use here, as we're
18466 only going to use this for printing. */
18467 x = adjust_address_nv (x, DImode, 8);
18468 /* Output 'qword ptr' for intel assembler dialect. */
18469 if (ASSEMBLER_DIALECT == ASM_INTEL)
18470 code = 'q';
18471 break;
18473 case 'K':
18474 if (!CONST_INT_P (x))
18476 output_operand_lossage ("operand is not an integer, invalid "
18477 "operand code 'K'");
18478 return;
18481 if (INTVAL (x) & IX86_HLE_ACQUIRE)
18482 #ifdef HAVE_AS_IX86_HLE
18483 fputs ("xacquire ", file);
18484 #else
18485 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
18486 #endif
18487 else if (INTVAL (x) & IX86_HLE_RELEASE)
18488 #ifdef HAVE_AS_IX86_HLE
18489 fputs ("xrelease ", file);
18490 #else
18491 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
18492 #endif
18493 /* We do not want to print the value of the operand. */
18494 return;
18496 case 'N':
18497 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
18498 fputs ("{z}", file);
18499 return;
18501 case 'r':
18502 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
18504 output_operand_lossage ("operand is not a specific integer, "
18505 "invalid operand code 'r'");
18506 return;
18509 if (ASSEMBLER_DIALECT == ASM_INTEL)
18510 fputs (", ", file);
18512 fputs ("{sae}", file);
18514 if (ASSEMBLER_DIALECT == ASM_ATT)
18515 fputs (", ", file);
18517 return;
18519 case 'R':
18520 if (!CONST_INT_P (x))
18522 output_operand_lossage ("operand is not an integer, invalid "
18523 "operand code 'R'");
18524 return;
18527 if (ASSEMBLER_DIALECT == ASM_INTEL)
18528 fputs (", ", file);
18530 switch (INTVAL (x))
18532 case ROUND_NEAREST_INT | ROUND_SAE:
18533 fputs ("{rn-sae}", file);
18534 break;
18535 case ROUND_NEG_INF | ROUND_SAE:
18536 fputs ("{rd-sae}", file);
18537 break;
18538 case ROUND_POS_INF | ROUND_SAE:
18539 fputs ("{ru-sae}", file);
18540 break;
18541 case ROUND_ZERO | ROUND_SAE:
18542 fputs ("{rz-sae}", file);
18543 break;
18544 default:
18545 output_operand_lossage ("operand is not a specific integer, "
18546 "invalid operand code 'R'");
18549 if (ASSEMBLER_DIALECT == ASM_ATT)
18550 fputs (", ", file);
18552 return;
18554 case '*':
18555 if (ASSEMBLER_DIALECT == ASM_ATT)
18556 putc ('*', file);
18557 return;
18559 case '&':
18561 const char *name = get_some_local_dynamic_name ();
18562 if (name == NULL)
18563 output_operand_lossage ("'%%&' used without any "
18564 "local dynamic TLS references");
18565 else
18566 assemble_name (file, name);
18567 return;
18570 case '+':
18572 rtx x;
18574 if (!optimize
18575 || optimize_function_for_size_p (cfun)
18576 || !TARGET_BRANCH_PREDICTION_HINTS)
18577 return;
18579 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
18580 if (x)
18582 int pred_val = profile_probability::from_reg_br_prob_note
18583 (XINT (x, 0)).to_reg_br_prob_base ();
18585 if (pred_val < REG_BR_PROB_BASE * 45 / 100
18586 || pred_val > REG_BR_PROB_BASE * 55 / 100)
18588 bool taken = pred_val > REG_BR_PROB_BASE / 2;
18589 bool cputaken
18590 = final_forward_branch_p (current_output_insn) == 0;
18592 /* Emit hints only in the case where the default branch prediction
18593 heuristics would fail. */
18594 if (taken != cputaken)
18596 /* We use 3e (DS) prefix for taken branches and
18597 2e (CS) prefix for not taken branches. */
18598 if (taken)
18599 fputs ("ds ; ", file);
18600 else
18601 fputs ("cs ; ", file);
18605 return;
18608 case ';':
18609 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
18610 putc (';', file);
18611 #endif
18612 return;
18614 case '~':
18615 putc (TARGET_AVX2 ? 'i' : 'f', file);
18616 return;
18618 case '^':
18619 if (TARGET_64BIT && Pmode != word_mode)
18620 fputs ("addr32 ", file);
18621 return;
18623 case '!':
18624 if (ix86_bnd_prefixed_insn_p (current_output_insn))
18625 fputs ("bnd ", file);
18626 if (ix86_notrack_prefixed_insn_p (current_output_insn))
18627 fputs ("notrack ", file);
18628 return;
18630 default:
18631 output_operand_lossage ("invalid operand code '%c'", code);
18635 if (REG_P (x))
18636 print_reg (x, code, file);
18638 else if (MEM_P (x))
18640 rtx addr = XEXP (x, 0);
18642 /* No `byte ptr' prefix for call instructions ... */
18643 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18645 machine_mode mode = GET_MODE (x);
18646 const char *size;
18648 /* Check for explicit size override codes. */
18649 if (code == 'b')
18650 size = "BYTE";
18651 else if (code == 'w')
18652 size = "WORD";
18653 else if (code == 'k')
18654 size = "DWORD";
18655 else if (code == 'q')
18656 size = "QWORD";
18657 else if (code == 'x')
18658 size = "XMMWORD";
18659 else if (code == 't')
18660 size = "YMMWORD";
18661 else if (code == 'g')
18662 size = "ZMMWORD";
18663 else if (mode == BLKmode)
18664 /* ... or BLKmode operands, when not overridden. */
18665 size = NULL;
18666 else
18667 switch (GET_MODE_SIZE (mode))
18669 case 1: size = "BYTE"; break;
18670 case 2: size = "WORD"; break;
18671 case 4: size = "DWORD"; break;
18672 case 8: size = "QWORD"; break;
18673 case 12: size = "TBYTE"; break;
18674 case 16:
18675 if (mode == XFmode)
18676 size = "TBYTE";
18677 else
18678 size = "XMMWORD";
18679 break;
18680 case 32: size = "YMMWORD"; break;
18681 case 64: size = "ZMMWORD"; break;
18682 default:
18683 gcc_unreachable ();
18685 if (size)
18687 fputs (size, file);
18688 fputs (" PTR ", file);
18692 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18693 output_operand_lossage ("invalid constraints for operand");
18694 else
18695 ix86_print_operand_address_as
18696 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18699 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18701 long l;
18703 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18705 if (ASSEMBLER_DIALECT == ASM_ATT)
18706 putc ('$', file);
18707 /* Sign extend 32bit SFmode immediate to 8 bytes. */
18708 if (code == 'q')
18709 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18710 (unsigned long long) (int) l);
18711 else
18712 fprintf (file, "0x%08x", (unsigned int) l);
18715 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18717 long l[2];
18719 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18721 if (ASSEMBLER_DIALECT == ASM_ATT)
18722 putc ('$', file);
18723 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18726 /* These float cases don't actually occur as immediate operands. */
18727 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18729 char dstr[30];
18731 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18732 fputs (dstr, file);
18735 else
18737 /* We have patterns that allow zero sets of memory, for instance.
18738 In 64-bit mode, we should probably support all 8-byte vectors,
18739 since we can in fact encode that into an immediate. */
18740 if (GET_CODE (x) == CONST_VECTOR)
18742 if (x != CONST0_RTX (GET_MODE (x)))
18743 output_operand_lossage ("invalid vector immediate");
18744 x = const0_rtx;
18747 if (code != 'P' && code != 'p')
18749 if (CONST_INT_P (x))
18751 if (ASSEMBLER_DIALECT == ASM_ATT)
18752 putc ('$', file);
18754 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18755 || GET_CODE (x) == LABEL_REF)
18757 if (ASSEMBLER_DIALECT == ASM_ATT)
18758 putc ('$', file);
18759 else
18760 fputs ("OFFSET FLAT:", file);
18763 if (CONST_INT_P (x))
18764 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18765 else if (flag_pic || MACHOPIC_INDIRECT)
18766 output_pic_addr_const (file, x, code);
18767 else
18768 output_addr_const (file, x);
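/* A minimal, standalone sketch of the Intel-syntax size keywords selected
   above: compiling a plain store with -masm=intel normally prints something
   like "mov DWORD PTR [rdi], 7" for the assignment below (x86-64, -O2);
   the exact form is illustrative only.  */
#if 0
void
store7 (int *p)
{
  *p = 7;
}
#endif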
18772 static bool
18773 ix86_print_operand_punct_valid_p (unsigned char code)
18775 return (code == '*' || code == '+' || code == '&' || code == ';'
18776 || code == '~' || code == '^' || code == '!');
18779 /* Print a memory operand whose address is ADDR. */
18781 static void
18782 ix86_print_operand_address_as (FILE *file, rtx addr,
18783 addr_space_t as, bool no_rip)
18785 struct ix86_address parts;
18786 rtx base, index, disp;
18787 int scale;
18788 int ok;
18789 bool vsib = false;
18790 int code = 0;
18792 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18794 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18795 gcc_assert (parts.index == NULL_RTX);
18796 parts.index = XVECEXP (addr, 0, 1);
18797 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18798 addr = XVECEXP (addr, 0, 0);
18799 vsib = true;
18801 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18803 gcc_assert (TARGET_64BIT);
18804 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18805 code = 'q';
18807 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18809 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18810 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18811 if (parts.base != NULL_RTX)
18813 parts.index = parts.base;
18814 parts.scale = 1;
18816 parts.base = XVECEXP (addr, 0, 0);
18817 addr = XVECEXP (addr, 0, 0);
18819 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18821 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18822 gcc_assert (parts.index == NULL_RTX);
18823 parts.index = XVECEXP (addr, 0, 1);
18824 addr = XVECEXP (addr, 0, 0);
18826 else
18827 ok = ix86_decompose_address (addr, &parts);
18829 gcc_assert (ok);
18831 base = parts.base;
18832 index = parts.index;
18833 disp = parts.disp;
18834 scale = parts.scale;
18836 if (ADDR_SPACE_GENERIC_P (as))
18837 as = parts.seg;
18838 else
18839 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18841 if (!ADDR_SPACE_GENERIC_P (as))
18843 const char *string;
18845 if (as == ADDR_SPACE_SEG_FS)
18846 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18847 else if (as == ADDR_SPACE_SEG_GS)
18848 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18849 else
18850 gcc_unreachable ();
18851 fputs (string, file);
18854 /* Use the one byte shorter RIP-relative addressing in 64bit mode. */
18855 if (TARGET_64BIT && !base && !index && !no_rip)
18857 rtx symbol = disp;
18859 if (GET_CODE (disp) == CONST
18860 && GET_CODE (XEXP (disp, 0)) == PLUS
18861 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18862 symbol = XEXP (XEXP (disp, 0), 0);
18864 if (GET_CODE (symbol) == LABEL_REF
18865 || (GET_CODE (symbol) == SYMBOL_REF
18866 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18867 base = pc_rtx;
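/* To illustrate the effect (example symbol only): once BASE is set to
   pc_rtx here, the code below prints an address such as

	foo(%rip)	(AT&T)    or    foo[rip]	(Intel)

   instead of an absolute "foo", which in 64bit mode needs an extra SIB
   byte to encode; hence the "one byte shorter" note above.  */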
18870 if (!base && !index)
18872 /* A displacement-only address requires special attention. */
18873 if (CONST_INT_P (disp))
18875 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18876 fputs ("ds:", file);
18877 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18879 /* Load the external function address via the GOT slot to avoid PLT. */
18880 else if (GET_CODE (disp) == CONST
18881 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18882 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18883 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18884 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18885 output_pic_addr_const (file, disp, 0);
18886 else if (flag_pic)
18887 output_pic_addr_const (file, disp, 0);
18888 else
18889 output_addr_const (file, disp);
18891 else
18893 /* Print SImode register names to force addr32 prefix. */
18894 if (SImode_address_operand (addr, VOIDmode))
18896 if (flag_checking)
18898 gcc_assert (TARGET_64BIT);
18899 switch (GET_CODE (addr))
18901 case SUBREG:
18902 gcc_assert (GET_MODE (addr) == SImode);
18903 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18904 break;
18905 case ZERO_EXTEND:
18906 case AND:
18907 gcc_assert (GET_MODE (addr) == DImode);
18908 break;
18909 default:
18910 gcc_unreachable ();
18913 gcc_assert (!code);
18914 code = 'k';
18916 else if (code == 0
18917 && TARGET_X32
18918 && disp
18919 && CONST_INT_P (disp)
18920 && INTVAL (disp) < -16*1024*1024)
18922 /* X32 runs in 64-bit mode, where displacement, DISP, in
18923 address DISP(%r64), is encoded as 32-bit immediate sign-
18924 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18925 address is %r64 + 0xffffffffbffffd00. When %r64 <
18926 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18927 which is invalid for x32. The correct address is %r64
18928 - 0x40000300 == 0xf7ffdd64. To properly encode
18929 -0x40000300(%r64) for x32, we zero-extend negative
18930 displacement by forcing addr32 prefix which truncates
18931 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18932 zero-extend all negative displacements, including -1(%rsp).
18933 However, for small negative displacements, sign-extension
18934 won't cause overflow. We only zero-extend negative
18935 displacements if they are < -16*1024*1024, which is also used
18936 to check legitimate address displacements for PIC. */
18937 code = 'k';
18940 /* Since the upper 32 bits of RSP are always zero for x32,
18941 we can encode %esp as %rsp to avoid 0x67 prefix if
18942 there is no index register. */
18943 if (TARGET_X32 && Pmode == SImode
18944 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18945 code = 'q';
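/* Rough example of the two x32 tweaks above (register choice is
   arbitrary): a stack access that would otherwise be printed as

	movl (%esp), %eax	; needs the 0x67 address-size prefix

   is printed with code 'q' as

	movl (%rsp), %eax	; same semantics, no 0x67 prefix

   because the upper 32 bits of %rsp are known to be zero on x32.  */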
18947 if (ASSEMBLER_DIALECT == ASM_ATT)
18949 if (disp)
18951 if (flag_pic)
18952 output_pic_addr_const (file, disp, 0);
18953 else if (GET_CODE (disp) == LABEL_REF)
18954 output_asm_label (disp);
18955 else
18956 output_addr_const (file, disp);
18959 putc ('(', file);
18960 if (base)
18961 print_reg (base, code, file);
18962 if (index)
18964 putc (',', file);
18965 print_reg (index, vsib ? 0 : code, file);
18966 if (scale != 1 || vsib)
18967 fprintf (file, ",%d", scale);
18969 putc (')', file);
18971 else
18973 rtx offset = NULL_RTX;
18975 if (disp)
18977 /* Pull out the offset of a symbol; print any symbol itself. */
18978 if (GET_CODE (disp) == CONST
18979 && GET_CODE (XEXP (disp, 0)) == PLUS
18980 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18982 offset = XEXP (XEXP (disp, 0), 1);
18983 disp = gen_rtx_CONST (VOIDmode,
18984 XEXP (XEXP (disp, 0), 0));
18987 if (flag_pic)
18988 output_pic_addr_const (file, disp, 0);
18989 else if (GET_CODE (disp) == LABEL_REF)
18990 output_asm_label (disp);
18991 else if (CONST_INT_P (disp))
18992 offset = disp;
18993 else
18994 output_addr_const (file, disp);
18997 putc ('[', file);
18998 if (base)
19000 print_reg (base, code, file);
19001 if (offset)
19003 if (INTVAL (offset) >= 0)
19004 putc ('+', file);
19005 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19008 else if (offset)
19009 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19010 else
19011 putc ('0', file);
19013 if (index)
19015 putc ('+', file);
19016 print_reg (index, vsib ? 0 : code, file);
19017 if (scale != 1 || vsib)
19018 fprintf (file, "*%d", scale);
19020 putc (']', file);
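/* For illustration, the same decomposed address (base %rbp, index %rax,
   scale 4, displacement -8 -- values picked only as an example) is
   printed by the two branches above as

	-8(%rbp,%rax,4)		(AT&T)
	[rbp-8+rax*4]		(Intel)

   which is the usual correspondence between the two dialects.  */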
19025 static void
19026 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
19028 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
19031 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
19033 static bool
19034 i386_asm_output_addr_const_extra (FILE *file, rtx x)
19036 rtx op;
19038 if (GET_CODE (x) != UNSPEC)
19039 return false;
19041 op = XVECEXP (x, 0, 0);
19042 switch (XINT (x, 1))
19044 case UNSPEC_GOTOFF:
19045 output_addr_const (file, op);
19046 fputs ("@gotoff", file);
19047 break;
19048 case UNSPEC_GOTTPOFF:
19049 output_addr_const (file, op);
19050 /* FIXME: This might be @TPOFF in Sun ld. */
19051 fputs ("@gottpoff", file);
19052 break;
19053 case UNSPEC_TPOFF:
19054 output_addr_const (file, op);
19055 fputs ("@tpoff", file);
19056 break;
19057 case UNSPEC_NTPOFF:
19058 output_addr_const (file, op);
19059 if (TARGET_64BIT)
19060 fputs ("@tpoff", file);
19061 else
19062 fputs ("@ntpoff", file);
19063 break;
19064 case UNSPEC_DTPOFF:
19065 output_addr_const (file, op);
19066 fputs ("@dtpoff", file);
19067 break;
19068 case UNSPEC_GOTNTPOFF:
19069 output_addr_const (file, op);
19070 if (TARGET_64BIT)
19071 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
19072 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
19073 else
19074 fputs ("@gotntpoff", file);
19075 break;
19076 case UNSPEC_INDNTPOFF:
19077 output_addr_const (file, op);
19078 fputs ("@indntpoff", file);
19079 break;
19080 #if TARGET_MACHO
19081 case UNSPEC_MACHOPIC_OFFSET:
19082 output_addr_const (file, op);
19083 putc ('-', file);
19084 machopic_output_function_base_name (file);
19085 break;
19086 #endif
19088 default:
19089 return false;
19092 return true;
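/* Rough example of the suffixes emitted above (hypothetical TLS variable
   "x", local-exec model, 32bit target):

	movl	%gs:x@ntpoff, %eax

   where the linker resolves x@ntpoff to the (negative) offset of x from
   the thread pointer.  */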
19095 /* Split one or more double-mode RTL references into pairs of half-mode
19096 references. The RTL can be REG, offsettable MEM, integer constant, or
19097 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
19098 split and "num" is its length. lo_half and hi_half are output arrays
19099 that parallel "operands". */
19101 void
19102 split_double_mode (machine_mode mode, rtx operands[],
19103 int num, rtx lo_half[], rtx hi_half[])
19105 machine_mode half_mode;
19106 unsigned int byte;
19108 switch (mode)
19110 case E_TImode:
19111 half_mode = DImode;
19112 break;
19113 case E_DImode:
19114 half_mode = SImode;
19115 break;
19116 default:
19117 gcc_unreachable ();
19120 byte = GET_MODE_SIZE (half_mode);
19122 while (num--)
19124 rtx op = operands[num];
19126 /* simplify_subreg refuses to split volatile memory addresses,
19127 but we still have to handle them here. */
19128 if (MEM_P (op))
19130 lo_half[num] = adjust_address (op, half_mode, 0);
19131 hi_half[num] = adjust_address (op, half_mode, byte);
19133 else
19135 lo_half[num] = simplify_gen_subreg (half_mode, op,
19136 GET_MODE (op) == VOIDmode
19137 ? mode : GET_MODE (op), 0);
19138 hi_half[num] = simplify_gen_subreg (half_mode, op,
19139 GET_MODE (op) == VOIDmode
19140 ? mode : GET_MODE (op), byte);
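/* Example of the splitting done above (operand values are only
   illustrative): a DImode memory operand (mem:DI (reg:SI sp)) is split
   into

	lo_half = (mem:SI (reg:SI sp))			;; offset 0
	hi_half = (mem:SI (plus (reg:SI sp) (const_int 4)))	;; offset 4

   while a constant such as (const_int 0x500000003) is split by
   simplify_gen_subreg into the SImode halves 3 (low) and 5 (high).  */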
19145 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
19146 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
19147 is the expression of the binary operation. The output may either be
19148 emitted here, or returned to the caller, like all output_* functions.
19150 There is no guarantee that the operands are the same mode, as they
19151 might be within FLOAT or FLOAT_EXTEND expressions. */
19153 #ifndef SYSV386_COMPAT
19154 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
19155 wants to fix the assemblers because that causes incompatibility
19156 with gcc. No-one wants to fix gcc because that causes
19157 incompatibility with assemblers... You can use the option of
19158 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
19159 #define SYSV386_COMPAT 1
19160 #endif
19162 const char *
19163 output_387_binary_op (rtx_insn *insn, rtx *operands)
19165 static char buf[40];
19166 const char *p;
19167 bool is_sse
19168 = (SSE_REG_P (operands[0])
19169 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
19171 if (is_sse)
19172 p = "%v";
19173 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
19174 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
19175 p = "fi";
19176 else
19177 p = "f";
19179 strcpy (buf, p);
19181 switch (GET_CODE (operands[3]))
19183 case PLUS:
19184 p = "add"; break;
19185 case MINUS:
19186 p = "sub"; break;
19187 case MULT:
19188 p = "mul"; break;
19189 case DIV:
19190 p = "div"; break;
19191 default:
19192 gcc_unreachable ();
19195 strcat (buf, p);
19197 if (is_sse)
19199 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
19200 strcat (buf, p);
19202 if (TARGET_AVX)
19203 p = "\t{%2, %1, %0|%0, %1, %2}";
19204 else
19205 p = "\t{%2, %0|%0, %2}";
19207 strcat (buf, p);
19208 return buf;
19211 /* Even if we do not want to check the inputs, this documents the input
19212 constraints, which helps in understanding the following code. */
19213 if (flag_checking)
19215 if (STACK_REG_P (operands[0])
19216 && ((REG_P (operands[1])
19217 && REGNO (operands[0]) == REGNO (operands[1])
19218 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
19219 || (REG_P (operands[2])
19220 && REGNO (operands[0]) == REGNO (operands[2])
19221 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
19222 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
19223 ; /* ok */
19224 else
19225 gcc_unreachable ();
19228 switch (GET_CODE (operands[3]))
19230 case MULT:
19231 case PLUS:
19232 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
19233 std::swap (operands[1], operands[2]);
19235 /* We know operands[0] == operands[1]. */
19237 if (MEM_P (operands[2]))
19239 p = "%Z2\t%2";
19240 break;
19243 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19245 if (STACK_TOP_P (operands[0]))
19246 /* How is it that we are storing to a dead operand[2]?
19247 Well, presumably operands[1] is dead too. We can't
19248 store the result to st(0) as st(0) gets popped on this
19249 instruction. Instead store to operands[2] (which I
19250 think has to be st(1)). st(1) will be popped later.
19251 gcc <= 2.8.1 didn't have this check and generated
19252 assembly code that the Unixware assembler rejected. */
19253 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
19254 else
19255 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
19256 break;
19259 if (STACK_TOP_P (operands[0]))
19260 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
19261 else
19262 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
19263 break;
19265 case MINUS:
19266 case DIV:
19267 if (MEM_P (operands[1]))
19269 p = "r%Z1\t%1";
19270 break;
19273 if (MEM_P (operands[2]))
19275 p = "%Z2\t%2";
19276 break;
19279 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19281 #if SYSV386_COMPAT
19282 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
19283 derived assemblers, confusingly reverse the direction of
19284 the operation for fsub{r} and fdiv{r} when the
19285 destination register is not st(0). The Intel assembler
19286 doesn't have this brain damage. Read !SYSV386_COMPAT to
19287 figure out what the hardware really does. */
19288 if (STACK_TOP_P (operands[0]))
19289 p = "{p\t%0, %2|rp\t%2, %0}";
19290 else
19291 p = "{rp\t%2, %0|p\t%0, %2}";
19292 #else
19293 if (STACK_TOP_P (operands[0]))
19294 /* As above for fmul/fadd, we can't store to st(0). */
19295 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
19296 else
19297 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
19298 #endif
19299 break;
19302 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19304 #if SYSV386_COMPAT
19305 if (STACK_TOP_P (operands[0]))
19306 p = "{rp\t%0, %1|p\t%1, %0}";
19307 else
19308 p = "{p\t%1, %0|rp\t%0, %1}";
19309 #else
19310 if (STACK_TOP_P (operands[0]))
19311 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
19312 else
19313 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
19314 #endif
19315 break;
19318 if (STACK_TOP_P (operands[0]))
19320 if (STACK_TOP_P (operands[1]))
19321 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
19322 else
19323 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
19324 break;
19326 else if (STACK_TOP_P (operands[1]))
19328 #if SYSV386_COMPAT
19329 p = "{\t%1, %0|r\t%0, %1}";
19330 #else
19331 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
19332 #endif
19334 else
19336 #if SYSV386_COMPAT
19337 p = "{r\t%2, %0|\t%0, %2}";
19338 #else
19339 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
19340 #endif
19342 break;
19344 default:
19345 gcc_unreachable ();
19348 strcat (buf, p);
19349 return buf;
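/* For instance (hypothetical operands, shown only to illustrate the
   string being assembled): an SFmode add on SSE registers produces the
   template "%vaddss\t{%2, %0|%0, %2}" ("%v" expands to the AVX "v"
   prefix when appropriate), an x87 add with the destination on top of
   the stack and a live register operand produces
   "fadd\t{%y2, %0|%0, %y2}", and a memory operand instead gives
   "fadd%Z2\t%2".  */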
19352 /* Return needed mode for entity in optimize_mode_switching pass. */
19354 static int
19355 ix86_dirflag_mode_needed (rtx_insn *insn)
19357 if (CALL_P (insn))
19359 if (cfun->machine->func_type == TYPE_NORMAL)
19360 return X86_DIRFLAG_ANY;
19361 else
19362 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
19363 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
19366 if (recog_memoized (insn) < 0)
19367 return X86_DIRFLAG_ANY;
19369 if (get_attr_type (insn) == TYPE_STR)
19371 /* Emit cld instruction if stringops are used in the function. */
19372 if (cfun->machine->func_type == TYPE_NORMAL)
19373 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
19374 else
19375 return X86_DIRFLAG_RESET;
19378 return X86_DIRFLAG_ANY;
19381 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
19383 static bool
19384 ix86_check_avx_upper_register (const_rtx exp)
19386 if (SUBREG_P (exp))
19387 exp = SUBREG_REG (exp);
19389 return (REG_P (exp)
19390 && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
19391 || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
19394 /* Return needed mode for entity in optimize_mode_switching pass. */
19396 static int
19397 ix86_avx_u128_mode_needed (rtx_insn *insn)
19399 if (CALL_P (insn))
19401 rtx link;
19403 /* Needed mode is set to AVX_U128_CLEAN if there are
19404 no 256bit or 512bit modes used in function arguments. */
19405 for (link = CALL_INSN_FUNCTION_USAGE (insn);
19406 link;
19407 link = XEXP (link, 1))
19409 if (GET_CODE (XEXP (link, 0)) == USE)
19411 rtx arg = XEXP (XEXP (link, 0), 0);
19413 if (ix86_check_avx_upper_register (arg))
19414 return AVX_U128_DIRTY;
19418 return AVX_U128_CLEAN;
19421 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
19422 The hardware changes state only when a 256bit register is written to,
19423 but we need to prevent the compiler from moving the optimal insertion
19424 point above an eventual read from a 256bit or 512bit register. */
19425 subrtx_iterator::array_type array;
19426 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
19427 if (ix86_check_avx_upper_register (*iter))
19428 return AVX_U128_DIRTY;
19430 return AVX_U128_ANY;
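/* As a rough example of the classification above: a call passing an
   __m256d argument (a 256bit value typically living in %ymm0) has a USE
   of a 256bit register in CALL_INSN_FUNCTION_USAGE, so the needed mode
   is AVX_U128_DIRTY; a call taking only scalar or 128bit arguments is
   classified AVX_U128_CLEAN, so a vzeroupper may be emitted before it.  */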
19433 /* Return mode that i387 must be switched into
19434 prior to the execution of insn. */
19436 static int
19437 ix86_i387_mode_needed (int entity, rtx_insn *insn)
19439 enum attr_i387_cw mode;
19441 /* The mode UNINITIALIZED is used to store the control word after a
19442 function call or ASM pattern. The mode ANY specifies that the function
19443 has no requirements on the control word and makes no changes in the
19444 bits we are interested in. */
19446 if (CALL_P (insn)
19447 || (NONJUMP_INSN_P (insn)
19448 && (asm_noperands (PATTERN (insn)) >= 0
19449 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
19450 return I387_CW_UNINITIALIZED;
19452 if (recog_memoized (insn) < 0)
19453 return I387_CW_ANY;
19455 mode = get_attr_i387_cw (insn);
19457 switch (entity)
19459 case I387_TRUNC:
19460 if (mode == I387_CW_TRUNC)
19461 return mode;
19462 break;
19464 case I387_FLOOR:
19465 if (mode == I387_CW_FLOOR)
19466 return mode;
19467 break;
19469 case I387_CEIL:
19470 if (mode == I387_CW_CEIL)
19471 return mode;
19472 break;
19474 case I387_MASK_PM:
19475 if (mode == I387_CW_MASK_PM)
19476 return mode;
19477 break;
19479 default:
19480 gcc_unreachable ();
19483 return I387_CW_ANY;
19486 /* Return mode that entity must be switched into
19487 prior to the execution of insn. */
19489 static int
19490 ix86_mode_needed (int entity, rtx_insn *insn)
19492 switch (entity)
19494 case X86_DIRFLAG:
19495 return ix86_dirflag_mode_needed (insn);
19496 case AVX_U128:
19497 return ix86_avx_u128_mode_needed (insn);
19498 case I387_TRUNC:
19499 case I387_FLOOR:
19500 case I387_CEIL:
19501 case I387_MASK_PM:
19502 return ix86_i387_mode_needed (entity, insn);
19503 default:
19504 gcc_unreachable ();
19506 return 0;
19509 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
19511 static void
19512 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
19514 if (ix86_check_avx_upper_register (dest))
19516 bool *used = (bool *) data;
19517 *used = true;
19521 /* Calculate mode of upper 128bit AVX registers after the insn. */
19523 static int
19524 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
19526 rtx pat = PATTERN (insn);
19528 if (vzeroupper_operation (pat, VOIDmode)
19529 || vzeroall_operation (pat, VOIDmode))
19530 return AVX_U128_CLEAN;
19532 /* We know that the state is clean after a CALL insn if no 256bit or
19533 512bit registers are used in the function return register. */
19534 if (CALL_P (insn))
19536 bool avx_upper_reg_found = false;
19537 note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
19539 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
19542 /* Otherwise, return current mode. Remember that if insn
19543 references AVX 256bit or 512bit registers, the mode was already
19544 changed to DIRTY from MODE_NEEDED. */
19545 return mode;
19548 /* Return the mode that an insn results in. */
19550 static int
19551 ix86_mode_after (int entity, int mode, rtx_insn *insn)
19553 switch (entity)
19555 case X86_DIRFLAG:
19556 return mode;
19557 case AVX_U128:
19558 return ix86_avx_u128_mode_after (mode, insn);
19559 case I387_TRUNC:
19560 case I387_FLOOR:
19561 case I387_CEIL:
19562 case I387_MASK_PM:
19563 return mode;
19564 default:
19565 gcc_unreachable ();
19569 static int
19570 ix86_dirflag_mode_entry (void)
19572 /* For TARGET_CLD or in the interrupt handler we can't assume
19573 direction flag state at function entry. */
19574 if (TARGET_CLD
19575 || cfun->machine->func_type != TYPE_NORMAL)
19576 return X86_DIRFLAG_ANY;
19578 return X86_DIRFLAG_RESET;
19581 static int
19582 ix86_avx_u128_mode_entry (void)
19584 tree arg;
19586 /* Entry mode is set to AVX_U128_DIRTY if there are
19587 256bit or 512bit modes used in function arguments. */
19588 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
19589 arg = TREE_CHAIN (arg))
19591 rtx incoming = DECL_INCOMING_RTL (arg);
19593 if (incoming && ix86_check_avx_upper_register (incoming))
19594 return AVX_U128_DIRTY;
19597 return AVX_U128_CLEAN;
19600 /* Return a mode that ENTITY is assumed to be
19601 switched to at function entry. */
19603 static int
19604 ix86_mode_entry (int entity)
19606 switch (entity)
19608 case X86_DIRFLAG:
19609 return ix86_dirflag_mode_entry ();
19610 case AVX_U128:
19611 return ix86_avx_u128_mode_entry ();
19612 case I387_TRUNC:
19613 case I387_FLOOR:
19614 case I387_CEIL:
19615 case I387_MASK_PM:
19616 return I387_CW_ANY;
19617 default:
19618 gcc_unreachable ();
19622 static int
19623 ix86_avx_u128_mode_exit (void)
19625 rtx reg = crtl->return_rtx;
19627 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
19628 or 512bit modes used in the function return register. */
19629 if (reg && ix86_check_avx_upper_register (reg))
19630 return AVX_U128_DIRTY;
19632 return AVX_U128_CLEAN;
19635 /* Return a mode that ENTITY is assumed to be
19636 switched to at function exit. */
19638 static int
19639 ix86_mode_exit (int entity)
19641 switch (entity)
19643 case X86_DIRFLAG:
19644 return X86_DIRFLAG_ANY;
19645 case AVX_U128:
19646 return ix86_avx_u128_mode_exit ();
19647 case I387_TRUNC:
19648 case I387_FLOOR:
19649 case I387_CEIL:
19650 case I387_MASK_PM:
19651 return I387_CW_ANY;
19652 default:
19653 gcc_unreachable ();
19657 static int
19658 ix86_mode_priority (int, int n)
19660 return n;
19663 /* Output code to initialize control word copies used by trunc?f?i and
19664 rounding patterns. CURRENT_MODE is set to the current control word,
19665 while NEW_MODE is set to the new control word. */
19667 static void
19668 emit_i387_cw_initialization (int mode)
19670 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19671 rtx new_mode;
19673 enum ix86_stack_slot slot;
19675 rtx reg = gen_reg_rtx (HImode);
19677 emit_insn (gen_x86_fnstcw_1 (stored_mode));
19678 emit_move_insn (reg, copy_rtx (stored_mode));
19680 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
19681 || optimize_insn_for_size_p ())
19683 switch (mode)
19685 case I387_CW_TRUNC:
19686 /* round toward zero (truncate) */
19687 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19688 slot = SLOT_CW_TRUNC;
19689 break;
19691 case I387_CW_FLOOR:
19692 /* round down toward -oo */
19693 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19694 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19695 slot = SLOT_CW_FLOOR;
19696 break;
19698 case I387_CW_CEIL:
19699 /* round up toward +oo */
19700 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19701 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19702 slot = SLOT_CW_CEIL;
19703 break;
19705 case I387_CW_MASK_PM:
19706 /* mask precision exception for nearbyint() */
19707 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19708 slot = SLOT_CW_MASK_PM;
19709 break;
19711 default:
19712 gcc_unreachable ();
19715 else
19717 switch (mode)
19719 case I387_CW_TRUNC:
19720 /* round toward zero (truncate) */
19721 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
19722 slot = SLOT_CW_TRUNC;
19723 break;
19725 case I387_CW_FLOOR:
19726 /* round down toward -oo */
19727 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
19728 slot = SLOT_CW_FLOOR;
19729 break;
19731 case I387_CW_CEIL:
19732 /* round up toward +oo */
19733 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
19734 slot = SLOT_CW_CEIL;
19735 break;
19737 case I387_CW_MASK_PM:
19738 /* mask precision exception for nearbyint() */
19739 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19740 slot = SLOT_CW_MASK_PM;
19741 break;
19743 default:
19744 gcc_unreachable ();
19748 gcc_assert (slot < MAX_386_STACK_LOCALS);
19750 new_mode = assign_386_stack_local (HImode, slot);
19751 emit_move_insn (new_mode, reg);
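/* The magic constants above follow from the i387 control word layout
   (a sketch, not a full description): bits 10-11 form the rounding
   control field -- 00 = nearest, 01 = down, 10 = up, 11 = toward zero --
   and bit 5 masks the precision exception.  So ORing in 0x0c00 selects
   truncation, clearing 0x0c00 and ORing 0x0400 or 0x0800 selects floor
   or ceiling rounding, and ORing 0x0020 masks the precision exception
   for nearbyint.  */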
19754 /* Emit vzeroupper. */
19756 void
19757 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19759 int i;
19761 /* Cancel automatic vzeroupper insertion if there are
19762 live call-saved SSE registers at the insertion point. */
19764 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19765 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19766 return;
19768 if (TARGET_64BIT)
19769 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19770 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19771 return;
19773 emit_insn (gen_avx_vzeroupper ());
19778 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
19779 is the set of hard registers live at the point where the insn(s)
19780 are to be inserted. */
19782 static void
19783 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19784 HARD_REG_SET regs_live)
19786 switch (entity)
19788 case X86_DIRFLAG:
19789 if (mode == X86_DIRFLAG_RESET)
19790 emit_insn (gen_cld ());
19791 break;
19792 case AVX_U128:
19793 if (mode == AVX_U128_CLEAN)
19794 ix86_avx_emit_vzeroupper (regs_live);
19795 break;
19796 case I387_TRUNC:
19797 case I387_FLOOR:
19798 case I387_CEIL:
19799 case I387_MASK_PM:
19800 if (mode != I387_CW_ANY
19801 && mode != I387_CW_UNINITIALIZED)
19802 emit_i387_cw_initialization (mode);
19803 break;
19804 default:
19805 gcc_unreachable ();
19809 /* Output code for INSN to convert a float to a signed int. OPERANDS
19810 are the insn operands. The output may be [HSD]Imode and the input
19811 operand may be [SDX]Fmode. */
19813 const char *
19814 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19816 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19817 bool dimode_p = GET_MODE (operands[0]) == DImode;
19818 int round_mode = get_attr_i387_cw (insn);
19820 static char buf[40];
19821 const char *p;
19823 /* Jump through a hoop or two for DImode, since the hardware has no
19824 non-popping instruction. We used to do this a different way, but
19825 that was somewhat fragile and broke with post-reload splitters. */
19826 if ((dimode_p || fisttp) && !stack_top_dies)
19827 output_asm_insn ("fld\t%y1", operands);
19829 gcc_assert (STACK_TOP_P (operands[1]));
19830 gcc_assert (MEM_P (operands[0]));
19831 gcc_assert (GET_MODE (operands[1]) != TFmode);
19833 if (fisttp)
19834 return "fisttp%Z0\t%0";
19836 strcpy (buf, "fist");
19838 if (round_mode != I387_CW_ANY)
19839 output_asm_insn ("fldcw\t%3", operands);
19841 p = "p%Z0\t%0";
19842 strcat (buf, p + !(stack_top_dies || dimode_p));
19844 output_asm_insn (buf, operands);
19846 if (round_mode != I387_CW_ANY)
19847 output_asm_insn ("fldcw\t%2", operands);
19849 return "";
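/* A typical sequence produced here (DImode result, non-fisttp case,
   purely illustrative): when the value on the stack top does not die we
   first duplicate it, switch the rounding mode, store, and restore:

	fld	%st(0)
	fldcw	-2(%rbp)	; load the truncating control word (operand 3)
	fistpll	-16(%rbp)	; store the 64bit integer, popping the copy
	fldcw	-4(%rbp)	; restore the original control word (operand 2)

   The addresses are only placeholders for the stack slots set up by
   assign_386_stack_local.  */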
19852 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19853 have the values zero or one, indicates the ffreep insn's operand
19854 from the OPERANDS array. */
19856 static const char *
19857 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19859 if (TARGET_USE_FFREEP)
19860 #ifdef HAVE_AS_IX86_FFREEP
19861 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19862 #else
19864 static char retval[32];
19865 int regno = REGNO (operands[opno]);
19867 gcc_assert (STACK_REGNO_P (regno));
19869 regno -= FIRST_STACK_REG;
19871 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19872 return retval;
19874 #endif
19876 return opno ? "fstp\t%y1" : "fstp\t%y0";
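/* To illustrate the raw-byte fallback above (assuming the usual x87
   encoding): "ffreep %st(2)" is the two-byte sequence 0xdf 0xc2, so on
   a little-endian target ASM_SHORT "0xc2df" emits exactly those bytes
   when the assembler does not know the ffreep mnemonic.  */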
19880 /* Output code for INSN to compare OPERANDS. EFLAGS_P is true when fcomi
19881 should be used. UNORDERED_P is true when fucom should be used. */
19883 const char *
19884 output_fp_compare (rtx_insn *insn, rtx *operands,
19885 bool eflags_p, bool unordered_p)
19887 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19888 bool stack_top_dies;
19890 static char buf[40];
19891 const char *p;
19893 gcc_assert (STACK_TOP_P (xops[0]));
19895 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19897 if (eflags_p)
19899 p = unordered_p ? "fucomi" : "fcomi";
19900 strcpy (buf, p);
19902 p = "p\t{%y1, %0|%0, %y1}";
19903 strcat (buf, p + !stack_top_dies);
19905 return buf;
19908 if (STACK_REG_P (xops[1])
19909 && stack_top_dies
19910 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19912 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19914 /* If the top of the 387 stack dies, and the other operand
19915 is also a stack register that dies, then this must be a
19916 `fcompp' float compare. */
19917 p = unordered_p ? "fucompp" : "fcompp";
19918 strcpy (buf, p);
19920 else if (const0_operand (xops[1], VOIDmode))
19922 gcc_assert (!unordered_p);
19923 strcpy (buf, "ftst");
19925 else
19927 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19929 gcc_assert (!unordered_p);
19930 p = "ficom";
19932 else
19933 p = unordered_p ? "fucom" : "fcom";
19935 strcpy (buf, p);
19937 p = "p%Z2\t%y2";
19938 strcat (buf, p + !stack_top_dies);
19941 output_asm_insn (buf, operands);
19942 return "fnstsw\t%0";
19945 void
19946 ix86_output_addr_vec_elt (FILE *file, int value)
19948 const char *directive = ASM_LONG;
19950 #ifdef ASM_QUAD
19951 if (TARGET_LP64)
19952 directive = ASM_QUAD;
19953 #else
19954 gcc_assert (!TARGET_64BIT);
19955 #endif
19957 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19960 void
19961 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19963 const char *directive = ASM_LONG;
19965 #ifdef ASM_QUAD
19966 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19967 directive = ASM_QUAD;
19968 #else
19969 gcc_assert (!TARGET_64BIT);
19970 #endif
19971 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19972 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19973 fprintf (file, "%s%s%d-%s%d\n",
19974 directive, LPREFIX, value, LPREFIX, rel);
19975 else if (HAVE_AS_GOTOFF_IN_DATA)
19976 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19977 #if TARGET_MACHO
19978 else if (TARGET_MACHO)
19980 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19981 machopic_output_function_base_name (file);
19982 putc ('\n', file);
19984 #endif
19985 else
19986 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19987 GOT_SYMBOL_NAME, LPREFIX, value);
19990 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19991 for the target. */
19993 void
19994 ix86_expand_clear (rtx dest)
19996 rtx tmp;
19998 /* We play register width games, which are only valid after reload. */
19999 gcc_assert (reload_completed);
20001 /* Avoid HImode and its attendant prefix byte. */
20002 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
20003 dest = gen_rtx_REG (SImode, REGNO (dest));
20004 tmp = gen_rtx_SET (dest, const0_rtx);
20006 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
20008 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20009 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
20012 emit_insn (tmp);
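/* In assembler terms (illustrative only): the plain SET becomes
   "movl $0, %eax", while the flags-clobbering PARALLEL becomes
   "xorl %eax, %eax", which is shorter but cannot be used when the
   condition codes must be preserved.  */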
20015 void
20016 ix86_expand_move (machine_mode mode, rtx operands[])
20018 rtx op0, op1;
20019 rtx tmp, addend = NULL_RTX;
20020 enum tls_model model;
20022 op0 = operands[0];
20023 op1 = operands[1];
20025 switch (GET_CODE (op1))
20027 case CONST:
20028 tmp = XEXP (op1, 0);
20030 if (GET_CODE (tmp) != PLUS
20031 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
20032 break;
20034 op1 = XEXP (tmp, 0);
20035 addend = XEXP (tmp, 1);
20036 /* FALLTHRU */
20038 case SYMBOL_REF:
20039 model = SYMBOL_REF_TLS_MODEL (op1);
20041 if (model)
20042 op1 = legitimize_tls_address (op1, model, true);
20043 else if (ix86_force_load_from_GOT_p (op1))
20045 /* Load the external function address via GOT slot to avoid PLT. */
20046 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
20047 (TARGET_64BIT
20048 ? UNSPEC_GOTPCREL
20049 : UNSPEC_GOT));
20050 op1 = gen_rtx_CONST (Pmode, op1);
20051 op1 = gen_const_mem (Pmode, op1);
20052 set_mem_alias_set (op1, ix86_GOT_alias_set ());
20054 else
20056 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
20057 if (tmp)
20059 op1 = tmp;
20060 if (!addend)
20061 break;
20063 else
20065 op1 = operands[1];
20066 break;
20070 if (addend)
20072 op1 = force_operand (op1, NULL_RTX);
20073 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
20074 op0, 1, OPTAB_DIRECT);
20076 else
20077 op1 = force_operand (op1, op0);
20079 if (op1 == op0)
20080 return;
20082 op1 = convert_to_mode (mode, op1, 1);
20084 default:
20085 break;
20088 if ((flag_pic || MACHOPIC_INDIRECT)
20089 && symbolic_operand (op1, mode))
20091 if (TARGET_MACHO && !TARGET_64BIT)
20093 #if TARGET_MACHO
20094 /* dynamic-no-pic */
20095 if (MACHOPIC_INDIRECT)
20097 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
20098 ? op0 : gen_reg_rtx (Pmode);
20099 op1 = machopic_indirect_data_reference (op1, temp);
20100 if (MACHOPIC_PURE)
20101 op1 = machopic_legitimize_pic_address (op1, mode,
20102 temp == op1 ? 0 : temp);
20104 if (op0 != op1 && GET_CODE (op0) != MEM)
20106 rtx insn = gen_rtx_SET (op0, op1);
20107 emit_insn (insn);
20108 return;
20110 if (GET_CODE (op0) == MEM)
20111 op1 = force_reg (Pmode, op1);
20112 else
20114 rtx temp = op0;
20115 if (GET_CODE (temp) != REG)
20116 temp = gen_reg_rtx (Pmode);
20117 temp = legitimize_pic_address (op1, temp);
20118 if (temp == op0)
20119 return;
20120 op1 = temp;
20122 /* dynamic-no-pic */
20123 #endif
20125 else
20127 if (MEM_P (op0))
20128 op1 = force_reg (mode, op1);
20129 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
20131 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
20132 op1 = legitimize_pic_address (op1, reg);
20133 if (op0 == op1)
20134 return;
20135 op1 = convert_to_mode (mode, op1, 1);
20139 else
20141 if (MEM_P (op0)
20142 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
20143 || !push_operand (op0, mode))
20144 && MEM_P (op1))
20145 op1 = force_reg (mode, op1);
20147 if (push_operand (op0, mode)
20148 && ! general_no_elim_operand (op1, mode))
20149 op1 = copy_to_mode_reg (mode, op1);
20151 /* Force large constants in 64bit compilation into register
20152 to get them CSEed. */
20153 if (can_create_pseudo_p ()
20154 && (mode == DImode) && TARGET_64BIT
20155 && immediate_operand (op1, mode)
20156 && !x86_64_zext_immediate_operand (op1, VOIDmode)
20157 && !register_operand (op0, mode)
20158 && optimize)
20159 op1 = copy_to_mode_reg (mode, op1);
20161 if (can_create_pseudo_p ()
20162 && CONST_DOUBLE_P (op1))
20164 /* If we are loading a floating point constant to a register,
20165 force the value to memory now, since we'll get better code
20166 out the back end. */
20168 op1 = validize_mem (force_const_mem (mode, op1));
20169 if (!register_operand (op0, mode))
20171 rtx temp = gen_reg_rtx (mode);
20172 emit_insn (gen_rtx_SET (temp, op1));
20173 emit_move_insn (op0, temp);
20174 return;
20179 emit_insn (gen_rtx_SET (op0, op1));
20182 void
20183 ix86_expand_vector_move (machine_mode mode, rtx operands[])
20185 rtx op0 = operands[0], op1 = operands[1];
20186 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
20187 psABI, since the biggest alignment there is 4 bytes. */
20188 unsigned int align = (TARGET_IAMCU
20189 ? GET_MODE_BITSIZE (mode)
20190 : GET_MODE_ALIGNMENT (mode));
20192 if (push_operand (op0, VOIDmode))
20193 op0 = emit_move_resolve_push (mode, op0);
20195 /* Force constants other than zero into memory. We do not know how
20196 the instructions used to build constants modify the upper 64 bits
20197 of the register; once we have that information we may be able
20198 to handle some of them more efficiently. */
20199 if (can_create_pseudo_p ()
20200 && (CONSTANT_P (op1)
20201 || (SUBREG_P (op1)
20202 && CONSTANT_P (SUBREG_REG (op1))))
20203 && ((register_operand (op0, mode)
20204 && !standard_sse_constant_p (op1, mode))
20205 /* ix86_expand_vector_move_misalign() does not like constants. */
20206 || (SSE_REG_MODE_P (mode)
20207 && MEM_P (op0)
20208 && MEM_ALIGN (op0) < align)))
20210 if (SUBREG_P (op1))
20212 machine_mode imode = GET_MODE (SUBREG_REG (op1));
20213 rtx r = force_const_mem (imode, SUBREG_REG (op1));
20214 if (r)
20215 r = validize_mem (r);
20216 else
20217 r = force_reg (imode, SUBREG_REG (op1));
20218 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
20220 else
20221 op1 = validize_mem (force_const_mem (mode, op1));
20224 /* We need to check memory alignment for SSE modes since an attribute
20225 can make operands unaligned. */
20226 if (can_create_pseudo_p ()
20227 && SSE_REG_MODE_P (mode)
20228 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
20229 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
20231 rtx tmp[2];
20233 /* ix86_expand_vector_move_misalign() does not like both
20234 arguments in memory. */
20235 if (!register_operand (op0, mode)
20236 && !register_operand (op1, mode))
20237 op1 = force_reg (mode, op1);
20239 tmp[0] = op0; tmp[1] = op1;
20240 ix86_expand_vector_move_misalign (mode, tmp);
20241 return;
20244 /* Make operand1 a register if it isn't already. */
20245 if (can_create_pseudo_p ()
20246 && !register_operand (op0, mode)
20247 && !register_operand (op1, mode))
20249 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
20250 return;
20253 emit_insn (gen_rtx_SET (op0, op1));
20256 /* Split 32-byte AVX unaligned load and store if needed. */
20258 static void
20259 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
20261 rtx m;
20262 rtx (*extract) (rtx, rtx, rtx);
20263 machine_mode mode;
20265 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
20266 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
20268 emit_insn (gen_rtx_SET (op0, op1));
20269 return;
20272 rtx orig_op0 = NULL_RTX;
20273 mode = GET_MODE (op0);
20274 switch (GET_MODE_CLASS (mode))
20276 case MODE_VECTOR_INT:
20277 case MODE_INT:
20278 if (mode != V32QImode)
20280 if (!MEM_P (op0))
20282 orig_op0 = op0;
20283 op0 = gen_reg_rtx (V32QImode);
20285 else
20286 op0 = gen_lowpart (V32QImode, op0);
20287 op1 = gen_lowpart (V32QImode, op1);
20288 mode = V32QImode;
20290 break;
20291 case MODE_VECTOR_FLOAT:
20292 break;
20293 default:
20294 gcc_unreachable ();
20297 switch (mode)
20299 default:
20300 gcc_unreachable ();
20301 case E_V32QImode:
20302 extract = gen_avx_vextractf128v32qi;
20303 mode = V16QImode;
20304 break;
20305 case E_V8SFmode:
20306 extract = gen_avx_vextractf128v8sf;
20307 mode = V4SFmode;
20308 break;
20309 case E_V4DFmode:
20310 extract = gen_avx_vextractf128v4df;
20311 mode = V2DFmode;
20312 break;
20315 if (MEM_P (op1))
20317 rtx r = gen_reg_rtx (mode);
20318 m = adjust_address (op1, mode, 0);
20319 emit_move_insn (r, m);
20320 m = adjust_address (op1, mode, 16);
20321 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
20322 emit_move_insn (op0, r);
20324 else if (MEM_P (op0))
20326 m = adjust_address (op0, mode, 0);
20327 emit_insn (extract (m, op1, const0_rtx));
20328 m = adjust_address (op0, mode, 16);
20329 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
20331 else
20332 gcc_unreachable ();
20334 if (orig_op0)
20335 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
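/* Sketch of the resulting code for a split 32-byte access (register and
   address names are only an example; the exact instructions depend on
   the mode):

	load:	vmovupd	(%rax), %xmm0
		vinsertf128 $1, 16(%rax), %ymm0, %ymm0
	store:	vmovupd	%xmm0, (%rax)
		vextractf128 $1, %ymm0, 16(%rax)

   i.e. two 16-byte moves instead of one unaligned 32-byte move.  */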
20338 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
20339 straight to ix86_expand_vector_move. */
20340 /* Code generation for scalar reg-reg moves of single and double precision data:
20341 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
20342 movaps reg, reg
20343 else
20344 movss reg, reg
20345 if (x86_sse_partial_reg_dependency == true)
20346 movapd reg, reg
20347 else
20348 movsd reg, reg
20350 Code generation for scalar loads of double precision data:
20351 if (x86_sse_split_regs == true)
20352 movlpd mem, reg (gas syntax)
20353 else
20354 movsd mem, reg
20356 Code generation for unaligned packed loads of single precision data
20357 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
20358 if (x86_sse_unaligned_move_optimal)
20359 movups mem, reg
20361 if (x86_sse_partial_reg_dependency == true)
20363 xorps reg, reg
20364 movlps mem, reg
20365 movhps mem+8, reg
20367 else
20369 movlps mem, reg
20370 movhps mem+8, reg
20373 Code generation for unaligned packed loads of double precision data
20374 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
20375 if (x86_sse_unaligned_move_optimal)
20376 movupd mem, reg
20378 if (x86_sse_split_regs == true)
20380 movlpd mem, reg
20381 movhpd mem+8, reg
20383 else
20385 movsd mem, reg
20386 movhpd mem+8, reg
20390 void
20391 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
20393 rtx op0, op1, m;
20395 op0 = operands[0];
20396 op1 = operands[1];
20398 /* Use unaligned load/store for AVX512 or when optimizing for size. */
20399 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
20401 emit_insn (gen_rtx_SET (op0, op1));
20402 return;
20405 if (TARGET_AVX)
20407 if (GET_MODE_SIZE (mode) == 32)
20408 ix86_avx256_split_vector_move_misalign (op0, op1);
20409 else
20410 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
20411 emit_insn (gen_rtx_SET (op0, op1));
20412 return;
20415 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
20416 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
20418 emit_insn (gen_rtx_SET (op0, op1));
20419 return;
20422 /* ??? If we have typed data, then it would appear that using
20423 movdqu is the only way to get unaligned data loaded with
20424 integer type. */
20425 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20427 emit_insn (gen_rtx_SET (op0, op1));
20428 return;
20431 if (MEM_P (op1))
20433 if (TARGET_SSE2 && mode == V2DFmode)
20435 rtx zero;
20437 /* When SSE registers are split into halves, we can avoid
20438 writing to the top half twice. */
20439 if (TARGET_SSE_SPLIT_REGS)
20441 emit_clobber (op0);
20442 zero = op0;
20444 else
20446 /* ??? Not sure about the best option for the Intel chips.
20447 The following would seem to satisfy; the register is
20448 entirely cleared, breaking the dependency chain. We
20449 then store to the upper half, with a dependency depth
20450 of one. A rumor has it that Intel recommends two movsd
20451 followed by an unpacklpd, but this is unconfirmed. And
20452 given that the dependency depth of the unpacklpd would
20453 still be one, I'm not sure why this would be better. */
20454 zero = CONST0_RTX (V2DFmode);
20457 m = adjust_address (op1, DFmode, 0);
20458 emit_insn (gen_sse2_loadlpd (op0, zero, m));
20459 m = adjust_address (op1, DFmode, 8);
20460 emit_insn (gen_sse2_loadhpd (op0, op0, m));
20462 else
20464 rtx t;
20466 if (mode != V4SFmode)
20467 t = gen_reg_rtx (V4SFmode);
20468 else
20469 t = op0;
20471 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
20472 emit_move_insn (t, CONST0_RTX (V4SFmode));
20473 else
20474 emit_clobber (t);
20476 m = adjust_address (op1, V2SFmode, 0);
20477 emit_insn (gen_sse_loadlps (t, t, m));
20478 m = adjust_address (op1, V2SFmode, 8);
20479 emit_insn (gen_sse_loadhps (t, t, m));
20480 if (mode != V4SFmode)
20481 emit_move_insn (op0, gen_lowpart (mode, t));
20484 else if (MEM_P (op0))
20486 if (TARGET_SSE2 && mode == V2DFmode)
20488 m = adjust_address (op0, DFmode, 0);
20489 emit_insn (gen_sse2_storelpd (m, op1));
20490 m = adjust_address (op0, DFmode, 8);
20491 emit_insn (gen_sse2_storehpd (m, op1));
20493 else
20495 if (mode != V4SFmode)
20496 op1 = gen_lowpart (V4SFmode, op1);
20498 m = adjust_address (op0, V2SFmode, 0);
20499 emit_insn (gen_sse_storelps (m, op1));
20500 m = adjust_address (op0, V2SFmode, 8);
20501 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
20504 else
20505 gcc_unreachable ();
20508 /* Helper function of ix86_fixup_binary_operands to canonicalize
20509 operand order. Returns true if the operands should be swapped. */
20511 static bool
20512 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
20513 rtx operands[])
20515 rtx dst = operands[0];
20516 rtx src1 = operands[1];
20517 rtx src2 = operands[2];
20519 /* If the operation is not commutative, we can't do anything. */
20520 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
20521 && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
20522 return false;
20524 /* Highest priority is that src1 should match dst. */
20525 if (rtx_equal_p (dst, src1))
20526 return false;
20527 if (rtx_equal_p (dst, src2))
20528 return true;
20530 /* Next highest priority is that immediate constants come second. */
20531 if (immediate_operand (src2, mode))
20532 return false;
20533 if (immediate_operand (src1, mode))
20534 return true;
20536 /* Lowest priority is that memory references should come second. */
20537 if (MEM_P (src2))
20538 return false;
20539 if (MEM_P (src1))
20540 return true;
20542 return false;
20546 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
20547 destination to use for the operation. If different from the true
20548 destination in operands[0], a copy operation will be required. */
20551 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
20552 rtx operands[])
20554 rtx dst = operands[0];
20555 rtx src1 = operands[1];
20556 rtx src2 = operands[2];
20558 /* Canonicalize operand order. */
20559 if (ix86_swap_binary_operands_p (code, mode, operands))
20561 /* It is invalid to swap operands of different modes. */
20562 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
20564 std::swap (src1, src2);
20567 /* Both source operands cannot be in memory. */
20568 if (MEM_P (src1) && MEM_P (src2))
20570 /* Optimization: Only read from memory once. */
20571 if (rtx_equal_p (src1, src2))
20573 src2 = force_reg (mode, src2);
20574 src1 = src2;
20576 else if (rtx_equal_p (dst, src1))
20577 src2 = force_reg (mode, src2);
20578 else
20579 src1 = force_reg (mode, src1);
20582 /* If the destination is memory, and we do not have matching source
20583 operands, do things in registers. */
20584 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20585 dst = gen_reg_rtx (mode);
20587 /* Source 1 cannot be a constant. */
20588 if (CONSTANT_P (src1))
20589 src1 = force_reg (mode, src1);
20591 /* Source 1 cannot be a non-matching memory. */
20592 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20593 src1 = force_reg (mode, src1);
20595 /* Improve address combine. */
20596 if (code == PLUS
20597 && GET_MODE_CLASS (mode) == MODE_INT
20598 && MEM_P (src2))
20599 src2 = force_reg (mode, src2);
20601 operands[1] = src1;
20602 operands[2] = src2;
20603 return dst;
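/* A small example of the canonicalization above (operand choice is
   arbitrary): for "a = b + a" with a and b in registers,
   ix86_swap_binary_operands_p requests a swap so that src1 matches the
   destination, giving "a = a + b", which fits the two-address add
   instruction without an extra move.  */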
20606 /* Similarly, but assume that the destination has already been
20607 set up properly. */
20609 void
20610 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
20611 machine_mode mode, rtx operands[])
20613 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
20614 gcc_assert (dst == operands[0]);
20617 /* Attempt to expand a binary operator. Make the expansion closer to the
20618 actual machine than just general_operand, which will allow 3 separate
20619 memory references (one output, two inputs) in a single insn. */
20621 void
20622 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
20623 rtx operands[])
20625 rtx src1, src2, dst, op, clob;
20627 dst = ix86_fixup_binary_operands (code, mode, operands);
20628 src1 = operands[1];
20629 src2 = operands[2];
20631 /* Emit the instruction. */
20633 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
20635 if (reload_completed
20636 && code == PLUS
20637 && !rtx_equal_p (dst, src1))
20639 /* This is going to be an LEA; avoid splitting it later. */
20640 emit_insn (op);
20642 else
20644 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20645 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20648 /* Fix up the destination if needed. */
20649 if (dst != operands[0])
20650 emit_move_insn (operands[0], dst);
20653 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20654 the given OPERANDS. */
20656 void
20657 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20658 rtx operands[])
20660 rtx op1 = NULL_RTX, op2 = NULL_RTX;
20661 if (SUBREG_P (operands[1]))
20663 op1 = operands[1];
20664 op2 = operands[2];
20666 else if (SUBREG_P (operands[2]))
20668 op1 = operands[2];
20669 op2 = operands[1];
20671 /* Optimize (__m128i) d | (__m128i) e and similar code
20672 when d and e are float vectors into float vector logical
20673 insn. In C/C++ without using intrinsics there is no other way
20674 to express vector logical operation on float vectors than
20675 to cast them temporarily to integer vectors. */
20676 if (op1
20677 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20678 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20679 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20680 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20681 && SUBREG_BYTE (op1) == 0
20682 && (GET_CODE (op2) == CONST_VECTOR
20683 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20684 && SUBREG_BYTE (op2) == 0))
20685 && can_create_pseudo_p ())
20687 rtx dst;
20688 switch (GET_MODE (SUBREG_REG (op1)))
20690 case E_V4SFmode:
20691 case E_V8SFmode:
20692 case E_V16SFmode:
20693 case E_V2DFmode:
20694 case E_V4DFmode:
20695 case E_V8DFmode:
20696 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20697 if (GET_CODE (op2) == CONST_VECTOR)
20699 op2 = gen_lowpart (GET_MODE (dst), op2);
20700 op2 = force_reg (GET_MODE (dst), op2);
20702 else
20704 op1 = operands[1];
20705 op2 = SUBREG_REG (operands[2]);
20706 if (!vector_operand (op2, GET_MODE (dst)))
20707 op2 = force_reg (GET_MODE (dst), op2);
20709 op1 = SUBREG_REG (op1);
20710 if (!vector_operand (op1, GET_MODE (dst)))
20711 op1 = force_reg (GET_MODE (dst), op1);
20712 emit_insn (gen_rtx_SET (dst,
20713 gen_rtx_fmt_ee (code, GET_MODE (dst),
20714 op1, op2)));
20715 emit_move_insn (operands[0], gen_lowpart (mode, dst));
20716 return;
20717 default:
20718 break;
20721 if (!vector_operand (operands[1], mode))
20722 operands[1] = force_reg (mode, operands[1]);
20723 if (!vector_operand (operands[2], mode))
20724 operands[2] = force_reg (mode, operands[2]);
20725 ix86_fixup_binary_operands_no_copy (code, mode, operands);
20726 emit_insn (gen_rtx_SET (operands[0],
20727 gen_rtx_fmt_ee (code, mode, operands[1],
20728 operands[2])));
20731 /* Return TRUE or FALSE depending on whether the binary operator meets the
20732 appropriate constraints. */
20734 bool
20735 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20736 rtx operands[3])
20738 rtx dst = operands[0];
20739 rtx src1 = operands[1];
20740 rtx src2 = operands[2];
20742 /* Both source operands cannot be in memory. */
20743 if (MEM_P (src1) && MEM_P (src2))
20744 return false;
20746 /* Canonicalize operand order for commutative operators. */
20747 if (ix86_swap_binary_operands_p (code, mode, operands))
20748 std::swap (src1, src2);
20750 /* If the destination is memory, we must have a matching source operand. */
20751 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20752 return false;
20754 /* Source 1 cannot be a constant. */
20755 if (CONSTANT_P (src1))
20756 return false;
20758 /* Source 1 cannot be a non-matching memory. */
20759 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20760 /* Support "andhi/andsi/anddi" as a zero-extending move. */
20761 return (code == AND
20762 && (mode == HImode
20763 || mode == SImode
20764 || (TARGET_64BIT && mode == DImode))
20765 && satisfies_constraint_L (src2));
20767 return true;
20770 /* Attempt to expand a unary operator. Make the expansion closer to the
20771 actual machine than just general_operand, which will allow 2 separate
20772 memory references (one output, one input) in a single insn. */
20774 void
20775 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20776 rtx operands[])
20778 bool matching_memory = false;
20779 rtx src, dst, op, clob;
20781 dst = operands[0];
20782 src = operands[1];
20784 /* If the destination is memory, and we do not have matching source
20785 operands, do things in registers. */
20786 if (MEM_P (dst))
20788 if (rtx_equal_p (dst, src))
20789 matching_memory = true;
20790 else
20791 dst = gen_reg_rtx (mode);
20794 /* When source operand is memory, destination must match. */
20795 if (MEM_P (src) && !matching_memory)
20796 src = force_reg (mode, src);
20798 /* Emit the instruction. */
20800 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20802 if (code == NOT)
20803 emit_insn (op);
20804 else
20806 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20810 /* Fix up the destination if needed. */
20811 if (dst != operands[0])
20812 emit_move_insn (operands[0], dst);
20815 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20816 divisor are within the range [0-255]. */
20818 void
20819 ix86_split_idivmod (machine_mode mode, rtx operands[],
20820 bool signed_p)
20822 rtx_code_label *end_label, *qimode_label;
20823 rtx div, mod;
20824 rtx_insn *insn;
20825 rtx scratch, tmp0, tmp1, tmp2;
20826 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20827 rtx (*gen_zero_extend) (rtx, rtx);
20828 rtx (*gen_test_ccno_1) (rtx, rtx);
20830 switch (mode)
20832 case E_SImode:
20833 if (GET_MODE (operands[0]) == SImode)
20835 if (GET_MODE (operands[1]) == SImode)
20836 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20837 else
20838 gen_divmod4_1
20839 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20840 gen_zero_extend = gen_zero_extendqisi2;
20842 else
20844 gen_divmod4_1
20845 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20846 gen_zero_extend = gen_zero_extendqidi2;
20848 gen_test_ccno_1 = gen_testsi_ccno_1;
20849 break;
20850 case E_DImode:
20851 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20852 gen_test_ccno_1 = gen_testdi_ccno_1;
20853 gen_zero_extend = gen_zero_extendqidi2;
20854 break;
20855 default:
20856 gcc_unreachable ();
20859 end_label = gen_label_rtx ();
20860 qimode_label = gen_label_rtx ();
20862 scratch = gen_reg_rtx (mode);
20864 /* Use 8bit unsigned divmod if the dividend and divisor are within
20865 the range [0-255]. */
20866 emit_move_insn (scratch, operands[2]);
20867 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20868 scratch, 1, OPTAB_DIRECT);
20869 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20870 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20871 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20872 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20873 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20874 pc_rtx);
20875 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20876 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20877 JUMP_LABEL (insn) = qimode_label;
20879 /* Generate the original signed/unsigned divmod. */
20880 div = gen_divmod4_1 (operands[0], operands[1],
20881 operands[2], operands[3]);
20882 emit_insn (div);
20884 /* Branch to the end. */
20885 emit_jump_insn (gen_jump (end_label));
20886 emit_barrier ();
20888 /* Generate 8bit unsigned divide. */
20889 emit_label (qimode_label);
20890 /* Don't use operands[0] for result of 8bit divide since not all
20891 registers support QImode ZERO_EXTRACT. */
20892 tmp0 = lowpart_subreg (HImode, scratch, mode);
20893 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20894 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20895 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20897 if (signed_p)
20899 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20900 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20902 else
20904 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20905 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20907 if (mode == SImode)
20909 if (GET_MODE (operands[0]) != SImode)
20910 div = gen_rtx_ZERO_EXTEND (DImode, div);
20911 if (GET_MODE (operands[1]) != SImode)
20912 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20915 /* Extract remainder from AH. */
20916 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20917 tmp0, GEN_INT (8), GEN_INT (8));
20918 if (REG_P (operands[1]))
20919 insn = emit_move_insn (operands[1], tmp1);
20920 else
20922 /* Need a new scratch register since the old one has result
20923 of 8bit divide. */
20924 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20925 emit_move_insn (scratch, tmp1);
20926 insn = emit_move_insn (operands[1], scratch);
20928 set_unique_reg_note (insn, REG_EQUAL, mod);
20930 /* Zero extend quotient from AL. */
20931 tmp1 = gen_lowpart (QImode, tmp0);
20932 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20933 set_unique_reg_note (insn, REG_EQUAL, div);
20935 emit_label (end_label);
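/* Sketch of the generated sequence for a 32bit unsigned division
   (register allocation and label names are illustrative only):

	movl	%esi, %ecx
	orl	%edi, %ecx
	testl	$-256, %ecx		; do both operands fit in [0, 255]?
	je	.Lqimode
	...	full-width div/idiv path ...
	jmp	.Ldone
   .Lqimode:
	...	8bit divb; quotient lands in %al, remainder in %ah ...
   .Ldone:

   The 8bit divide is considerably faster than the full-width divide on
   most implementations, which is the point of this splitter.  */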
20938 #define LEA_MAX_STALL (3)
20939 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20941 /* Increase given DISTANCE in half-cycles according to
20942 dependencies between PREV and NEXT instructions.
20943 Add 1 half-cycle if there is no dependency and
20944 go to the next cycle if there is some dependency. */
20946 static unsigned int
20947 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20949 df_ref def, use;
20951 if (!prev || !next)
20952 return distance + (distance & 1) + 2;
20954 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20955 return distance + 1;
20957 FOR_EACH_INSN_USE (use, next)
20958 FOR_EACH_INSN_DEF (def, prev)
20959 if (!DF_REF_IS_ARTIFICIAL (def)
20960 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20961 return distance + (distance & 1) + 2;
20963 return distance + 1;
20966 /* Function checks if instruction INSN defines register number
20967 REGNO1 or REGNO2. */
20969 static bool
20970 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20971 rtx_insn *insn)
20973 df_ref def;
20975 FOR_EACH_INSN_DEF (def, insn)
20976 if (DF_REF_REG_DEF_P (def)
20977 && !DF_REF_IS_ARTIFICIAL (def)
20978 && (regno1 == DF_REF_REGNO (def)
20979 || regno2 == DF_REF_REGNO (def)))
20980 return true;
20982 return false;
20985 /* Function checks if instruction INSN uses register number
20986 REGNO as a part of address expression. */
20988 static bool
20989 insn_uses_reg_mem (unsigned int regno, rtx insn)
20991 df_ref use;
20993 FOR_EACH_INSN_USE (use, insn)
20994 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20995 return true;
20997 return false;
21000 /* Search backward for non-agu definition of register number REGNO1
21001 or register number REGNO2 in basic block starting from instruction
21002 START up to head of basic block or instruction INSN.
21004 Function puts true value into *FOUND var if definition was found
21005 and false otherwise.
21007 Distance in half-cycles between START and found instruction or head
21008 of BB is added to DISTANCE and returned. */
21010 static int
21011 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
21012 rtx_insn *insn, int distance,
21013 rtx_insn *start, bool *found)
21015 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
21016 rtx_insn *prev = start;
21017 rtx_insn *next = NULL;
21019 *found = false;
21021 while (prev
21022 && prev != insn
21023 && distance < LEA_SEARCH_THRESHOLD)
21025 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
21027 distance = increase_distance (prev, next, distance);
21028 if (insn_defines_reg (regno1, regno2, prev))
21030 if (recog_memoized (prev) < 0
21031 || get_attr_type (prev) != TYPE_LEA)
21033 *found = true;
21034 return distance;
21038 next = prev;
21040 if (prev == BB_HEAD (bb))
21041 break;
21043 prev = PREV_INSN (prev);
21046 return distance;
21049 /* Search backward for non-agu definition of register number REGNO1
21050 or register number REGNO2 in INSN's basic block until
21051 1. Pass LEA_SEARCH_THRESHOLD instructions, or
21052 2. Reach neighbor BBs boundary, or
21053 3. Reach agu definition.
21054 Returns the distance between the non-agu definition point and INSN.
21055 If no definition point, returns -1. */
21057 static int
21058 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
21059 rtx_insn *insn)
21061 basic_block bb = BLOCK_FOR_INSN (insn);
21062 int distance = 0;
21063 bool found = false;
21065 if (insn != BB_HEAD (bb))
21066 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
21067 distance, PREV_INSN (insn),
21068 &found);
21070 if (!found && distance < LEA_SEARCH_THRESHOLD)
21072 edge e;
21073 edge_iterator ei;
21074 bool simple_loop = false;
21076 FOR_EACH_EDGE (e, ei, bb->preds)
21077 if (e->src == bb)
21079 simple_loop = true;
21080 break;
21083 if (simple_loop)
21084 distance = distance_non_agu_define_in_bb (regno1, regno2,
21085 insn, distance,
21086 BB_END (bb), &found);
21087 else
21089 int shortest_dist = -1;
21090 bool found_in_bb = false;
21092 FOR_EACH_EDGE (e, ei, bb->preds)
21094 int bb_dist
21095 = distance_non_agu_define_in_bb (regno1, regno2,
21096 insn, distance,
21097 BB_END (e->src),
21098 &found_in_bb);
21099 if (found_in_bb)
21101 if (shortest_dist < 0)
21102 shortest_dist = bb_dist;
21103 else if (bb_dist > 0)
21104 shortest_dist = MIN (bb_dist, shortest_dist);
21106 found = true;
21110 distance = shortest_dist;
21114 /* get_attr_type may modify recog data. We want to make sure
21115 that recog data is valid for instruction INSN, on which
21116 distance_non_agu_define is called. INSN is unchanged here. */
21117 extract_insn_cached (insn);
21119 if (!found)
21120 return -1;
21122 return distance >> 1;
21125 /* Return the distance in half-cycles between INSN and the next
21126 insn that uses register number REGNO in memory address added
21127 to DISTANCE. Return -1 if REGNO is set.
21129 Put true value into *FOUND if register usage was found and
21130 false otherwise.
21131 Put true value into *REDEFINED if register redefinition was
21132 found and false otherwise. */
21134 static int
21135 distance_agu_use_in_bb (unsigned int regno,
21136 rtx_insn *insn, int distance, rtx_insn *start,
21137 bool *found, bool *redefined)
21139 basic_block bb = NULL;
21140 rtx_insn *next = start;
21141 rtx_insn *prev = NULL;
21143 *found = false;
21144 *redefined = false;
21146 if (start != NULL_RTX)
21148 bb = BLOCK_FOR_INSN (start);
21149 if (start != BB_HEAD (bb))
21150 /* If insn and start belong to the same bb, set prev to insn,
21151 so the call to increase_distance will increase the distance
21152 between insns by 1. */
21153 prev = insn;
21156 while (next
21157 && next != insn
21158 && distance < LEA_SEARCH_THRESHOLD)
21160 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
21162 distance = increase_distance(prev, next, distance);
21163 if (insn_uses_reg_mem (regno, next))
21165 /* Return DISTANCE if OP0 is used in memory
21166 address in NEXT. */
21167 *found = true;
21168 return distance;
21171 if (insn_defines_reg (regno, INVALID_REGNUM, next))
21173 /* Return -1 if OP0 is set in NEXT. */
21174 *redefined = true;
21175 return -1;
21178 prev = next;
21181 if (next == BB_END (bb))
21182 break;
21184 next = NEXT_INSN (next);
21187 return distance;
21190 /* Return the distance between INSN and the next insn that uses
21191 register number REGNO0 in memory address. Return -1 if no such
21192 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
21194 static int
21195 distance_agu_use (unsigned int regno0, rtx_insn *insn)
21197 basic_block bb = BLOCK_FOR_INSN (insn);
21198 int distance = 0;
21199 bool found = false;
21200 bool redefined = false;
21202 if (insn != BB_END (bb))
21203 distance = distance_agu_use_in_bb (regno0, insn, distance,
21204 NEXT_INSN (insn),
21205 &found, &redefined);
21207 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
21209 edge e;
21210 edge_iterator ei;
21211 bool simple_loop = false;
21213 FOR_EACH_EDGE (e, ei, bb->succs)
21214 if (e->dest == bb)
21216 simple_loop = true;
21217 break;
21220 if (simple_loop)
21221 distance = distance_agu_use_in_bb (regno0, insn,
21222 distance, BB_HEAD (bb),
21223 &found, &redefined);
21224 else
21226 int shortest_dist = -1;
21227 bool found_in_bb = false;
21228 bool redefined_in_bb = false;
21230 FOR_EACH_EDGE (e, ei, bb->succs)
21232 int bb_dist
21233 = distance_agu_use_in_bb (regno0, insn,
21234 distance, BB_HEAD (e->dest),
21235 &found_in_bb, &redefined_in_bb);
21236 if (found_in_bb)
21238 if (shortest_dist < 0)
21239 shortest_dist = bb_dist;
21240 else if (bb_dist > 0)
21241 shortest_dist = MIN (bb_dist, shortest_dist);
21243 found = true;
21247 distance = shortest_dist;
21251 if (!found || redefined)
21252 return -1;
21254 return distance >> 1;
21257 /* Define this macro to tune LEA priority vs ADD; it takes effect when
21258 there is a choice between LEA and ADD.
21259 Negative value: ADD is preferred over LEA
21260 Zero: Neutral
21261 Positive value: LEA is preferred over ADD. */
21262 #define IX86_LEA_PRIORITY 0
21264 /* Return true if using lea INSN has a performance advantage
21265 over a sequence of instructions. The instruction sequence has
21266 SPLIT_COST cycles higher latency than the lea latency. */
21268 static bool
21269 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
21270 unsigned int regno2, int split_cost, bool has_scale)
21272 int dist_define, dist_use;
21274 /* For Silvermont, if a 2-source or 3-source LEA is used for a
21275 non-destructive destination, or for the ability to use
21276 SCALE, the use of LEA is justified. */
21277 if (TARGET_SILVERMONT || TARGET_INTEL)
21279 if (has_scale)
21280 return true;
21281 if (split_cost < 1)
21282 return false;
21283 if (regno0 == regno1 || regno0 == regno2)
21284 return false;
21285 return true;
21288 dist_define = distance_non_agu_define (regno1, regno2, insn);
21289 dist_use = distance_agu_use (regno0, insn);
21291 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
21293 /* If there is no non-AGU operand definition, no AGU
21294 operand usage and the split cost is 0, then both the lea
21295 and non-lea variants have the same priority. Currently
21296 we prefer lea for 64-bit code and non-lea for 32-bit
21297 code. */
21298 if (dist_use < 0 && split_cost == 0)
21299 return TARGET_64BIT || IX86_LEA_PRIORITY;
21300 else
21301 return true;
21304 /* With a longer definition distance, lea is preferable.
21305 Here we adjust the distance to take into account the splitting
21306 cost and lea priority. */
21307 dist_define += split_cost + IX86_LEA_PRIORITY;
21309 /* If there is no use in a memory address then we just check
21310 that the split cost exceeds the AGU stall. */
21311 if (dist_use < 0)
21312 return dist_define > LEA_MAX_STALL;
21314 /* If this insn has both a backward non-agu dependence and a forward
21315 agu dependence, the one with the shorter distance takes effect. */
21316 return dist_define >= dist_use;
21319 /* Return true if it is legal to clobber flags by INSN and
21320 false otherwise. */
21322 static bool
21323 ix86_ok_to_clobber_flags (rtx_insn *insn)
21325 basic_block bb = BLOCK_FOR_INSN (insn);
21326 df_ref use;
21327 bitmap live;
21329 while (insn)
21331 if (NONDEBUG_INSN_P (insn))
21333 FOR_EACH_INSN_USE (use, insn)
21334 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
21335 return false;
21337 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
21338 return true;
21341 if (insn == BB_END (bb))
21342 break;
21344 insn = NEXT_INSN (insn);
21347 live = df_get_live_out(bb);
21348 return !REGNO_REG_SET_P (live, FLAGS_REG);
21351 /* Return true if we need to split op0 = op1 + op2 into a sequence of
21352 move and add to avoid AGU stalls. */
21354 bool
21355 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
21357 unsigned int regno0, regno1, regno2;
21359 /* Check if we need to optimize. */
21360 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21361 return false;
21363 /* Check it is correct to split here. */
21364 if (!ix86_ok_to_clobber_flags(insn))
21365 return false;
21367 regno0 = true_regnum (operands[0]);
21368 regno1 = true_regnum (operands[1]);
21369 regno2 = true_regnum (operands[2]);
21371 /* We need to split only adds with a non-destructive
21372 destination operand. */
21373 if (regno0 == regno1 || regno0 == regno2)
21374 return false;
21375 else
21376 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
21379 /* Return true if we should emit lea instruction instead of mov
21380 instruction. */
21382 bool
21383 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
21385 unsigned int regno0, regno1;
21387 /* Check if we need to optimize. */
21388 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21389 return false;
21391 /* Use lea for reg to reg moves only. */
21392 if (!REG_P (operands[0]) || !REG_P (operands[1]))
21393 return false;
21395 regno0 = true_regnum (operands[0]);
21396 regno1 = true_regnum (operands[1]);
21398 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
21401 /* Return true if we need to split lea into a sequence of
21402 instructions to avoid AGU stalls. */
21404 bool
21405 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
21407 unsigned int regno0, regno1, regno2;
21408 int split_cost;
21409 struct ix86_address parts;
21410 int ok;
21412 /* Check we need to optimize. */
21413 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
21414 return false;
21416 /* The "at least two components" test below might not catch simple
21417 move or zero extension insns if parts.base is non-NULL and parts.disp
21418 is const0_rtx as the only components in the address, e.g. if the
21419 register is %rbp or %r13. As this test is much cheaper and moves or
21420 zero extensions are the common case, do this check first. */
21421 if (REG_P (operands[1])
21422 || (SImode_address_operand (operands[1], VOIDmode)
21423 && REG_P (XEXP (operands[1], 0))))
21424 return false;
21426 /* Check if it is OK to split here. */
21427 if (!ix86_ok_to_clobber_flags (insn))
21428 return false;
21430 ok = ix86_decompose_address (operands[1], &parts);
21431 gcc_assert (ok);
21433 /* There should be at least two components in the address. */
21434 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
21435 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
21436 return false;
21438 /* We should not split into add if a non-legitimate PIC
21439 operand is used as the displacement. */
21440 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
21441 return false;
21443 regno0 = true_regnum (operands[0]) ;
21444 regno1 = INVALID_REGNUM;
21445 regno2 = INVALID_REGNUM;
21447 if (parts.base)
21448 regno1 = true_regnum (parts.base);
21449 if (parts.index)
21450 regno2 = true_regnum (parts.index);
21452 split_cost = 0;
21454 /* Compute how many cycles we will add to the execution time
21455 if we split the lea into a sequence of instructions. */
21456 if (parts.base || parts.index)
21458 /* Have to use a mov instruction if the non-destructive
21459 destination form is used. */
21460 if (regno1 != regno0 && regno2 != regno0)
21461 split_cost += 1;
21463 /* Have to add index to base if both exist. */
21464 if (parts.base && parts.index)
21465 split_cost += 1;
21467 /* Have to use shift and adds if scale is 2 or greater. */
21468 if (parts.scale > 1)
21470 if (regno0 != regno1)
21471 split_cost += 1;
21472 else if (regno2 == regno0)
21473 split_cost += 4;
21474 else
21475 split_cost += parts.scale;
21478 /* Have to use an add instruction with an immediate if
21479 disp is non-zero. */
21480 if (parts.disp && parts.disp != const0_rtx)
21481 split_cost += 1;
21483 /* Subtract the price of lea. */
21484 split_cost -= 1;
21487 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
21488 parts.scale > 1);
21491 /* Emit x86 binary operation CODE in mode MODE, where the first operand
21492 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
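/* For example, for CODE == PLUS and MODE == SImode this emits
   (parallel [(set dst (plus:SI dst src))
	      (clobber (reg:CC flags))]).  */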
21494 static void
21495 ix86_emit_binop (enum rtx_code code, machine_mode mode,
21496 rtx dst, rtx src)
21498 rtx op, clob;
21500 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
21501 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21503 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
21506 /* Return true if regno1 def is nearest to the insn. */
21508 static bool
21509 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
21511 rtx_insn *prev = insn;
21512 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
21514 if (insn == start)
21515 return false;
21516 while (prev && prev != start)
21518 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
21520 prev = PREV_INSN (prev);
21521 continue;
21523 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
21524 return true;
21525 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
21526 return false;
21527 prev = PREV_INSN (prev);
21530 /* None of the regs is defined in the bb. */
21531 return false;
21534 /* Split lea instructions into a sequence of instructions
21535 which are executed on the ALU to avoid AGU stalls.
21536 It is assumed that it is allowed to clobber the flags register
21537 at the lea position. */
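/* For example (roughly, assuming the destination, base and index registers
   are all distinct), lea 4(%ebx,%ecx,2), %eax is split into a move of %ecx
   to %eax, a left shift by 1, an add of %ebx and an add of the constant 4.  */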
21539 void
21540 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
21542 unsigned int regno0, regno1, regno2;
21543 struct ix86_address parts;
21544 rtx target, tmp;
21545 int ok, adds;
21547 ok = ix86_decompose_address (operands[1], &parts);
21548 gcc_assert (ok);
21550 target = gen_lowpart (mode, operands[0]);
21552 regno0 = true_regnum (target);
21553 regno1 = INVALID_REGNUM;
21554 regno2 = INVALID_REGNUM;
21556 if (parts.base)
21558 parts.base = gen_lowpart (mode, parts.base);
21559 regno1 = true_regnum (parts.base);
21562 if (parts.index)
21564 parts.index = gen_lowpart (mode, parts.index);
21565 regno2 = true_regnum (parts.index);
21568 if (parts.disp)
21569 parts.disp = gen_lowpart (mode, parts.disp);
21571 if (parts.scale > 1)
21573 /* Case r1 = r1 + ... */
21574 if (regno1 == regno0)
21576 /* If we have the case r1 = r1 + C * r2 then we
21577 would have to use multiplication, which is very
21578 expensive. Assume the cost model is wrong if we
21579 have such a case here. */
21580 gcc_assert (regno2 != regno0);
21582 for (adds = parts.scale; adds > 0; adds--)
21583 ix86_emit_binop (PLUS, mode, target, parts.index);
21585 else
21587 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
21588 if (regno0 != regno2)
21589 emit_insn (gen_rtx_SET (target, parts.index));
21591 /* Use shift for scaling. */
21592 ix86_emit_binop (ASHIFT, mode, target,
21593 GEN_INT (exact_log2 (parts.scale)));
21595 if (parts.base)
21596 ix86_emit_binop (PLUS, mode, target, parts.base);
21598 if (parts.disp && parts.disp != const0_rtx)
21599 ix86_emit_binop (PLUS, mode, target, parts.disp);
21602 else if (!parts.base && !parts.index)
21604 gcc_assert(parts.disp);
21605 emit_insn (gen_rtx_SET (target, parts.disp));
21607 else
21609 if (!parts.base)
21611 if (regno0 != regno2)
21612 emit_insn (gen_rtx_SET (target, parts.index));
21614 else if (!parts.index)
21616 if (regno0 != regno1)
21617 emit_insn (gen_rtx_SET (target, parts.base));
21619 else
21621 if (regno0 == regno1)
21622 tmp = parts.index;
21623 else if (regno0 == regno2)
21624 tmp = parts.base;
21625 else
21627 rtx tmp1;
21629 /* Find better operand for SET instruction, depending
21630 on which definition is farther from the insn. */
21631 if (find_nearest_reg_def (insn, regno1, regno2))
21632 tmp = parts.index, tmp1 = parts.base;
21633 else
21634 tmp = parts.base, tmp1 = parts.index;
21636 emit_insn (gen_rtx_SET (target, tmp));
21638 if (parts.disp && parts.disp != const0_rtx)
21639 ix86_emit_binop (PLUS, mode, target, parts.disp);
21641 ix86_emit_binop (PLUS, mode, target, tmp1);
21642 return;
21645 ix86_emit_binop (PLUS, mode, target, tmp);
21648 if (parts.disp && parts.disp != const0_rtx)
21649 ix86_emit_binop (PLUS, mode, target, parts.disp);
21653 /* Return true if it is ok to optimize an ADD operation to LEA
21654 operation to avoid flag register consumption. For most processors,
21655 ADD is faster than LEA. For processors like BONNELL, if the
21656 destination register of LEA holds an actual address which will be
21657 used soon, LEA is better; otherwise ADD is better. */
21659 bool
21660 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21662 unsigned int regno0 = true_regnum (operands[0]);
21663 unsigned int regno1 = true_regnum (operands[1]);
21664 unsigned int regno2 = true_regnum (operands[2]);
21666 /* If a = b + c, (a!=b && a!=c), must use lea form. */
21667 if (regno0 != regno1 && regno0 != regno2)
21668 return true;
21670 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21671 return false;
21673 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21676 /* Return true if destination reg of SET_BODY is shift count of
21677 USE_BODY. */
21679 static bool
21680 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21682 rtx set_dest;
21683 rtx shift_rtx;
21684 int i;
21686 /* Retrieve destination of SET_BODY. */
21687 switch (GET_CODE (set_body))
21689 case SET:
21690 set_dest = SET_DEST (set_body);
21691 if (!set_dest || !REG_P (set_dest))
21692 return false;
21693 break;
21694 case PARALLEL:
21695 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21696 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21697 use_body))
21698 return true;
21699 /* FALLTHROUGH */
21700 default:
21701 return false;
21704 /* Retrieve shift count of USE_BODY. */
21705 switch (GET_CODE (use_body))
21707 case SET:
21708 shift_rtx = XEXP (use_body, 1);
21709 break;
21710 case PARALLEL:
21711 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21712 if (ix86_dep_by_shift_count_body (set_body,
21713 XVECEXP (use_body, 0, i)))
21714 return true;
21715 /* FALLTHROUGH */
21716 default:
21717 return false;
21720 if (shift_rtx
21721 && (GET_CODE (shift_rtx) == ASHIFT
21722 || GET_CODE (shift_rtx) == LSHIFTRT
21723 || GET_CODE (shift_rtx) == ASHIFTRT
21724 || GET_CODE (shift_rtx) == ROTATE
21725 || GET_CODE (shift_rtx) == ROTATERT))
21727 rtx shift_count = XEXP (shift_rtx, 1);
21729 /* Return true if shift count is dest of SET_BODY. */
21730 if (REG_P (shift_count))
21732 /* Add this check since it can be invoked before register
21733 allocation in the pre-reload scheduler. */
21734 if (reload_completed
21735 && true_regnum (set_dest) == true_regnum (shift_count))
21736 return true;
21737 else if (REGNO(set_dest) == REGNO(shift_count))
21738 return true;
21742 return false;
21745 /* Return true if destination reg of SET_INSN is shift count of
21746 USE_INSN. */
21748 bool
21749 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21751 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21752 PATTERN (use_insn));
21755 /* Return TRUE or FALSE depending on whether the unary operator meets the
21756 appropriate constraints. */
21758 bool
21759 ix86_unary_operator_ok (enum rtx_code,
21760 machine_mode,
21761 rtx operands[2])
21763 /* If one of operands is memory, source and destination must match. */
21764 if ((MEM_P (operands[0])
21765 || MEM_P (operands[1]))
21766 && ! rtx_equal_p (operands[0], operands[1]))
21767 return false;
21768 return true;
21771 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21772 are ok, keeping in mind the possible movddup alternative. */
21774 bool
21775 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21777 if (MEM_P (operands[0]))
21778 return rtx_equal_p (operands[0], operands[1 + high]);
21779 if (MEM_P (operands[1]) && MEM_P (operands[2]))
21780 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21781 return true;
21784 /* Post-reload splitter for converting an SF or DFmode value in an
21785 SSE register into an unsigned SImode. */
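/* The approach: values >= 0x1p31 have 0x1p31 subtracted before the signed
   truncating conversion, and the sign bit of the integer result is then
   flipped back (the final xor with LARGE shifted left by 31).  */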
21787 void
21788 ix86_split_convert_uns_si_sse (rtx operands[])
21790 machine_mode vecmode;
21791 rtx value, large, zero_or_two31, input, two31, x;
21793 large = operands[1];
21794 zero_or_two31 = operands[2];
21795 input = operands[3];
21796 two31 = operands[4];
21797 vecmode = GET_MODE (large);
21798 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21800 /* Load up the value into the low element. We must ensure that the other
21801 elements are valid floats -- zero is the easiest such value. */
21802 if (MEM_P (input))
21804 if (vecmode == V4SFmode)
21805 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21806 else
21807 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21809 else
21811 input = gen_rtx_REG (vecmode, REGNO (input));
21812 emit_move_insn (value, CONST0_RTX (vecmode));
21813 if (vecmode == V4SFmode)
21814 emit_insn (gen_sse_movss (value, value, input));
21815 else
21816 emit_insn (gen_sse2_movsd (value, value, input));
21819 emit_move_insn (large, two31);
21820 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21822 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21823 emit_insn (gen_rtx_SET (large, x));
21825 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21826 emit_insn (gen_rtx_SET (zero_or_two31, x));
21828 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21829 emit_insn (gen_rtx_SET (value, x));
21831 large = gen_rtx_REG (V4SImode, REGNO (large));
21832 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21834 x = gen_rtx_REG (V4SImode, REGNO (value));
21835 if (vecmode == V4SFmode)
21836 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21837 else
21838 emit_insn (gen_sse2_cvttpd2dq (x, value));
21839 value = x;
21841 emit_insn (gen_xorv4si3 (value, value, large));
21844 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21845 Expects the 64-bit DImode to be supplied in a pair of integral
21846 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21847 -mfpmath=sse, !optimize_size only. */
21849 void
21850 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21852 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21853 rtx int_xmm, fp_xmm;
21854 rtx biases, exponents;
21855 rtx x;
21857 int_xmm = gen_reg_rtx (V4SImode);
21858 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21859 emit_insn (gen_movdi_to_sse (int_xmm, input));
21860 else if (TARGET_SSE_SPLIT_REGS)
21862 emit_clobber (int_xmm);
21863 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21865 else
21867 x = gen_reg_rtx (V2DImode);
21868 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21869 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21872 x = gen_rtx_CONST_VECTOR (V4SImode,
21873 gen_rtvec (4, GEN_INT (0x43300000UL),
21874 GEN_INT (0x45300000UL),
21875 const0_rtx, const0_rtx));
21876 exponents = validize_mem (force_const_mem (V4SImode, x));
21878 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21879 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21881 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21882 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21883 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21884 (0x1.0p84 + double(fp_value_hi_xmm)).
21885 Note these exponents differ by 32. */
21887 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21889 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21890 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21891 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21892 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21893 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21894 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21895 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21896 biases = validize_mem (force_const_mem (V2DFmode, biases));
21897 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21899 /* Add the upper and lower DFmode values together. */
21900 if (TARGET_SSE3)
21901 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21902 else
21904 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21905 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21906 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21909 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21912 /* Not used, but eases macroization of patterns. */
21913 void
21914 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21916 gcc_unreachable ();
21919 /* Convert an unsigned SImode value into a DFmode. Only currently used
21920 for SSE, but applicable anywhere. */
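/* INT_MIN is added so that the value, reinterpreted as signed, equals
   input - 2**31; it is then converted with floatsidf2 and 2**31 is added
   back in DFmode.  */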
21922 void
21923 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21925 REAL_VALUE_TYPE TWO31r;
21926 rtx x, fp;
21928 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21929 NULL, 1, OPTAB_DIRECT);
21931 fp = gen_reg_rtx (DFmode);
21932 emit_insn (gen_floatsidf2 (fp, x));
21934 real_ldexp (&TWO31r, &dconst1, 31);
21935 x = const_double_from_real_value (TWO31r, DFmode);
21937 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21938 if (x != target)
21939 emit_move_insn (target, x);
21942 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21943 32-bit mode; otherwise we have a direct convert instruction. */
21945 void
21946 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21948 REAL_VALUE_TYPE TWO32r;
21949 rtx fp_lo, fp_hi, x;
21951 fp_lo = gen_reg_rtx (DFmode);
21952 fp_hi = gen_reg_rtx (DFmode);
21954 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21956 real_ldexp (&TWO32r, &dconst1, 32);
21957 x = const_double_from_real_value (TWO32r, DFmode);
21958 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21960 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21962 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21963 0, OPTAB_DIRECT);
21964 if (x != target)
21965 emit_move_insn (target, x);
21968 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21969 For x86_32, -mfpmath=sse, !optimize_size only. */
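/* The value is split into its low and high 16-bit halves, each half is
   converted exactly, and the result is recombined as fp_hi * 2**16 + fp_lo.  */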
21970 void
21971 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21973 REAL_VALUE_TYPE ONE16r;
21974 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21976 real_ldexp (&ONE16r, &dconst1, 16);
21977 x = const_double_from_real_value (ONE16r, SFmode);
21978 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21979 NULL, 0, OPTAB_DIRECT);
21980 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21981 NULL, 0, OPTAB_DIRECT);
21982 fp_hi = gen_reg_rtx (SFmode);
21983 fp_lo = gen_reg_rtx (SFmode);
21984 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21985 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21986 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21987 0, OPTAB_DIRECT);
21988 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21989 0, OPTAB_DIRECT);
21990 if (!rtx_equal_p (target, fp_hi))
21991 emit_move_insn (target, fp_hi);
21994 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21995 a vector of unsigned ints VAL to vector of floats TARGET. */
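/* This uses the same split-into-16-bit-halves technique as
   ix86_expand_convert_uns_sisf_sse above, applied element-wise.  */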
21997 void
21998 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
22000 rtx tmp[8];
22001 REAL_VALUE_TYPE TWO16r;
22002 machine_mode intmode = GET_MODE (val);
22003 machine_mode fltmode = GET_MODE (target);
22004 rtx (*cvt) (rtx, rtx);
22006 if (intmode == V4SImode)
22007 cvt = gen_floatv4siv4sf2;
22008 else
22009 cvt = gen_floatv8siv8sf2;
22010 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
22011 tmp[0] = force_reg (intmode, tmp[0]);
22012 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
22013 OPTAB_DIRECT);
22014 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
22015 NULL_RTX, 1, OPTAB_DIRECT);
22016 tmp[3] = gen_reg_rtx (fltmode);
22017 emit_insn (cvt (tmp[3], tmp[1]));
22018 tmp[4] = gen_reg_rtx (fltmode);
22019 emit_insn (cvt (tmp[4], tmp[2]));
22020 real_ldexp (&TWO16r, &dconst1, 16);
22021 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
22022 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
22023 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
22024 OPTAB_DIRECT);
22025 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
22026 OPTAB_DIRECT);
22027 if (tmp[7] != target)
22028 emit_move_insn (target, tmp[7]);
22031 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
22032 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
22033 This is done by doing just signed conversion if < 0x1p31, and otherwise by
22034 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
22037 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
22039 REAL_VALUE_TYPE TWO31r;
22040 rtx two31r, tmp[4];
22041 machine_mode mode = GET_MODE (val);
22042 machine_mode scalarmode = GET_MODE_INNER (mode);
22043 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
22044 rtx (*cmp) (rtx, rtx, rtx, rtx);
22045 int i;
22047 for (i = 0; i < 3; i++)
22048 tmp[i] = gen_reg_rtx (mode);
22049 real_ldexp (&TWO31r, &dconst1, 31);
22050 two31r = const_double_from_real_value (TWO31r, scalarmode);
22051 two31r = ix86_build_const_vector (mode, 1, two31r);
22052 two31r = force_reg (mode, two31r);
22053 switch (mode)
22055 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
22056 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
22057 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
22058 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
22059 default: gcc_unreachable ();
22061 tmp[3] = gen_rtx_LE (mode, two31r, val);
22062 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
22063 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
22064 0, OPTAB_DIRECT);
22065 if (intmode == V4SImode || TARGET_AVX2)
22066 *xorp = expand_simple_binop (intmode, ASHIFT,
22067 gen_lowpart (intmode, tmp[0]),
22068 GEN_INT (31), NULL_RTX, 0,
22069 OPTAB_DIRECT);
22070 else
22072 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
22073 two31 = ix86_build_const_vector (intmode, 1, two31);
22074 *xorp = expand_simple_binop (intmode, AND,
22075 gen_lowpart (intmode, tmp[0]),
22076 two31, NULL_RTX, 0,
22077 OPTAB_DIRECT);
22079 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
22080 0, OPTAB_DIRECT);
22083 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
22084 then replicate the value for all elements of the vector
22085 register. */
22088 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
22090 int i, n_elt;
22091 rtvec v;
22092 machine_mode scalar_mode;
22094 switch (mode)
22096 case E_V64QImode:
22097 case E_V32QImode:
22098 case E_V16QImode:
22099 case E_V32HImode:
22100 case E_V16HImode:
22101 case E_V8HImode:
22102 case E_V16SImode:
22103 case E_V8SImode:
22104 case E_V4SImode:
22105 case E_V8DImode:
22106 case E_V4DImode:
22107 case E_V2DImode:
22108 gcc_assert (vect);
22109 /* FALLTHRU */
22110 case E_V16SFmode:
22111 case E_V8SFmode:
22112 case E_V4SFmode:
22113 case E_V8DFmode:
22114 case E_V4DFmode:
22115 case E_V2DFmode:
22116 n_elt = GET_MODE_NUNITS (mode);
22117 v = rtvec_alloc (n_elt);
22118 scalar_mode = GET_MODE_INNER (mode);
22120 RTVEC_ELT (v, 0) = value;
22122 for (i = 1; i < n_elt; ++i)
22123 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
22125 return gen_rtx_CONST_VECTOR (mode, v);
22127 default:
22128 gcc_unreachable ();
22132 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
22133 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
22134 for an SSE register. If VECT is true, then replicate the mask for
22135 all elements of the vector register. If INVERT is true, then create
22136 a mask excluding the sign bit. */
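/* E.g. for V4SFmode with VECT and !INVERT this returns a vector whose
   elements have only the sign bit (0x80000000) set; with INVERT each
   element is 0x7fffffff instead.  */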
22139 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
22141 machine_mode vec_mode, imode;
22142 wide_int w;
22143 rtx mask, v;
22145 switch (mode)
22147 case E_V16SImode:
22148 case E_V16SFmode:
22149 case E_V8SImode:
22150 case E_V4SImode:
22151 case E_V8SFmode:
22152 case E_V4SFmode:
22153 vec_mode = mode;
22154 imode = SImode;
22155 break;
22157 case E_V8DImode:
22158 case E_V4DImode:
22159 case E_V2DImode:
22160 case E_V8DFmode:
22161 case E_V4DFmode:
22162 case E_V2DFmode:
22163 vec_mode = mode;
22164 imode = DImode;
22165 break;
22167 case E_TImode:
22168 case E_TFmode:
22169 vec_mode = VOIDmode;
22170 imode = TImode;
22171 break;
22173 default:
22174 gcc_unreachable ();
22177 machine_mode inner_mode = GET_MODE_INNER (mode);
22178 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
22179 GET_MODE_BITSIZE (inner_mode));
22180 if (invert)
22181 w = wi::bit_not (w);
22183 /* Force this value into the low part of a fp vector constant. */
22184 mask = immed_wide_int_const (w, imode);
22185 mask = gen_lowpart (inner_mode, mask);
22187 if (vec_mode == VOIDmode)
22188 return force_reg (inner_mode, mask);
22190 v = ix86_build_const_vector (vec_mode, vect, mask);
22191 return force_reg (vec_mode, v);
22194 /* Generate code for floating point ABS or NEG. */
22196 void
22197 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
22198 rtx operands[])
22200 rtx mask, set, dst, src;
22201 bool use_sse = false;
22202 bool vector_mode = VECTOR_MODE_P (mode);
22203 machine_mode vmode = mode;
22205 if (vector_mode)
22206 use_sse = true;
22207 else if (mode == TFmode)
22208 use_sse = true;
22209 else if (TARGET_SSE_MATH)
22211 use_sse = SSE_FLOAT_MODE_P (mode);
22212 if (mode == SFmode)
22213 vmode = V4SFmode;
22214 else if (mode == DFmode)
22215 vmode = V2DFmode;
22218 /* NEG and ABS performed with SSE use bitwise mask operations.
22219 Create the appropriate mask now. */
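  /* ABS clears the sign bit (AND with the inverted mask), while NEG
     flips it (XOR with the sign-bit mask).  */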
22220 if (use_sse)
22221 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
22222 else
22223 mask = NULL_RTX;
22225 dst = operands[0];
22226 src = operands[1];
22228 set = gen_rtx_fmt_e (code, mode, src);
22229 set = gen_rtx_SET (dst, set);
22231 if (mask)
22233 rtx use, clob;
22234 rtvec par;
22236 use = gen_rtx_USE (VOIDmode, mask);
22237 if (vector_mode)
22238 par = gen_rtvec (2, set, use);
22239 else
22241 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
22242 par = gen_rtvec (3, set, use, clob);
22244 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
22246 else
22247 emit_insn (set);
22250 /* Expand a copysign operation. Special case operand 0 being a constant. */
22252 void
22253 ix86_expand_copysign (rtx operands[])
22255 machine_mode mode, vmode;
22256 rtx dest, op0, op1, mask, nmask;
22258 dest = operands[0];
22259 op0 = operands[1];
22260 op1 = operands[2];
22262 mode = GET_MODE (dest);
22264 if (mode == SFmode)
22265 vmode = V4SFmode;
22266 else if (mode == DFmode)
22267 vmode = V2DFmode;
22268 else
22269 vmode = mode;
22271 if (CONST_DOUBLE_P (op0))
22273 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
22275 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
22276 op0 = simplify_unary_operation (ABS, mode, op0, mode);
22278 if (mode == SFmode || mode == DFmode)
22280 if (op0 == CONST0_RTX (mode))
22281 op0 = CONST0_RTX (vmode);
22282 else
22284 rtx v = ix86_build_const_vector (vmode, false, op0);
22286 op0 = force_reg (vmode, v);
22289 else if (op0 != CONST0_RTX (mode))
22290 op0 = force_reg (mode, op0);
22292 mask = ix86_build_signbit_mask (vmode, 0, 0);
22294 if (mode == SFmode)
22295 copysign_insn = gen_copysignsf3_const;
22296 else if (mode == DFmode)
22297 copysign_insn = gen_copysigndf3_const;
22298 else
22299 copysign_insn = gen_copysigntf3_const;
22301 emit_insn (copysign_insn (dest, op0, op1, mask));
22303 else
22305 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
22307 nmask = ix86_build_signbit_mask (vmode, 0, 1);
22308 mask = ix86_build_signbit_mask (vmode, 0, 0);
22310 if (mode == SFmode)
22311 copysign_insn = gen_copysignsf3_var;
22312 else if (mode == DFmode)
22313 copysign_insn = gen_copysigndf3_var;
22314 else
22315 copysign_insn = gen_copysigntf3_var;
22317 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
22321 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
22322 be a constant, and so has already been expanded into a vector constant. */
22324 void
22325 ix86_split_copysign_const (rtx operands[])
22327 machine_mode mode, vmode;
22328 rtx dest, op0, mask, x;
22330 dest = operands[0];
22331 op0 = operands[1];
22332 mask = operands[3];
22334 mode = GET_MODE (dest);
22335 vmode = GET_MODE (mask);
22337 dest = lowpart_subreg (vmode, dest, mode);
22338 x = gen_rtx_AND (vmode, dest, mask);
22339 emit_insn (gen_rtx_SET (dest, x));
22341 if (op0 != CONST0_RTX (vmode))
22343 x = gen_rtx_IOR (vmode, dest, op0);
22344 emit_insn (gen_rtx_SET (dest, x));
22348 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
22349 so we have to do two masks. */
22351 void
22352 ix86_split_copysign_var (rtx operands[])
22354 machine_mode mode, vmode;
22355 rtx dest, scratch, op0, op1, mask, nmask, x;
22357 dest = operands[0];
22358 scratch = operands[1];
22359 op0 = operands[2];
22360 op1 = operands[3];
22361 nmask = operands[4];
22362 mask = operands[5];
22364 mode = GET_MODE (dest);
22365 vmode = GET_MODE (mask);
22367 if (rtx_equal_p (op0, op1))
22369 /* Shouldn't happen often (it's useless, obviously), but when it does
22370 we'd generate incorrect code if we continue below. */
22371 emit_move_insn (dest, op0);
22372 return;
22375 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
22377 gcc_assert (REGNO (op1) == REGNO (scratch));
22379 x = gen_rtx_AND (vmode, scratch, mask);
22380 emit_insn (gen_rtx_SET (scratch, x));
22382 dest = mask;
22383 op0 = lowpart_subreg (vmode, op0, mode);
22384 x = gen_rtx_NOT (vmode, dest);
22385 x = gen_rtx_AND (vmode, x, op0);
22386 emit_insn (gen_rtx_SET (dest, x));
22388 else
22390 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
22392 x = gen_rtx_AND (vmode, scratch, mask);
22394 else /* alternative 2,4 */
22396 gcc_assert (REGNO (mask) == REGNO (scratch));
22397 op1 = lowpart_subreg (vmode, op1, mode);
22398 x = gen_rtx_AND (vmode, scratch, op1);
22400 emit_insn (gen_rtx_SET (scratch, x));
22402 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
22404 dest = lowpart_subreg (vmode, op0, mode);
22405 x = gen_rtx_AND (vmode, dest, nmask);
22407 else /* alternative 3,4 */
22409 gcc_assert (REGNO (nmask) == REGNO (dest));
22410 dest = nmask;
22411 op0 = lowpart_subreg (vmode, op0, mode);
22412 x = gen_rtx_AND (vmode, dest, op0);
22414 emit_insn (gen_rtx_SET (dest, x));
22417 x = gen_rtx_IOR (vmode, dest, scratch);
22418 emit_insn (gen_rtx_SET (dest, x));
22421 /* Return TRUE or FALSE depending on whether the first SET in INSN
22422 has source and destination with matching CC modes, and whether the
22423 CC mode is at least as constrained as REQ_MODE. */
22425 bool
22426 ix86_match_ccmode (rtx insn, machine_mode req_mode)
22428 rtx set;
22429 machine_mode set_mode;
22431 set = PATTERN (insn);
22432 if (GET_CODE (set) == PARALLEL)
22433 set = XVECEXP (set, 0, 0);
22434 gcc_assert (GET_CODE (set) == SET);
22435 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
22437 set_mode = GET_MODE (SET_DEST (set));
22438 switch (set_mode)
22440 case E_CCNOmode:
22441 if (req_mode != CCNOmode
22442 && (req_mode != CCmode
22443 || XEXP (SET_SRC (set), 1) != const0_rtx))
22444 return false;
22445 break;
22446 case E_CCmode:
22447 if (req_mode == CCGCmode)
22448 return false;
22449 /* FALLTHRU */
22450 case E_CCGCmode:
22451 if (req_mode == CCGOCmode || req_mode == CCNOmode)
22452 return false;
22453 /* FALLTHRU */
22454 case E_CCGOCmode:
22455 if (req_mode == CCZmode)
22456 return false;
22457 /* FALLTHRU */
22458 case E_CCZmode:
22459 break;
22461 case E_CCGZmode:
22463 case E_CCAmode:
22464 case E_CCCmode:
22465 case E_CCOmode:
22466 case E_CCPmode:
22467 case E_CCSmode:
22468 if (set_mode != req_mode)
22469 return false;
22470 break;
22472 default:
22473 gcc_unreachable ();
22476 return GET_MODE (SET_SRC (set)) == set_mode;
22479 /* Generate insn patterns to do an integer compare of OPERANDS. */
22481 static rtx
22482 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
22484 machine_mode cmpmode;
22485 rtx tmp, flags;
22487 cmpmode = SELECT_CC_MODE (code, op0, op1);
22488 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
22490 /* This is very simple, but making the interface the same as in the
22491 FP case makes the rest of the code easier. */
22492 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
22493 emit_insn (gen_rtx_SET (flags, tmp));
22495 /* Return the test that should be put into the flags user, i.e.
22496 the bcc, scc, or cmov instruction. */
22497 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22500 /* Figure out whether to use unordered fp comparisons. */
22502 static bool
22503 ix86_unordered_fp_compare (enum rtx_code code)
22505 if (!TARGET_IEEE_FP)
22506 return false;
22508 switch (code)
22510 case GT:
22511 case GE:
22512 case LT:
22513 case LE:
22514 return false;
22516 case EQ:
22517 case NE:
22519 case LTGT:
22520 case UNORDERED:
22521 case ORDERED:
22522 case UNLT:
22523 case UNLE:
22524 case UNGT:
22525 case UNGE:
22526 case UNEQ:
22527 return true;
22529 default:
22530 gcc_unreachable ();
22534 machine_mode
22535 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
22537 machine_mode mode = GET_MODE (op0);
22539 if (SCALAR_FLOAT_MODE_P (mode))
22541 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22542 return CCFPmode;
22545 switch (code)
22547 /* Only zero flag is needed. */
22548 case EQ: /* ZF=0 */
22549 case NE: /* ZF!=0 */
22550 return CCZmode;
22551 /* Codes needing carry flag. */
22552 case GEU: /* CF=0 */
22553 case LTU: /* CF=1 */
22554 /* Detect overflow checks. They need just the carry flag. */
22555 if (GET_CODE (op0) == PLUS
22556 && (rtx_equal_p (op1, XEXP (op0, 0))
22557 || rtx_equal_p (op1, XEXP (op0, 1))))
22558 return CCCmode;
22559 else
22560 return CCmode;
22561 case GTU: /* CF=0 & ZF=0 */
22562 case LEU: /* CF=1 | ZF=1 */
22563 return CCmode;
22564 /* Codes possibly doable only with sign flag when
22565 comparing against zero. */
22566 case GE: /* SF=OF or SF=0 */
22567 case LT: /* SF<>OF or SF=1 */
22568 if (op1 == const0_rtx)
22569 return CCGOCmode;
22570 else
22571 /* For other cases Carry flag is not required. */
22572 return CCGCmode;
22573 /* Codes doable only with the sign flag when comparing
22574 against zero, but we miss the jump instruction for it,
22575 so we need to use relational tests against overflow,
22576 which thus needs to be zero. */
22577 case GT: /* ZF=0 & SF=OF */
22578 case LE: /* ZF=1 | SF<>OF */
22579 if (op1 == const0_rtx)
22580 return CCNOmode;
22581 else
22582 return CCGCmode;
22583 /* The strcmp pattern does (use flags) and combine may ask us for the
22584 proper mode. */
22585 case USE:
22586 return CCmode;
22587 default:
22588 gcc_unreachable ();
22592 /* Return the fixed registers used for condition codes. */
22594 static bool
22595 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22597 *p1 = FLAGS_REG;
22598 *p2 = FPSR_REG;
22599 return true;
22602 /* If two condition code modes are compatible, return a condition code
22603 mode which is compatible with both. Otherwise, return
22604 VOIDmode. */
22606 static machine_mode
22607 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
22609 if (m1 == m2)
22610 return m1;
22612 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
22613 return VOIDmode;
22615 if ((m1 == CCGCmode && m2 == CCGOCmode)
22616 || (m1 == CCGOCmode && m2 == CCGCmode))
22617 return CCGCmode;
22619 if ((m1 == CCNOmode && m2 == CCGOCmode)
22620 || (m1 == CCGOCmode && m2 == CCNOmode))
22621 return CCNOmode;
22623 if (m1 == CCZmode
22624 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
22625 return m2;
22626 else if (m2 == CCZmode
22627 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
22628 return m1;
22630 switch (m1)
22632 default:
22633 gcc_unreachable ();
22635 case E_CCmode:
22636 case E_CCGCmode:
22637 case E_CCGOCmode:
22638 case E_CCNOmode:
22639 case E_CCAmode:
22640 case E_CCCmode:
22641 case E_CCOmode:
22642 case E_CCPmode:
22643 case E_CCSmode:
22644 case E_CCZmode:
22645 switch (m2)
22647 default:
22648 return VOIDmode;
22650 case E_CCmode:
22651 case E_CCGCmode:
22652 case E_CCGOCmode:
22653 case E_CCNOmode:
22654 case E_CCAmode:
22655 case E_CCCmode:
22656 case E_CCOmode:
22657 case E_CCPmode:
22658 case E_CCSmode:
22659 case E_CCZmode:
22660 return CCmode;
22663 case E_CCFPmode:
22664 /* These are only compatible with themselves, which we already
22665 checked above. */
22666 return VOIDmode;
22671 /* Return a comparison we can do that is equivalent to
22672 swap_condition (code), apart possibly from orderedness.
22673 But never change orderedness if TARGET_IEEE_FP, returning
22674 UNKNOWN in that case if necessary. */
22676 static enum rtx_code
22677 ix86_fp_swap_condition (enum rtx_code code)
22679 switch (code)
22681 case GT: /* GTU - CF=0 & ZF=0 */
22682 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22683 case GE: /* GEU - CF=0 */
22684 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22685 case UNLT: /* LTU - CF=1 */
22686 return TARGET_IEEE_FP ? UNKNOWN : GT;
22687 case UNLE: /* LEU - CF=1 | ZF=1 */
22688 return TARGET_IEEE_FP ? UNKNOWN : GE;
22689 default:
22690 return swap_condition (code);
22694 /* Return the cost of comparison CODE using the best strategy for performance.
22695 All following functions use the number of instructions as a cost metric.
22696 In the future this should be tweaked to compute bytes for optimize_size and
22697 take into account the performance of various instructions on various CPUs. */
22699 static int
22700 ix86_fp_comparison_cost (enum rtx_code code)
22702 int arith_cost;
22704 /* The cost of code using bit-twiddling on %ah. */
22705 switch (code)
22707 case UNLE:
22708 case UNLT:
22709 case LTGT:
22710 case GT:
22711 case GE:
22712 case UNORDERED:
22713 case ORDERED:
22714 case UNEQ:
22715 arith_cost = 4;
22716 break;
22717 case LT:
22718 case NE:
22719 case EQ:
22720 case UNGE:
22721 arith_cost = TARGET_IEEE_FP ? 5 : 4;
22722 break;
22723 case LE:
22724 case UNGT:
22725 arith_cost = TARGET_IEEE_FP ? 6 : 4;
22726 break;
22727 default:
22728 gcc_unreachable ();
22731 switch (ix86_fp_comparison_strategy (code))
22733 case IX86_FPCMP_COMI:
22734 return arith_cost > 4 ? 3 : 2;
22735 case IX86_FPCMP_SAHF:
22736 return arith_cost > 4 ? 4 : 3;
22737 default:
22738 return arith_cost;
22742 /* Return the strategy to use for floating-point. We assume that fcomi is always
22743 preferable where available, since that is also true when looking at size
22744 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
22746 enum ix86_fpcmp_strategy
22747 ix86_fp_comparison_strategy (enum rtx_code)
22749 /* Do fcomi/sahf based test when profitable. */
22751 if (TARGET_CMOVE)
22752 return IX86_FPCMP_COMI;
22754 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22755 return IX86_FPCMP_SAHF;
22757 return IX86_FPCMP_ARITH;
22760 /* Swap, force into registers, or otherwise massage the two operands
22761 to a fp comparison. The operands are updated in place; the new
22762 comparison code is returned. */
22764 static enum rtx_code
22765 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22767 bool unordered_compare = ix86_unordered_fp_compare (code);
22768 rtx op0 = *pop0, op1 = *pop1;
22769 machine_mode op_mode = GET_MODE (op0);
22770 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22772 /* All of the unordered compare instructions only work on registers.
22773 The same is true of the fcomi compare instructions. The XFmode
22774 compare instructions require registers except when comparing
22775 against zero or when converting operand 1 from fixed point to
22776 floating point. */
22778 if (!is_sse
22779 && (unordered_compare
22780 || (op_mode == XFmode
22781 && ! (standard_80387_constant_p (op0) == 1
22782 || standard_80387_constant_p (op1) == 1)
22783 && GET_CODE (op1) != FLOAT)
22784 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22786 op0 = force_reg (op_mode, op0);
22787 op1 = force_reg (op_mode, op1);
22789 else
22791 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
22792 things around if they appear profitable, otherwise force op0
22793 into a register. */
22795 if (standard_80387_constant_p (op0) == 0
22796 || (MEM_P (op0)
22797 && ! (standard_80387_constant_p (op1) == 0
22798 || MEM_P (op1))))
22800 enum rtx_code new_code = ix86_fp_swap_condition (code);
22801 if (new_code != UNKNOWN)
22803 std::swap (op0, op1);
22804 code = new_code;
22808 if (!REG_P (op0))
22809 op0 = force_reg (op_mode, op0);
22811 if (CONSTANT_P (op1))
22813 int tmp = standard_80387_constant_p (op1);
22814 if (tmp == 0)
22815 op1 = validize_mem (force_const_mem (op_mode, op1));
22816 else if (tmp == 1)
22818 if (TARGET_CMOVE)
22819 op1 = force_reg (op_mode, op1);
22821 else
22822 op1 = force_reg (op_mode, op1);
22826 /* Try to rearrange the comparison to make it cheaper. */
22827 if (ix86_fp_comparison_cost (code)
22828 > ix86_fp_comparison_cost (swap_condition (code))
22829 && (REG_P (op1) || can_create_pseudo_p ()))
22831 std::swap (op0, op1);
22832 code = swap_condition (code);
22833 if (!REG_P (op0))
22834 op0 = force_reg (op_mode, op0);
22837 *pop0 = op0;
22838 *pop1 = op1;
22839 return code;
22842 /* Convert comparison codes we use to represent FP comparison to integer
22843 code that will result in proper branch. Return UNKNOWN if no such code
22844 is available. */
22846 enum rtx_code
22847 ix86_fp_compare_code_to_integer (enum rtx_code code)
22849 switch (code)
22851 case GT:
22852 return GTU;
22853 case GE:
22854 return GEU;
22855 case ORDERED:
22856 case UNORDERED:
22857 return code;
22858 case UNEQ:
22859 return EQ;
22860 case UNLT:
22861 return LTU;
22862 case UNLE:
22863 return LEU;
22864 case LTGT:
22865 return NE;
22866 default:
22867 return UNKNOWN;
22871 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22873 static rtx
22874 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22876 bool unordered_compare = ix86_unordered_fp_compare (code);
22877 machine_mode intcmp_mode;
22878 rtx tmp, tmp2;
22880 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22882 /* Do fcomi/sahf based test when profitable. */
22883 switch (ix86_fp_comparison_strategy (code))
22885 case IX86_FPCMP_COMI:
22886 intcmp_mode = CCFPmode;
22887 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22888 if (unordered_compare)
22889 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22890 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22891 break;
22893 case IX86_FPCMP_SAHF:
22894 intcmp_mode = CCFPmode;
22895 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22896 if (unordered_compare)
22897 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22898 tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22899 if (!scratch)
22900 scratch = gen_reg_rtx (HImode);
22901 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22902 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22903 break;
22905 case IX86_FPCMP_ARITH:
22906 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22907 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22908 if (unordered_compare)
22909 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22910 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22911 if (!scratch)
22912 scratch = gen_reg_rtx (HImode);
22913 emit_insn (gen_rtx_SET (scratch, tmp));
22915 /* In the unordered case, we have to check C2 for NaN's, which
22916 doesn't happen to work out to anything nice combination-wise.
22917 So do some bit twiddling on the value we've got in AH to come
22918 up with an appropriate set of condition codes. */
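      /* After fnstsw, AH holds the FPU condition bits: C0 = 0x01 (less
	 than), C2 = 0x04 (unordered) and C3 = 0x40 (equal); the 0x45 mask
	 used below tests all three at once.  */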
22920 intcmp_mode = CCNOmode;
22921 switch (code)
22923 case GT:
22924 case UNGT:
22925 if (code == GT || !TARGET_IEEE_FP)
22927 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22928 code = EQ;
22930 else
22932 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22933 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22934 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22935 intcmp_mode = CCmode;
22936 code = GEU;
22938 break;
22939 case LT:
22940 case UNLT:
22941 if (code == LT && TARGET_IEEE_FP)
22943 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22944 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22945 intcmp_mode = CCmode;
22946 code = EQ;
22948 else
22950 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22951 code = NE;
22953 break;
22954 case GE:
22955 case UNGE:
22956 if (code == GE || !TARGET_IEEE_FP)
22958 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22959 code = EQ;
22961 else
22963 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22964 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22965 code = NE;
22967 break;
22968 case LE:
22969 case UNLE:
22970 if (code == LE && TARGET_IEEE_FP)
22972 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22973 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22974 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22975 intcmp_mode = CCmode;
22976 code = LTU;
22978 else
22980 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22981 code = NE;
22983 break;
22984 case EQ:
22985 case UNEQ:
22986 if (code == EQ && TARGET_IEEE_FP)
22988 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22989 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22990 intcmp_mode = CCmode;
22991 code = EQ;
22993 else
22995 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22996 code = NE;
22998 break;
22999 case NE:
23000 case LTGT:
23001 if (code == NE && TARGET_IEEE_FP)
23003 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
23004 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
23005 GEN_INT (0x40)));
23006 code = NE;
23008 else
23010 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
23011 code = EQ;
23013 break;
23015 case UNORDERED:
23016 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
23017 code = NE;
23018 break;
23019 case ORDERED:
23020 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
23021 code = EQ;
23022 break;
23024 default:
23025 gcc_unreachable ();
23027 break;
23029 default:
23030 gcc_unreachable();
23033 /* Return the test that should be put into the flags user, i.e.
23034 the bcc, scc, or cmov instruction. */
23035 return gen_rtx_fmt_ee (code, VOIDmode,
23036 gen_rtx_REG (intcmp_mode, FLAGS_REG),
23037 const0_rtx);
23040 static rtx
23041 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
23043 rtx ret;
23045 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
23046 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
23048 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
23050 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
23051 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23053 else
23054 ret = ix86_expand_int_compare (code, op0, op1);
23056 return ret;
23059 void
23060 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
23062 machine_mode mode = GET_MODE (op0);
23063 rtx tmp;
23065 /* Handle special case - vector comparison with boolean result, transform
23066 it using ptest instruction. */
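/* PTEST sets ZF when the bitwise AND of its two operands is zero, so
   "ptest tmp, tmp" with tmp = op0 ^ op1 sets ZF exactly when the two
   vectors are bitwise equal; EQ/NE then become plain ZF tests.  */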
23067 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23069 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
23070 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
23072 gcc_assert (code == EQ || code == NE);
23073 /* Generate XOR since we can't check that one operand is zero vector. */
23074 tmp = gen_reg_rtx (mode);
23075 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
23076 tmp = gen_lowpart (p_mode, tmp);
23077 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
23078 gen_rtx_UNSPEC (CCmode,
23079 gen_rtvec (2, tmp, tmp),
23080 UNSPEC_PTEST)));
23081 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
23082 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23083 gen_rtx_LABEL_REF (VOIDmode, label),
23084 pc_rtx);
23085 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23086 return;
23089 switch (mode)
23091 case E_SFmode:
23092 case E_DFmode:
23093 case E_XFmode:
23094 case E_QImode:
23095 case E_HImode:
23096 case E_SImode:
23097 simple:
23098 tmp = ix86_expand_compare (code, op0, op1);
23099 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23100 gen_rtx_LABEL_REF (VOIDmode, label),
23101 pc_rtx);
23102 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23103 return;
23105 case E_DImode:
23106 if (TARGET_64BIT)
23107 goto simple;
23108 /* For 32-bit targets a DImode comparison may be performed in
23109 SSE registers. To allow this we must avoid the split into
23110 SImode, which is achieved by doing the xor in DImode and then
23111 comparing against zero (a pattern recognized by the
23112 STV pass). We don't use the xor form when optimizing
23113 for size. */
23114 if (!optimize_insn_for_size_p ()
23115 && TARGET_STV
23116 && (code == EQ || code == NE))
23118 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
23119 op1 = const0_rtx;
23121 /* FALLTHRU */
23122 case E_TImode:
23123 /* Expand DImode/TImode branch into multiple compare+branch. */
23125 rtx lo[2], hi[2];
23126 rtx_code_label *label2;
23127 enum rtx_code code1, code2, code3;
23128 machine_mode submode;
23130 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
23132 std::swap (op0, op1);
23133 code = swap_condition (code);
23136 split_double_mode (mode, &op0, 1, lo+0, hi+0);
23137 split_double_mode (mode, &op1, 1, lo+1, hi+1);
23139 submode = mode == DImode ? SImode : DImode;
23141 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
23142 avoid two branches. This costs one extra insn, so disable when
23143 optimizing for size. */
23145 if ((code == EQ || code == NE)
23146 && (!optimize_insn_for_size_p ()
23147 || hi[1] == const0_rtx || lo[1] == const0_rtx))
23149 rtx xor0, xor1;
23151 xor1 = hi[0];
23152 if (hi[1] != const0_rtx)
23153 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
23154 NULL_RTX, 0, OPTAB_WIDEN);
23156 xor0 = lo[0];
23157 if (lo[1] != const0_rtx)
23158 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
23159 NULL_RTX, 0, OPTAB_WIDEN);
23161 tmp = expand_binop (submode, ior_optab, xor1, xor0,
23162 NULL_RTX, 0, OPTAB_WIDEN);
23164 ix86_expand_branch (code, tmp, const0_rtx, label);
23165 return;
23168 /* Otherwise, if we are doing less-than or greater-or-equal and
23169 op1 is a constant whose low word is zero, then we can just
23170 examine the high word. Similarly for a low word of -1 and
23171 less-than-or-equal or greater-than. */
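/* For example, a signed DImode test a < (H << 32) reduces to
   hi(a) < H: when the high words compare equal, the unsigned low
   word of A can never be below the constant's low word of zero.  */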
23173 if (CONST_INT_P (hi[1]))
23174 switch (code)
23176 case LT: case LTU: case GE: case GEU:
23177 if (lo[1] == const0_rtx)
23179 ix86_expand_branch (code, hi[0], hi[1], label);
23180 return;
23182 break;
23183 case LE: case LEU: case GT: case GTU:
23184 if (lo[1] == constm1_rtx)
23186 ix86_expand_branch (code, hi[0], hi[1], label);
23187 return;
23189 break;
23190 default:
23191 break;
23194 /* Emulate comparisons that do not depend on Zero flag with
23195 double-word subtraction. Note that only Overflow, Sign
23196 and Carry flags are valid, so swap arguments and condition
23197 of comparisons that would otherwise test Zero flag. */
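/* The sequence emitted below is the classic double-word compare: a
   "cmp" of the low words followed by an "sbb" of the high words into
   a scratch register.  The sbb folds the borrow from the low halves
   into the high-half subtraction, so afterwards the carry flag
   (unsigned) resp. the sign/overflow flags (signed) describe the
   ordering of the full double-word values.  */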
23199 switch (code)
23201 case LE: case LEU: case GT: case GTU:
23202 std::swap (lo[0], lo[1]);
23203 std::swap (hi[0], hi[1]);
23204 code = swap_condition (code);
23205 /* FALLTHRU */
23207 case LT: case LTU: case GE: case GEU:
23209 rtx (*cmp_insn) (rtx, rtx);
23210 rtx (*sbb_insn) (rtx, rtx, rtx);
23211 bool uns = (code == LTU || code == GEU);
23213 if (TARGET_64BIT)
23215 cmp_insn = gen_cmpdi_1;
23216 sbb_insn
23217 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
23219 else
23221 cmp_insn = gen_cmpsi_1;
23222 sbb_insn
23223 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
23226 if (!nonimmediate_operand (lo[0], submode))
23227 lo[0] = force_reg (submode, lo[0]);
23228 if (!x86_64_general_operand (lo[1], submode))
23229 lo[1] = force_reg (submode, lo[1]);
23231 if (!register_operand (hi[0], submode))
23232 hi[0] = force_reg (submode, hi[0]);
23233 if ((uns && !nonimmediate_operand (hi[1], submode))
23234 || (!uns && !x86_64_general_operand (hi[1], submode)))
23235 hi[1] = force_reg (submode, hi[1]);
23237 emit_insn (cmp_insn (lo[0], lo[1]));
23238 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
23240 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
23242 ix86_expand_branch (code, tmp, const0_rtx, label);
23243 return;
23246 default:
23247 break;
23250 /* Otherwise, we need two or three jumps. */
23252 label2 = gen_label_rtx ();
23254 code1 = code;
23255 code2 = swap_condition (code);
23256 code3 = unsigned_condition (code);
23258 switch (code)
23260 case LT: case GT: case LTU: case GTU:
23261 break;
23263 case LE: code1 = LT; code2 = GT; break;
23264 case GE: code1 = GT; code2 = LT; break;
23265 case LEU: code1 = LTU; code2 = GTU; break;
23266 case GEU: code1 = GTU; code2 = LTU; break;
23268 case EQ: code1 = UNKNOWN; code2 = NE; break;
23269 case NE: code2 = UNKNOWN; break;
23271 default:
23272 gcc_unreachable ();
23276 * a < b =>
23277 * if (hi(a) < hi(b)) goto true;
23278 * if (hi(a) > hi(b)) goto false;
23279 * if (lo(a) < lo(b)) goto true;
23280 * false:
23283 if (code1 != UNKNOWN)
23284 ix86_expand_branch (code1, hi[0], hi[1], label);
23285 if (code2 != UNKNOWN)
23286 ix86_expand_branch (code2, hi[0], hi[1], label2);
23288 ix86_expand_branch (code3, lo[0], lo[1], label);
23290 if (code2 != UNKNOWN)
23291 emit_label (label2);
23292 return;
23295 default:
23296 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
23297 goto simple;
23301 void
23302 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23304 rtx ret;
23306 gcc_assert (GET_MODE (dest) == QImode);
23308 ret = ix86_expand_compare (code, op0, op1);
23309 PUT_MODE (ret, QImode);
23310 emit_insn (gen_rtx_SET (dest, ret));
23313 /* Expand comparison setting or clearing carry flag. Return true when
23314 successful and set pop for the operation. */
23315 static bool
23316 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
23318 machine_mode mode =
23319 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
23321 /* Do not handle double-mode compares that go through special path. */
23322 if (mode == (TARGET_64BIT ? TImode : DImode))
23323 return false;
23325 if (SCALAR_FLOAT_MODE_P (mode))
23327 rtx compare_op;
23328 rtx_insn *compare_seq;
23330 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
23332 /* Shortcut: following common codes never translate
23333 into carry flag compares. */
23334 if (code == EQ || code == NE || code == UNEQ || code == LTGT
23335 || code == ORDERED || code == UNORDERED)
23336 return false;
23338 /* These comparisons require zero flag; swap operands so they won't. */
23339 if ((code == GT || code == UNLE || code == LE || code == UNGT)
23340 && !TARGET_IEEE_FP)
23342 std::swap (op0, op1);
23343 code = swap_condition (code);
23346 /* Try to expand the comparison and verify that we end up with
23347 a carry flag based comparison. This fails to be true only when
23348 we decide to expand the comparison using arithmetic, which is
23349 not a common scenario. */
23350 start_sequence ();
23351 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23352 compare_seq = get_insns ();
23353 end_sequence ();
23355 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
23356 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
23357 else
23358 code = GET_CODE (compare_op);
23360 if (code != LTU && code != GEU)
23361 return false;
23363 emit_insn (compare_seq);
23364 *pop = compare_op;
23365 return true;
23368 if (!INTEGRAL_MODE_P (mode))
23369 return false;
23371 switch (code)
23373 case LTU:
23374 case GEU:
23375 break;
23377 /* Convert a==0 into (unsigned)a<1. */
23378 case EQ:
23379 case NE:
23380 if (op1 != const0_rtx)
23381 return false;
23382 op1 = const1_rtx;
23383 code = (code == EQ ? LTU : GEU);
23384 break;
23386 /* Convert a>b into b<a or a>=b+1. */
23387 case GTU:
23388 case LEU:
23389 if (CONST_INT_P (op1))
23391 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
23392 /* Bail out on overflow. We still can swap operands but that
23393 would force loading of the constant into register. */
23394 if (op1 == const0_rtx
23395 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
23396 return false;
23397 code = (code == GTU ? GEU : LTU);
23399 else
23401 std::swap (op0, op1);
23402 code = (code == GTU ? LTU : GEU);
23404 break;
23406 /* Convert a>=0 into (unsigned)a<0x80000000. */
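/* a >= 0 holds exactly when the sign bit is clear, i.e. when
   (unsigned) a < 0x80000000, so the test again maps onto the carry
   flag.  */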
23407 case LT:
23408 case GE:
23409 if (mode == DImode || op1 != const0_rtx)
23410 return false;
23411 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23412 code = (code == LT ? GEU : LTU);
23413 break;
23414 case LE:
23415 case GT:
23416 if (mode == DImode || op1 != constm1_rtx)
23417 return false;
23418 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23419 code = (code == LE ? GEU : LTU);
23420 break;
23422 default:
23423 return false;
23425 /* Swapping operands may cause constant to appear as first operand. */
23426 if (!nonimmediate_operand (op0, VOIDmode))
23428 if (!can_create_pseudo_p ())
23429 return false;
23430 op0 = force_reg (mode, op0);
23432 *pop = ix86_expand_compare (code, op0, op1);
23433 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
23434 return true;
23437 bool
23438 ix86_expand_int_movcc (rtx operands[])
23440 enum rtx_code code = GET_CODE (operands[1]), compare_code;
23441 rtx_insn *compare_seq;
23442 rtx compare_op;
23443 machine_mode mode = GET_MODE (operands[0]);
23444 bool sign_bit_compare_p = false;
23445 rtx op0 = XEXP (operands[1], 0);
23446 rtx op1 = XEXP (operands[1], 1);
23448 if (GET_MODE (op0) == TImode
23449 || (GET_MODE (op0) == DImode
23450 && !TARGET_64BIT))
23451 return false;
23453 start_sequence ();
23454 compare_op = ix86_expand_compare (code, op0, op1);
23455 compare_seq = get_insns ();
23456 end_sequence ();
23458 compare_code = GET_CODE (compare_op);
23460 if ((op1 == const0_rtx && (code == GE || code == LT))
23461 || (op1 == constm1_rtx && (code == GT || code == LE)))
23462 sign_bit_compare_p = true;
23464 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
23465 HImode insns, we'd be swallowed in word prefix ops. */
23467 if ((mode != HImode || TARGET_FAST_PREFIX)
23468 && (mode != (TARGET_64BIT ? TImode : DImode))
23469 && CONST_INT_P (operands[2])
23470 && CONST_INT_P (operands[3]))
23472 rtx out = operands[0];
23473 HOST_WIDE_INT ct = INTVAL (operands[2]);
23474 HOST_WIDE_INT cf = INTVAL (operands[3]);
23475 HOST_WIDE_INT diff;
23477 diff = ct - cf;
23478 /* Sign bit compares are better done using shifts than by using
23479 sbb. */
23480 if (sign_bit_compare_p
23481 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
23483 /* Detect overlap between destination and compare sources. */
23484 rtx tmp = out;
23486 if (!sign_bit_compare_p)
23488 rtx flags;
23489 bool fpcmp = false;
23491 compare_code = GET_CODE (compare_op);
23493 flags = XEXP (compare_op, 0);
23495 if (GET_MODE (flags) == CCFPmode)
23497 fpcmp = true;
23498 compare_code
23499 = ix86_fp_compare_code_to_integer (compare_code);
23502 /* To simplify rest of code, restrict to the GEU case. */
23503 if (compare_code == LTU)
23505 std::swap (ct, cf);
23506 compare_code = reverse_condition (compare_code);
23507 code = reverse_condition (code);
23509 else
23511 if (fpcmp)
23512 PUT_CODE (compare_op,
23513 reverse_condition_maybe_unordered
23514 (GET_CODE (compare_op)));
23515 else
23516 PUT_CODE (compare_op,
23517 reverse_condition (GET_CODE (compare_op)));
23519 diff = ct - cf;
23521 if (reg_overlap_mentioned_p (out, op0)
23522 || reg_overlap_mentioned_p (out, op1))
23523 tmp = gen_reg_rtx (mode);
23525 if (mode == DImode)
23526 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
23527 else
23528 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
23529 flags, compare_op));
23531 else
23533 if (code == GT || code == GE)
23534 code = reverse_condition (code);
23535 else
23537 std::swap (ct, cf);
23538 diff = ct - cf;
23540 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
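/* Whichever path was taken above, TMP is now either all ones or all
   zeros depending on the comparison outcome: the 0/-1 conditional
   move pattern materializes the carry flag into every bit of the
   register (an sbb of a register with itself), and emit_store_flag
   was asked for a -1/0 result.  The cases below combine that mask
   with CT and CF using branch-free arithmetic.  */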
23543 if (diff == 1)
23546 * cmpl op0,op1
23547 * sbbl dest,dest
23548 * [addl dest, ct]
23550 * Size 5 - 8.
23552 if (ct)
23553 tmp = expand_simple_binop (mode, PLUS,
23554 tmp, GEN_INT (ct),
23555 copy_rtx (tmp), 1, OPTAB_DIRECT);
23557 else if (cf == -1)
23560 * cmpl op0,op1
23561 * sbbl dest,dest
23562 * orl $ct, dest
23564 * Size 8.
23566 tmp = expand_simple_binop (mode, IOR,
23567 tmp, GEN_INT (ct),
23568 copy_rtx (tmp), 1, OPTAB_DIRECT);
23570 else if (diff == -1 && ct)
23573 * cmpl op0,op1
23574 * sbbl dest,dest
23575 * notl dest
23576 * [addl dest, cf]
23578 * Size 8 - 11.
23580 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23581 if (cf)
23582 tmp = expand_simple_binop (mode, PLUS,
23583 copy_rtx (tmp), GEN_INT (cf),
23584 copy_rtx (tmp), 1, OPTAB_DIRECT);
23586 else
23589 * cmpl op0,op1
23590 * sbbl dest,dest
23591 * [notl dest]
23592 * andl cf - ct, dest
23593 * [addl dest, ct]
23595 * Size 8 - 11.
23598 if (cf == 0)
23600 cf = ct;
23601 ct = 0;
23602 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23605 tmp = expand_simple_binop (mode, AND,
23606 copy_rtx (tmp),
23607 gen_int_mode (cf - ct, mode),
23608 copy_rtx (tmp), 1, OPTAB_DIRECT);
23609 if (ct)
23610 tmp = expand_simple_binop (mode, PLUS,
23611 copy_rtx (tmp), GEN_INT (ct),
23612 copy_rtx (tmp), 1, OPTAB_DIRECT);
23615 if (!rtx_equal_p (tmp, out))
23616 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
23618 return true;
23621 if (diff < 0)
23623 machine_mode cmp_mode = GET_MODE (op0);
23624 enum rtx_code new_code;
23626 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23628 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23630 /* We may be reversing an unordered compare to a normal compare, which
23631 is not valid in general (we may convert a non-trapping condition
23632 into a trapping one); however, on i386 we currently emit all
23633 comparisons unordered. */
23634 new_code = reverse_condition_maybe_unordered (code);
23636 else
23637 new_code = ix86_reverse_condition (code, cmp_mode);
23638 if (new_code != UNKNOWN)
23640 std::swap (ct, cf);
23641 diff = -diff;
23642 code = new_code;
23646 compare_code = UNKNOWN;
23647 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23648 && CONST_INT_P (op1))
23650 if (op1 == const0_rtx
23651 && (code == LT || code == GE))
23652 compare_code = code;
23653 else if (op1 == constm1_rtx)
23655 if (code == LE)
23656 compare_code = LT;
23657 else if (code == GT)
23658 compare_code = GE;
23662 /* Optimize dest = (op0 < 0) ? -1 : cf. */
23663 if (compare_code != UNKNOWN
23664 && GET_MODE (op0) == GET_MODE (out)
23665 && (cf == -1 || ct == -1))
23667 /* If lea code below could be used, only optimize
23668 if it results in a 2 insn sequence. */
23670 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23671 || diff == 3 || diff == 5 || diff == 9)
23672 || (compare_code == LT && ct == -1)
23673 || (compare_code == GE && cf == -1))
23676 * notl op1 (if necessary)
23677 * sarl $31, op1
23678 * orl cf, op1
23680 if (ct != -1)
23682 cf = ct;
23683 ct = -1;
23684 code = reverse_condition (code);
23687 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23689 out = expand_simple_binop (mode, IOR,
23690 out, GEN_INT (cf),
23691 out, 1, OPTAB_DIRECT);
23692 if (out != operands[0])
23693 emit_move_insn (operands[0], out);
23695 return true;
23700 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23701 || diff == 3 || diff == 5 || diff == 9)
23702 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23703 && (mode != DImode
23704 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23707 * xorl dest,dest
23708 * cmpl op1,op2
23709 * setcc dest
23710 * lea cf(dest*(ct-cf)),dest
23712 * Size 14.
23714 * This also catches the degenerate setcc-only case.
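/* After the setcc, DEST is 0 or 1, so cf + dest * (ct - cf)
   evaluates to CF or CT.  DIFF is restricted to 1, 2, 3, 4, 5, 8 or
   9 above precisely so that the multiply-and-add can be encoded as a
   single lea using its scale and displacement fields.  */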
23717 rtx tmp;
23718 int nops;
23720 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23722 nops = 0;
23723 /* On x86_64 the lea instruction operates on Pmode, so we need
23724 to get the arithmetic done in the proper mode to match. */
23725 if (diff == 1)
23726 tmp = copy_rtx (out);
23727 else
23729 rtx out1;
23730 out1 = copy_rtx (out);
23731 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23732 nops++;
23733 if (diff & 1)
23735 tmp = gen_rtx_PLUS (mode, tmp, out1);
23736 nops++;
23739 if (cf != 0)
23741 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23742 nops++;
23744 if (!rtx_equal_p (tmp, out))
23746 if (nops == 1)
23747 out = force_operand (tmp, copy_rtx (out));
23748 else
23749 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23751 if (!rtx_equal_p (out, operands[0]))
23752 emit_move_insn (operands[0], copy_rtx (out));
23754 return true;
23758 * General case: Jumpful:
23759 * xorl dest,dest cmpl op1, op2
23760 * cmpl op1, op2 movl ct, dest
23761 * setcc dest jcc 1f
23762 * decl dest movl cf, dest
23763 * andl (cf-ct),dest 1:
23764 * addl ct,dest
23766 * Size 20. Size 14.
23768 * This is reasonably steep, but branch mispredict costs are
23769 * high on modern cpus, so consider failing only if optimizing
23770 * for space.
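/* The branch-free sequence below computes
     dest = ct + ((cond - 1) & (cf - ct))
   where COND is the 0/1 setcc result: subtracting 1 gives an
   all-ones mask when the condition is false, which selects CF - CT,
   and adding CT back yields CF resp. CT.  */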
23773 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23774 && BRANCH_COST (optimize_insn_for_speed_p (),
23775 false) >= 2)
23777 if (cf == 0)
23779 machine_mode cmp_mode = GET_MODE (op0);
23780 enum rtx_code new_code;
23782 if (SCALAR_FLOAT_MODE_P (cmp_mode))
23784 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23786 /* We may be reversing an unordered compare to a normal compare,
23787 which is not valid in general (we may convert a non-trapping
23788 condition into a trapping one); however, on i386 we currently
23789 emit all comparisons unordered. */
23790 new_code = reverse_condition_maybe_unordered (code);
23792 else
23794 new_code = ix86_reverse_condition (code, cmp_mode);
23795 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23796 compare_code = reverse_condition (compare_code);
23799 if (new_code != UNKNOWN)
23801 cf = ct;
23802 ct = 0;
23803 code = new_code;
23807 if (compare_code != UNKNOWN)
23809 /* notl op1 (if needed)
23810 sarl $31, op1
23811 andl (cf-ct), op1
23812 addl ct, op1
23814 For x < 0 (resp. x <= -1) there will be no notl,
23815 so if possible swap the constants to get rid of the
23816 complement.
23817 True/false will be -1/0 while code below (store flag
23818 followed by decrement) is 0/-1, so the constants need
23819 to be exchanged once more. */
23821 if (compare_code == GE || !cf)
23823 code = reverse_condition (code);
23824 compare_code = LT;
23826 else
23827 std::swap (ct, cf);
23829 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23831 else
23833 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23835 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23836 constm1_rtx,
23837 copy_rtx (out), 1, OPTAB_DIRECT);
23840 out = expand_simple_binop (mode, AND, copy_rtx (out),
23841 gen_int_mode (cf - ct, mode),
23842 copy_rtx (out), 1, OPTAB_DIRECT);
23843 if (ct)
23844 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23845 copy_rtx (out), 1, OPTAB_DIRECT);
23846 if (!rtx_equal_p (out, operands[0]))
23847 emit_move_insn (operands[0], copy_rtx (out));
23849 return true;
23853 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23855 /* Try a few things more with specific constants and a variable. */
23857 optab op;
23858 rtx var, orig_out, out, tmp;
23860 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23861 return false;
23863 /* If one of the two operands is an interesting constant, recurse to
23864 load a 0/-1 value via the code above and mask the variable in with a logical operation. */
23866 if (CONST_INT_P (operands[2]))
23868 var = operands[3];
23869 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23870 operands[3] = constm1_rtx, op = and_optab;
23871 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23872 operands[3] = const0_rtx, op = ior_optab;
23873 else
23874 return false;
23876 else if (CONST_INT_P (operands[3]))
23878 var = operands[2];
23879 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23880 operands[2] = constm1_rtx, op = and_optab;
23881 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
23882 operands[2] = const0_rtx, op = ior_optab;
23883 else
23884 return false;
23886 else
23887 return false;
23889 orig_out = operands[0];
23890 tmp = gen_reg_rtx (mode);
23891 operands[0] = tmp;
23893 /* Recurse to get the constant loaded. */
23894 if (!ix86_expand_int_movcc (operands))
23895 return false;
23897 /* Mask in the interesting variable. */
23898 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23899 OPTAB_WIDEN);
23900 if (!rtx_equal_p (out, orig_out))
23901 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23903 return true;
23907 * For comparison with above,
23909 * movl cf,dest
23910 * movl ct,tmp
23911 * cmpl op1,op2
23912 * cmovcc tmp,dest
23914 * Size 15.
23917 if (! nonimmediate_operand (operands[2], mode))
23918 operands[2] = force_reg (mode, operands[2]);
23919 if (! nonimmediate_operand (operands[3], mode))
23920 operands[3] = force_reg (mode, operands[3]);
23922 if (! register_operand (operands[2], VOIDmode)
23923 && (mode == QImode
23924 || ! register_operand (operands[3], VOIDmode)))
23925 operands[2] = force_reg (mode, operands[2]);
23927 if (mode == QImode
23928 && ! register_operand (operands[3], VOIDmode))
23929 operands[3] = force_reg (mode, operands[3]);
23931 emit_insn (compare_seq);
23932 emit_insn (gen_rtx_SET (operands[0],
23933 gen_rtx_IF_THEN_ELSE (mode,
23934 compare_op, operands[2],
23935 operands[3])));
23936 return true;
23939 /* Swap, force into registers, or otherwise massage the two operands
23940 to an sse comparison with a mask result. Thus we differ a bit from
23941 ix86_prepare_fp_compare_args which expects to produce a flags result.
23943 The DEST operand exists to help determine whether to commute commutative
23944 operators. The POP0/POP1 operands are updated in place. The new
23945 comparison code is returned, or UNKNOWN if not implementable. */
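/* For example, the pre-AVX cmpps/cmppd encodings provide LT and LE
   but no GT or GE predicate, which is why those codes are handled
   below by swapping the operands and using the mirrored test.  */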
23947 static enum rtx_code
23948 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23949 rtx *pop0, rtx *pop1)
23951 switch (code)
23953 case LTGT:
23954 case UNEQ:
23955 /* AVX supports all the needed comparisons. */
23956 if (TARGET_AVX)
23957 break;
23958 /* We have no LTGT as an operator. We could implement it with
23959 NE & ORDERED, but this requires an extra temporary. It's
23960 not clear that it's worth it. */
23961 return UNKNOWN;
23963 case LT:
23964 case LE:
23965 case UNGT:
23966 case UNGE:
23967 /* These are supported directly. */
23968 break;
23970 case EQ:
23971 case NE:
23972 case UNORDERED:
23973 case ORDERED:
23974 /* AVX has 3 operand comparisons, no need to swap anything. */
23975 if (TARGET_AVX)
23976 break;
23977 /* For commutative operators, try to canonicalize the destination
23978 operand to be first in the comparison - this helps reload to
23979 avoid extra moves. */
23980 if (!dest || !rtx_equal_p (dest, *pop1))
23981 break;
23982 /* FALLTHRU */
23984 case GE:
23985 case GT:
23986 case UNLE:
23987 case UNLT:
23988 /* These are not supported directly before AVX, and furthermore
23989 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23990 comparison operands to transform into something that is
23991 supported. */
23992 std::swap (*pop0, *pop1);
23993 code = swap_condition (code);
23994 break;
23996 default:
23997 gcc_unreachable ();
24000 return code;
24003 /* Detect conditional moves that exactly match min/max operational
24004 semantics. Note that this is IEEE safe, as long as we don't
24005 interchange the operands.
24007 Returns FALSE if this conditional move doesn't match a MIN/MAX,
24008 and TRUE if the operation is successful and instructions are emitted. */
24010 static bool
24011 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
24012 rtx cmp_op1, rtx if_true, rtx if_false)
24014 machine_mode mode;
24015 bool is_min;
24016 rtx tmp;
24018 if (code == LT)
24020 else if (code == UNGE)
24021 std::swap (if_true, if_false);
24022 else
24023 return false;
24025 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
24026 is_min = true;
24027 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
24028 is_min = false;
24029 else
24030 return false;
24032 mode = GET_MODE (dest);
24034 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
24035 but MODE may be a vector mode and thus not appropriate. */
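/* SSE min/max instructions are not commutative: when the operands
   are unordered, or are zeros of opposite sign, the second operand
   is returned.  Wrapping the operation in an UNSPEC with the
   original operand order keeps the IEEE behaviour that a plain
   SMIN/SMAX rtx would not necessarily preserve.  */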
24036 if (!flag_finite_math_only || flag_signed_zeros)
24038 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
24039 rtvec v;
24041 if_true = force_reg (mode, if_true);
24042 v = gen_rtvec (2, if_true, if_false);
24043 tmp = gen_rtx_UNSPEC (mode, v, u);
24045 else
24047 code = is_min ? SMIN : SMAX;
24048 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
24051 emit_insn (gen_rtx_SET (dest, tmp));
24052 return true;
24055 /* Expand an sse vector comparison. Return the register with the result. */
24057 static rtx
24058 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
24059 rtx op_true, rtx op_false)
24061 machine_mode mode = GET_MODE (dest);
24062 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
24064 /* In the general case the result of the comparison can differ from the operands' mode. */
24065 machine_mode cmp_mode;
24067 /* In AVX512F the result of comparison is an integer mask. */
24068 bool maskcmp = false;
24069 rtx x;
24071 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
24073 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
24074 cmp_mode = int_mode_for_size (nbits, 0).require ();
24075 maskcmp = true;
24077 else
24078 cmp_mode = cmp_ops_mode;
24081 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
24082 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
24083 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
24085 if (optimize
24086 || (maskcmp && cmp_mode != mode)
24087 || (op_true && reg_overlap_mentioned_p (dest, op_true))
24088 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
24089 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
24091 /* Compare patterns for int modes are unspec in AVX512F only. */
24092 if (maskcmp && (code == GT || code == EQ))
24094 rtx (*gen)(rtx, rtx, rtx);
24096 switch (cmp_ops_mode)
24098 case E_V64QImode:
24099 gcc_assert (TARGET_AVX512BW);
24100 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
24101 break;
24102 case E_V32HImode:
24103 gcc_assert (TARGET_AVX512BW);
24104 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
24105 break;
24106 case E_V16SImode:
24107 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
24108 break;
24109 case E_V8DImode:
24110 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
24111 break;
24112 default:
24113 gen = NULL;
24116 if (gen)
24118 emit_insn (gen (dest, cmp_op0, cmp_op1));
24119 return dest;
24122 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
24124 if (cmp_mode != mode && !maskcmp)
24126 x = force_reg (cmp_ops_mode, x);
24127 convert_move (dest, x, false);
24129 else
24130 emit_insn (gen_rtx_SET (dest, x));
24132 return dest;
24135 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
24136 operations. This is used for both scalar and vector conditional moves. */
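/* When no blend instruction applies, the fallback at the end of this
   function computes the classic mask merge
     dest = (cmp & op_true) | (~cmp & op_false)
   which relies on CMP having all-ones/all-zeros elements (the
   AVX512 integer-mask case is handled separately).  */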
24138 void
24139 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
24141 machine_mode mode = GET_MODE (dest);
24142 machine_mode cmpmode = GET_MODE (cmp);
24144 /* In AVX512F the result of comparison is an integer mask. */
24145 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
24147 rtx t2, t3, x;
24149 /* If we have an integer mask and FP value then we need
24150 to cast mask to FP mode. */
24151 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
24153 cmp = force_reg (cmpmode, cmp);
24154 cmp = gen_rtx_SUBREG (mode, cmp, 0);
24157 if (vector_all_ones_operand (op_true, mode)
24158 && rtx_equal_p (op_false, CONST0_RTX (mode))
24159 && !maskcmp)
24161 emit_insn (gen_rtx_SET (dest, cmp));
24163 else if (op_false == CONST0_RTX (mode)
24164 && !maskcmp)
24166 op_true = force_reg (mode, op_true);
24167 x = gen_rtx_AND (mode, cmp, op_true);
24168 emit_insn (gen_rtx_SET (dest, x));
24170 else if (op_true == CONST0_RTX (mode)
24171 && !maskcmp)
24173 op_false = force_reg (mode, op_false);
24174 x = gen_rtx_NOT (mode, cmp);
24175 x = gen_rtx_AND (mode, x, op_false);
24176 emit_insn (gen_rtx_SET (dest, x));
24178 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
24179 && !maskcmp)
24181 op_false = force_reg (mode, op_false);
24182 x = gen_rtx_IOR (mode, cmp, op_false);
24183 emit_insn (gen_rtx_SET (dest, x));
24185 else if (TARGET_XOP
24186 && !maskcmp)
24188 op_true = force_reg (mode, op_true);
24190 if (!nonimmediate_operand (op_false, mode))
24191 op_false = force_reg (mode, op_false);
24193 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
24194 op_true,
24195 op_false)));
24197 else
24199 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24200 rtx d = dest;
24202 if (!nonimmediate_operand (op_true, mode))
24203 op_true = force_reg (mode, op_true);
24205 op_false = force_reg (mode, op_false);
24207 switch (mode)
24209 case E_V4SFmode:
24210 if (TARGET_SSE4_1)
24211 gen = gen_sse4_1_blendvps;
24212 break;
24213 case E_V2DFmode:
24214 if (TARGET_SSE4_1)
24215 gen = gen_sse4_1_blendvpd;
24216 break;
24217 case E_V16QImode:
24218 case E_V8HImode:
24219 case E_V4SImode:
24220 case E_V2DImode:
24221 if (TARGET_SSE4_1)
24223 gen = gen_sse4_1_pblendvb;
24224 if (mode != V16QImode)
24225 d = gen_reg_rtx (V16QImode);
24226 op_false = gen_lowpart (V16QImode, op_false);
24227 op_true = gen_lowpart (V16QImode, op_true);
24228 cmp = gen_lowpart (V16QImode, cmp);
24230 break;
24231 case E_V8SFmode:
24232 if (TARGET_AVX)
24233 gen = gen_avx_blendvps256;
24234 break;
24235 case E_V4DFmode:
24236 if (TARGET_AVX)
24237 gen = gen_avx_blendvpd256;
24238 break;
24239 case E_V32QImode:
24240 case E_V16HImode:
24241 case E_V8SImode:
24242 case E_V4DImode:
24243 if (TARGET_AVX2)
24245 gen = gen_avx2_pblendvb;
24246 if (mode != V32QImode)
24247 d = gen_reg_rtx (V32QImode);
24248 op_false = gen_lowpart (V32QImode, op_false);
24249 op_true = gen_lowpart (V32QImode, op_true);
24250 cmp = gen_lowpart (V32QImode, cmp);
24252 break;
24254 case E_V64QImode:
24255 gen = gen_avx512bw_blendmv64qi;
24256 break;
24257 case E_V32HImode:
24258 gen = gen_avx512bw_blendmv32hi;
24259 break;
24260 case E_V16SImode:
24261 gen = gen_avx512f_blendmv16si;
24262 break;
24263 case E_V8DImode:
24264 gen = gen_avx512f_blendmv8di;
24265 break;
24266 case E_V8DFmode:
24267 gen = gen_avx512f_blendmv8df;
24268 break;
24269 case E_V16SFmode:
24270 gen = gen_avx512f_blendmv16sf;
24271 break;
24273 default:
24274 break;
24277 if (gen != NULL)
24279 emit_insn (gen (d, op_false, op_true, cmp));
24280 if (d != dest)
24281 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
24283 else
24285 op_true = force_reg (mode, op_true);
24287 t2 = gen_reg_rtx (mode);
24288 if (optimize)
24289 t3 = gen_reg_rtx (mode);
24290 else
24291 t3 = dest;
24293 x = gen_rtx_AND (mode, op_true, cmp);
24294 emit_insn (gen_rtx_SET (t2, x));
24296 x = gen_rtx_NOT (mode, cmp);
24297 x = gen_rtx_AND (mode, x, op_false);
24298 emit_insn (gen_rtx_SET (t3, x));
24300 x = gen_rtx_IOR (mode, t3, t2);
24301 emit_insn (gen_rtx_SET (dest, x));
24306 /* Expand a floating-point conditional move. Return true if successful. */
24308 bool
24309 ix86_expand_fp_movcc (rtx operands[])
24311 machine_mode mode = GET_MODE (operands[0]);
24312 enum rtx_code code = GET_CODE (operands[1]);
24313 rtx tmp, compare_op;
24314 rtx op0 = XEXP (operands[1], 0);
24315 rtx op1 = XEXP (operands[1], 1);
24317 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24319 machine_mode cmode;
24321 /* Since we've no cmove for sse registers, don't force bad register
24322 allocation just to gain access to it. Deny movcc when the
24323 comparison mode doesn't match the move mode. */
24324 cmode = GET_MODE (op0);
24325 if (cmode == VOIDmode)
24326 cmode = GET_MODE (op1);
24327 if (cmode != mode)
24328 return false;
24330 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
24331 if (code == UNKNOWN)
24332 return false;
24334 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
24335 operands[2], operands[3]))
24336 return true;
24338 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
24339 operands[2], operands[3]);
24340 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
24341 return true;
24344 if (GET_MODE (op0) == TImode
24345 || (GET_MODE (op0) == DImode
24346 && !TARGET_64BIT))
24347 return false;
24349 /* The floating point conditional move instructions don't directly
24350 support conditions resulting from a signed integer comparison. */
24352 compare_op = ix86_expand_compare (code, op0, op1);
24353 if (!fcmov_comparison_operator (compare_op, VOIDmode))
24355 tmp = gen_reg_rtx (QImode);
24356 ix86_expand_setcc (tmp, code, op0, op1);
24358 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
24361 emit_insn (gen_rtx_SET (operands[0],
24362 gen_rtx_IF_THEN_ELSE (mode, compare_op,
24363 operands[2], operands[3])));
24365 return true;
24368 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
24370 static int
24371 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
24373 switch (code)
24375 case EQ:
24376 return 0;
24377 case LT:
24378 case LTU:
24379 return 1;
24380 case LE:
24381 case LEU:
24382 return 2;
24383 case NE:
24384 return 4;
24385 case GE:
24386 case GEU:
24387 return 5;
24388 case GT:
24389 case GTU:
24390 return 6;
24391 default:
24392 gcc_unreachable ();
24396 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
24398 static int
24399 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
24401 switch (code)
24403 case EQ:
24404 return 0x00;
24405 case NE:
24406 return 0x04;
24407 case GT:
24408 return 0x0e;
24409 case LE:
24410 return 0x02;
24411 case GE:
24412 return 0x0d;
24413 case LT:
24414 return 0x01;
24415 case UNLE:
24416 return 0x0a;
24417 case UNLT:
24418 return 0x09;
24419 case UNGE:
24420 return 0x05;
24421 case UNGT:
24422 return 0x06;
24423 case UNEQ:
24424 return 0x18;
24425 case LTGT:
24426 return 0x0c;
24427 case ORDERED:
24428 return 0x07;
24429 case UNORDERED:
24430 return 0x03;
24431 default:
24432 gcc_unreachable ();
24436 /* Return immediate value to be used in UNSPEC_PCMP
24437 for comparison CODE in MODE. */
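/* These values follow the hardware predicate encodings: the integer
   table above matches the VPCMP/VPCMPU immediate (0 = EQ, 1 = LT,
   2 = LE, 4 = NE, 5 = NLT, 6 = NLE) and the FP table matches the
   VCMPPS/VCMPPD predicate byte (e.g. 0x03 = UNORD_Q, 0x0e = GT_OS,
   0x18 = EQ_US).  */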
24439 static int
24440 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
24442 if (FLOAT_MODE_P (mode))
24443 return ix86_fp_cmp_code_to_pcmp_immediate (code);
24444 return ix86_int_cmp_code_to_pcmp_immediate (code);
24447 /* Expand AVX-512 vector comparison. */
24449 bool
24450 ix86_expand_mask_vec_cmp (rtx operands[])
24452 machine_mode mask_mode = GET_MODE (operands[0]);
24453 machine_mode cmp_mode = GET_MODE (operands[2]);
24454 enum rtx_code code = GET_CODE (operands[1]);
24455 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
24456 int unspec_code;
24457 rtx unspec;
24459 switch (code)
24461 case LEU:
24462 case GTU:
24463 case GEU:
24464 case LTU:
24465 unspec_code = UNSPEC_UNSIGNED_PCMP;
24466 break;
24468 default:
24469 unspec_code = UNSPEC_PCMP;
24472 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
24473 operands[3], imm),
24474 unspec_code);
24475 emit_insn (gen_rtx_SET (operands[0], unspec));
24477 return true;
24480 /* Expand fp vector comparison. */
24482 bool
24483 ix86_expand_fp_vec_cmp (rtx operands[])
24485 enum rtx_code code = GET_CODE (operands[1]);
24486 rtx cmp;
24488 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24489 &operands[2], &operands[3]);
24490 if (code == UNKNOWN)
24492 rtx temp;
24493 switch (GET_CODE (operands[1]))
24495 case LTGT:
24496 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
24497 operands[3], NULL, NULL);
24498 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
24499 operands[3], NULL, NULL);
24500 code = AND;
24501 break;
24502 case UNEQ:
24503 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
24504 operands[3], NULL, NULL);
24505 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
24506 operands[3], NULL, NULL);
24507 code = IOR;
24508 break;
24509 default:
24510 gcc_unreachable ();
24512 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24513 OPTAB_DIRECT);
24515 else
24516 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
24517 operands[1], operands[2]);
24519 if (operands[0] != cmp)
24520 emit_move_insn (operands[0], cmp);
24522 return true;
24525 static rtx
24526 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
24527 rtx op_true, rtx op_false, bool *negate)
24529 machine_mode data_mode = GET_MODE (dest);
24530 machine_mode mode = GET_MODE (cop0);
24531 rtx x;
24533 *negate = false;
24535 /* XOP supports all of the comparisons on all 128-bit vector int types. */
24536 if (TARGET_XOP
24537 && (mode == V16QImode || mode == V8HImode
24538 || mode == V4SImode || mode == V2DImode))
24540 else
24542 /* Canonicalize the comparison to EQ, GT, GTU. */
24543 switch (code)
24545 case EQ:
24546 case GT:
24547 case GTU:
24548 break;
24550 case NE:
24551 case LE:
24552 case LEU:
24553 code = reverse_condition (code);
24554 *negate = true;
24555 break;
24557 case GE:
24558 case GEU:
24559 code = reverse_condition (code);
24560 *negate = true;
24561 /* FALLTHRU */
24563 case LT:
24564 case LTU:
24565 std::swap (cop0, cop1);
24566 code = swap_condition (code);
24567 break;
24569 default:
24570 gcc_unreachable ();
24573 /* Only SSE4.1/SSE4.2 supports V2DImode. */
24574 if (mode == V2DImode)
24576 switch (code)
24578 case EQ:
24579 /* SSE4.1 supports EQ. */
24580 if (!TARGET_SSE4_1)
24581 return NULL;
24582 break;
24584 case GT:
24585 case GTU:
24586 /* SSE4.2 supports GT/GTU. */
24587 if (!TARGET_SSE4_2)
24588 return NULL;
24589 break;
24591 default:
24592 gcc_unreachable ();
24596 /* Unsigned parallel compare is not supported by the hardware.
24597 Play some tricks to turn this into a signed comparison
24598 against 0. */
24599 if (code == GTU)
24601 cop0 = force_reg (mode, cop0);
24603 switch (mode)
24605 case E_V16SImode:
24606 case E_V8DImode:
24607 case E_V8SImode:
24608 case E_V4DImode:
24609 case E_V4SImode:
24610 case E_V2DImode:
24612 rtx t1, t2, mask;
24613 rtx (*gen_sub3) (rtx, rtx, rtx);
24615 switch (mode)
24617 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
24618 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
24619 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
24620 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
24621 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
24622 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
24623 default:
24624 gcc_unreachable ();
24626 /* Subtract (-(INT MAX) - 1) from both operands to make
24627 them signed. */
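/* Subtracting 0x80... is the same as flipping the sign bit, which
   maps the unsigned range 0 .. UMAX onto the signed range
   INT_MIN .. INT_MAX while preserving the ordering, so a signed GT
   on the biased values implements the original unsigned GTU.  */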
24628 mask = ix86_build_signbit_mask (mode, true, false);
24629 t1 = gen_reg_rtx (mode);
24630 emit_insn (gen_sub3 (t1, cop0, mask));
24632 t2 = gen_reg_rtx (mode);
24633 emit_insn (gen_sub3 (t2, cop1, mask));
24635 cop0 = t1;
24636 cop1 = t2;
24637 code = GT;
24639 break;
24641 case E_V64QImode:
24642 case E_V32HImode:
24643 case E_V32QImode:
24644 case E_V16HImode:
24645 case E_V16QImode:
24646 case E_V8HImode:
24647 /* Perform a parallel unsigned saturating subtraction. */
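/* a >u b holds exactly when the unsigned saturating difference
   a -us b is non-zero, so the GTU test becomes an EQ-against-zero
   test whose result is then inverted via *negate below.  */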
24648 x = gen_reg_rtx (mode);
24649 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24650 cop1)));
24652 cop0 = x;
24653 cop1 = CONST0_RTX (mode);
24654 code = EQ;
24655 *negate = !*negate;
24656 break;
24658 default:
24659 gcc_unreachable ();
24664 if (*negate)
24665 std::swap (op_true, op_false);
24667 /* Allow the comparison to be done in one mode, but the movcc to
24668 happen in another mode. */
24669 if (data_mode == mode)
24671 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24672 op_true, op_false);
24674 else
24676 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24677 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24678 op_true, op_false);
24679 if (GET_MODE (x) == mode)
24680 x = gen_lowpart (data_mode, x);
24683 return x;
24686 /* Expand integer vector comparison. */
24688 bool
24689 ix86_expand_int_vec_cmp (rtx operands[])
24691 rtx_code code = GET_CODE (operands[1]);
24692 bool negate = false;
24693 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24694 operands[3], NULL, NULL, &negate);
24696 if (!cmp)
24697 return false;
24699 if (negate)
24700 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24701 CONST0_RTX (GET_MODE (cmp)),
24702 NULL, NULL, &negate);
24704 gcc_assert (!negate);
24706 if (operands[0] != cmp)
24707 emit_move_insn (operands[0], cmp);
24709 return true;
24712 /* Expand a floating-point vector conditional move; a vcond operation
24713 rather than a movcc operation. */
24715 bool
24716 ix86_expand_fp_vcond (rtx operands[])
24718 enum rtx_code code = GET_CODE (operands[3]);
24719 rtx cmp;
24721 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24722 &operands[4], &operands[5]);
24723 if (code == UNKNOWN)
24725 rtx temp;
24726 switch (GET_CODE (operands[3]))
24728 case LTGT:
24729 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24730 operands[5], operands[0], operands[0]);
24731 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24732 operands[5], operands[1], operands[2]);
24733 code = AND;
24734 break;
24735 case UNEQ:
24736 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24737 operands[5], operands[0], operands[0]);
24738 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24739 operands[5], operands[1], operands[2]);
24740 code = IOR;
24741 break;
24742 default:
24743 gcc_unreachable ();
24745 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24746 OPTAB_DIRECT);
24747 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24748 return true;
24751 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24752 operands[5], operands[1], operands[2]))
24753 return true;
24755 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24756 operands[1], operands[2]);
24757 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24758 return true;
24761 /* Expand a signed/unsigned integral vector conditional move. */
24763 bool
24764 ix86_expand_int_vcond (rtx operands[])
24766 machine_mode data_mode = GET_MODE (operands[0]);
24767 machine_mode mode = GET_MODE (operands[4]);
24768 enum rtx_code code = GET_CODE (operands[3]);
24769 bool negate = false;
24770 rtx x, cop0, cop1;
24772 cop0 = operands[4];
24773 cop1 = operands[5];
24775 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24776 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
24777 if ((code == LT || code == GE)
24778 && data_mode == mode
24779 && cop1 == CONST0_RTX (mode)
24780 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24781 && GET_MODE_UNIT_SIZE (data_mode) > 1
24782 && GET_MODE_UNIT_SIZE (data_mode) <= 8
24783 && (GET_MODE_SIZE (data_mode) == 16
24784 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24786 rtx negop = operands[2 - (code == LT)];
24787 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24788 if (negop == CONST1_RTX (data_mode))
24790 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24791 operands[0], 1, OPTAB_DIRECT);
24792 if (res != operands[0])
24793 emit_move_insn (operands[0], res);
24794 return true;
24796 else if (GET_MODE_INNER (data_mode) != DImode
24797 && vector_all_ones_operand (negop, data_mode))
24799 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24800 operands[0], 0, OPTAB_DIRECT);
24801 if (res != operands[0])
24802 emit_move_insn (operands[0], res);
24803 return true;
24807 if (!nonimmediate_operand (cop1, mode))
24808 cop1 = force_reg (mode, cop1);
24809 if (!general_operand (operands[1], data_mode))
24810 operands[1] = force_reg (data_mode, operands[1]);
24811 if (!general_operand (operands[2], data_mode))
24812 operands[2] = force_reg (data_mode, operands[2]);
24814 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24815 operands[1], operands[2], &negate);
24817 if (!x)
24818 return false;
24820 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24821 operands[2-negate]);
24822 return true;
24825 /* AVX512F does support 64-byte integer vector operations,
24826 thus the longest vector we are faced with is V64QImode. */
24827 #define MAX_VECT_LEN 64
24829 struct expand_vec_perm_d
24831 rtx target, op0, op1;
24832 unsigned char perm[MAX_VECT_LEN];
24833 machine_mode vmode;
24834 unsigned char nelt;
24835 bool one_operand_p;
24836 bool testing_p;
24839 static bool
24840 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24841 struct expand_vec_perm_d *d)
24843 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24844 expanders, so args are either in d, or in op0, op1 etc. */
24845 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24846 machine_mode maskmode = mode;
24847 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24849 switch (mode)
24851 case E_V8HImode:
24852 if (TARGET_AVX512VL && TARGET_AVX512BW)
24853 gen = gen_avx512vl_vpermt2varv8hi3;
24854 break;
24855 case E_V16HImode:
24856 if (TARGET_AVX512VL && TARGET_AVX512BW)
24857 gen = gen_avx512vl_vpermt2varv16hi3;
24858 break;
24859 case E_V64QImode:
24860 if (TARGET_AVX512VBMI)
24861 gen = gen_avx512bw_vpermt2varv64qi3;
24862 break;
24863 case E_V32HImode:
24864 if (TARGET_AVX512BW)
24865 gen = gen_avx512bw_vpermt2varv32hi3;
24866 break;
24867 case E_V4SImode:
24868 if (TARGET_AVX512VL)
24869 gen = gen_avx512vl_vpermt2varv4si3;
24870 break;
24871 case E_V8SImode:
24872 if (TARGET_AVX512VL)
24873 gen = gen_avx512vl_vpermt2varv8si3;
24874 break;
24875 case E_V16SImode:
24876 if (TARGET_AVX512F)
24877 gen = gen_avx512f_vpermt2varv16si3;
24878 break;
24879 case E_V4SFmode:
24880 if (TARGET_AVX512VL)
24882 gen = gen_avx512vl_vpermt2varv4sf3;
24883 maskmode = V4SImode;
24885 break;
24886 case E_V8SFmode:
24887 if (TARGET_AVX512VL)
24889 gen = gen_avx512vl_vpermt2varv8sf3;
24890 maskmode = V8SImode;
24892 break;
24893 case E_V16SFmode:
24894 if (TARGET_AVX512F)
24896 gen = gen_avx512f_vpermt2varv16sf3;
24897 maskmode = V16SImode;
24899 break;
24900 case E_V2DImode:
24901 if (TARGET_AVX512VL)
24902 gen = gen_avx512vl_vpermt2varv2di3;
24903 break;
24904 case E_V4DImode:
24905 if (TARGET_AVX512VL)
24906 gen = gen_avx512vl_vpermt2varv4di3;
24907 break;
24908 case E_V8DImode:
24909 if (TARGET_AVX512F)
24910 gen = gen_avx512f_vpermt2varv8di3;
24911 break;
24912 case E_V2DFmode:
24913 if (TARGET_AVX512VL)
24915 gen = gen_avx512vl_vpermt2varv2df3;
24916 maskmode = V2DImode;
24918 break;
24919 case E_V4DFmode:
24920 if (TARGET_AVX512VL)
24922 gen = gen_avx512vl_vpermt2varv4df3;
24923 maskmode = V4DImode;
24925 break;
24926 case E_V8DFmode:
24927 if (TARGET_AVX512F)
24929 gen = gen_avx512f_vpermt2varv8df3;
24930 maskmode = V8DImode;
24932 break;
24933 default:
24934 break;
24937 if (gen == NULL)
24938 return false;
24940 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24941 expanders, so args are either in d, or in op0, op1 etc. */
24942 if (d)
24944 rtx vec[64];
24945 target = d->target;
24946 op0 = d->op0;
24947 op1 = d->op1;
24948 for (int i = 0; i < d->nelt; ++i)
24949 vec[i] = GEN_INT (d->perm[i]);
24950 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24953 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24954 return true;
24957 /* Expand a variable vector permutation. */
24959 void
24960 ix86_expand_vec_perm (rtx operands[])
24962 rtx target = operands[0];
24963 rtx op0 = operands[1];
24964 rtx op1 = operands[2];
24965 rtx mask = operands[3];
24966 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24967 machine_mode mode = GET_MODE (op0);
24968 machine_mode maskmode = GET_MODE (mask);
24969 int w, e, i;
24970 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24972 /* Number of elements in the vector. */
24973 w = GET_MODE_NUNITS (mode);
24974 e = GET_MODE_UNIT_SIZE (mode);
24975 gcc_assert (w <= 64);
24977 if (TARGET_AVX512F && one_operand_shuffle)
24979 rtx (*gen) (rtx, rtx, rtx) = NULL;
24980 switch (mode)
24982 case E_V16SImode:
24983 gen = gen_avx512f_permvarv16si;
24984 break;
24985 case E_V16SFmode:
24986 gen = gen_avx512f_permvarv16sf;
24987 break;
24988 case E_V8DImode:
24989 gen = gen_avx512f_permvarv8di;
24990 break;
24991 case E_V8DFmode:
24992 gen = gen_avx512f_permvarv8df;
24993 break;
24994 default:
24995 break;
24997 if (gen != NULL)
24999 emit_insn (gen (target, op0, mask));
25000 return;
25004 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
25005 return;
25007 if (TARGET_AVX2)
25009 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
25011 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
25012 a constant shuffle operand. With a tiny bit of effort we can
25013 use VPERMD instead. A re-interpretation stall for V4DFmode is
25014 unfortunate but there's no avoiding it.
25015 Similarly for V16HImode we don't have instructions for variable
25016 shuffling, while for V32QImode we can use vpshufb; vpshufb;
25017 vpermq; vpor after preparing suitable masks. */
25019 if (mode == V16HImode)
25021 maskmode = mode = V32QImode;
25022 w = 32;
25023 e = 1;
25025 else
25027 maskmode = mode = V8SImode;
25028 w = 8;
25029 e = 4;
25031 t1 = gen_reg_rtx (maskmode);
25033 /* Replicate the low bits of the V4DImode mask into V8SImode:
25034 mask = { A B C D }
25035 t1 = { A A B B C C D D }. */
25036 for (i = 0; i < w / 2; ++i)
25037 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
25038 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
25039 vt = force_reg (maskmode, vt);
25040 mask = gen_lowpart (maskmode, mask);
25041 if (maskmode == V8SImode)
25042 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
25043 else
25044 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
25046 /* Multiply the shuffle indices by two. */
25047 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
25048 OPTAB_DIRECT);
25050 /* Add one to the odd shuffle indices:
25051 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
25052 for (i = 0; i < w / 2; ++i)
25054 vec[i * 2] = const0_rtx;
25055 vec[i * 2 + 1] = const1_rtx;
25057 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
25058 vt = validize_mem (force_const_mem (maskmode, vt));
25059 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
25060 OPTAB_DIRECT);
25062 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
25063 operands[3] = mask = t1;
25064 target = gen_reg_rtx (mode);
25065 op0 = gen_lowpart (mode, op0);
25066 op1 = gen_lowpart (mode, op1);
25069 switch (mode)
25071 case E_V8SImode:
25072 /* The VPERMD and VPERMPS instructions already properly ignore
25073 the high bits of the shuffle elements. No need for us to
25074 perform an AND ourselves. */
25075 if (one_operand_shuffle)
25077 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
25078 if (target != operands[0])
25079 emit_move_insn (operands[0],
25080 gen_lowpart (GET_MODE (operands[0]), target));
25082 else
25084 t1 = gen_reg_rtx (V8SImode);
25085 t2 = gen_reg_rtx (V8SImode);
25086 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
25087 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
25088 goto merge_two;
25090 return;
25092 case E_V8SFmode:
25093 mask = gen_lowpart (V8SImode, mask);
25094 if (one_operand_shuffle)
25095 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
25096 else
25098 t1 = gen_reg_rtx (V8SFmode);
25099 t2 = gen_reg_rtx (V8SFmode);
25100 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
25101 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
25102 goto merge_two;
25104 return;
25106 case E_V4SImode:
25107 /* By combining the two 128-bit input vectors into one 256-bit
25108 input vector, we can use VPERMD and VPERMPS for the full
25109 two-operand shuffle. */
25110 t1 = gen_reg_rtx (V8SImode);
25111 t2 = gen_reg_rtx (V8SImode);
25112 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
25113 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25114 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
25115 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
25116 return;
25118 case E_V4SFmode:
25119 t1 = gen_reg_rtx (V8SFmode);
25120 t2 = gen_reg_rtx (V8SImode);
25121 mask = gen_lowpart (V4SImode, mask);
25122 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
25123 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25124 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
25125 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
25126 return;
25128 case E_V32QImode:
25129 t1 = gen_reg_rtx (V32QImode);
25130 t2 = gen_reg_rtx (V32QImode);
25131 t3 = gen_reg_rtx (V32QImode);
25132 vt2 = GEN_INT (-128);
25133 vt = gen_const_vec_duplicate (V32QImode, vt2);
25134 vt = force_reg (V32QImode, vt);
25135 for (i = 0; i < 32; i++)
25136 vec[i] = i < 16 ? vt2 : const0_rtx;
25137 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
25138 vt2 = force_reg (V32QImode, vt2);
25139 /* From mask create two adjusted masks, which contain the same
25140 bits as mask in the low 7 bits of each vector element.
25141 The first mask will have the most significant bit clear
25142 if it requests element from the same 128-bit lane
25143 and MSB set if it requests element from the other 128-bit lane.
25144 The second mask will have the opposite values of the MSB,
25145 and additionally will have its 128-bit lanes swapped.
25146 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
25147 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
25148 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
25149 stands for other 12 bytes. */
25150 /* The bit telling whether an element comes from the same lane or the
25151 other lane is bit 4, so shift it up by 3 to the MSB position. */
25152 t5 = gen_reg_rtx (V4DImode);
25153 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
25154 GEN_INT (3)));
25155 /* Clear MSB bits from the mask just in case it had them set. */
25156 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
25157 /* After this t1 will have MSB set for elements from other lane. */
25158 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
25159 /* Clear bits other than MSB. */
25160 emit_insn (gen_andv32qi3 (t1, t1, vt));
25161 /* Or in the lower bits from mask into t3. */
25162 emit_insn (gen_iorv32qi3 (t3, t1, t2));
25163 /* And invert MSB bits in t1, so MSB is set for elements from the same
25164 lane. */
25165 emit_insn (gen_xorv32qi3 (t1, t1, vt));
25166 /* Swap 128-bit lanes in t3. */
25167 t6 = gen_reg_rtx (V4DImode);
25168 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
25169 const2_rtx, GEN_INT (3),
25170 const0_rtx, const1_rtx));
25171 /* And or in the lower bits from mask into t1. */
25172 emit_insn (gen_iorv32qi3 (t1, t1, t2));
25173 if (one_operand_shuffle)
25175 /* Each of these shuffles will put 0s in places where
25176 element from the other 128-bit lane is needed, otherwise
25177 will shuffle in the requested value. */
25178 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
25179 gen_lowpart (V32QImode, t6)));
25180 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
25181 /* For t3 the 128-bit lanes are swapped again. */
25182 t7 = gen_reg_rtx (V4DImode);
25183 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
25184 const2_rtx, GEN_INT (3),
25185 const0_rtx, const1_rtx));
25186 /* And oring both together leads to the result. */
25187 emit_insn (gen_iorv32qi3 (target, t1,
25188 gen_lowpart (V32QImode, t7)));
25189 if (target != operands[0])
25190 emit_move_insn (operands[0],
25191 gen_lowpart (GET_MODE (operands[0]), target));
25192 return;
25195 t4 = gen_reg_rtx (V32QImode);
25196 /* Similar to the one_operand_shuffle code above,
25197 just repeated twice for each operand.  The merge_two:
25198 code will merge the two results together. */
25199 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
25200 gen_lowpart (V32QImode, t6)));
25201 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
25202 gen_lowpart (V32QImode, t6)));
25203 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
25204 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
25205 t7 = gen_reg_rtx (V4DImode);
25206 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
25207 const2_rtx, GEN_INT (3),
25208 const0_rtx, const1_rtx));
25209 t8 = gen_reg_rtx (V4DImode);
25210 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
25211 const2_rtx, GEN_INT (3),
25212 const0_rtx, const1_rtx));
25213 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
25214 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
25215 t1 = t4;
25216 t2 = t3;
25217 goto merge_two;
25219 default:
25220 gcc_assert (GET_MODE_SIZE (mode) <= 16);
25221 break;
25225 if (TARGET_XOP)
25227 /* The XOP VPPERM insn supports three inputs. By ignoring the
25228 one_operand_shuffle special case, we avoid creating another
25229 set of constant vectors in memory. */
25230 one_operand_shuffle = false;
25232 /* mask = mask & {2*w-1, ...} */
25233 vt = GEN_INT (2*w - 1);
25235 else
25237 /* mask = mask & {w-1, ...} */
25238 vt = GEN_INT (w - 1);
25241 vt = gen_const_vec_duplicate (maskmode, vt);
25242 mask = expand_simple_binop (maskmode, AND, mask, vt,
25243 NULL_RTX, 0, OPTAB_DIRECT);
25245 /* For non-QImode operations, convert the word permutation control
25246 into a byte permutation control. */
25247 if (mode != V16QImode)
25249 mask = expand_simple_binop (maskmode, ASHIFT, mask,
25250 GEN_INT (exact_log2 (e)),
25251 NULL_RTX, 0, OPTAB_DIRECT);
25253 /* Convert mask to vector of chars. */
25254 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
25256 /* Replicate each of the input bytes into byte positions:
25257 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
25258 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
25259 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
25260 for (i = 0; i < 16; ++i)
25261 vec[i] = GEN_INT (i/e * e);
25262 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25263 vt = validize_mem (force_const_mem (V16QImode, vt));
25264 if (TARGET_XOP)
25265 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
25266 else
25267 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
25269 /* Convert it into the byte positions by doing
25270 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
25271 for (i = 0; i < 16; ++i)
25272 vec[i] = GEN_INT (i % e);
25273 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25274 vt = validize_mem (force_const_mem (V16QImode, vt));
25275 emit_insn (gen_addv16qi3 (mask, mask, vt));
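/* For example, for a V4SImode shuffle (e == 4) an element index k in the
   original selector has now become the byte indices
   { 4*k, 4*k + 1, 4*k + 2, 4*k + 3 }.  */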
25278 /* The actual shuffle operations all operate on V16QImode. */
25279 op0 = gen_lowpart (V16QImode, op0);
25280 op1 = gen_lowpart (V16QImode, op1);
25282 if (TARGET_XOP)
25284 if (GET_MODE (target) != V16QImode)
25285 target = gen_reg_rtx (V16QImode);
25286 emit_insn (gen_xop_pperm (target, op0, op1, mask));
25287 if (target != operands[0])
25288 emit_move_insn (operands[0],
25289 gen_lowpart (GET_MODE (operands[0]), target));
25291 else if (one_operand_shuffle)
25293 if (GET_MODE (target) != V16QImode)
25294 target = gen_reg_rtx (V16QImode);
25295 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
25296 if (target != operands[0])
25297 emit_move_insn (operands[0],
25298 gen_lowpart (GET_MODE (operands[0]), target));
25300 else
25302 rtx xops[6];
25303 bool ok;
25305 /* Shuffle the two input vectors independently. */
25306 t1 = gen_reg_rtx (V16QImode);
25307 t2 = gen_reg_rtx (V16QImode);
25308 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
25309 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
25311 merge_two:
25312 /* Then merge them together. The key is whether any given control
25313 element contained a bit set that indicates the second word. */
25314 mask = operands[3];
25315 vt = GEN_INT (w);
25316 if (maskmode == V2DImode && !TARGET_SSE4_1)
25318 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
25319 more shuffle to convert the V2DI input mask into a V4SI
25320 input mask.  At that point the masking done by expand_int_vcond
25321 will work as desired. */
25322 rtx t3 = gen_reg_rtx (V4SImode);
25323 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
25324 const0_rtx, const0_rtx,
25325 const2_rtx, const2_rtx));
25326 mask = t3;
25327 maskmode = V4SImode;
25328 e = w = 4;
25331 vt = gen_const_vec_duplicate (maskmode, vt);
25332 vt = force_reg (maskmode, vt);
25333 mask = expand_simple_binop (maskmode, AND, mask, vt,
25334 NULL_RTX, 0, OPTAB_DIRECT);
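/* The vcond below selects, for each element, the shuffle of OP1 (t2) when
   the original selector index was >= W (i.e. it chose an element of the
   second input) and the shuffle of OP0 (t1) otherwise:
   target = (mask & W) == W ? t2 : t1.  */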
25336 if (GET_MODE (target) != mode)
25337 target = gen_reg_rtx (mode);
25338 xops[0] = target;
25339 xops[1] = gen_lowpart (mode, t2);
25340 xops[2] = gen_lowpart (mode, t1);
25341 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
25342 xops[4] = mask;
25343 xops[5] = vt;
25344 ok = ix86_expand_int_vcond (xops);
25345 gcc_assert (ok);
25346 if (target != operands[0])
25347 emit_move_insn (operands[0],
25348 gen_lowpart (GET_MODE (operands[0]), target));
25352 /* Unpack SRC into the next wider integer vector type, storing the result
25353 in DEST.  UNSIGNED_P is true if we should do zero extension, else sign
25354 extension.  HIGH_P is true if we want the N/2 high elements, else the low elements. */
25356 void
25357 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
25359 machine_mode imode = GET_MODE (src);
25360 rtx tmp;
25362 if (TARGET_SSE4_1)
25364 rtx (*unpack)(rtx, rtx);
25365 rtx (*extract)(rtx, rtx) = NULL;
25366 machine_mode halfmode = BLKmode;
25368 switch (imode)
25370 case E_V64QImode:
25371 if (unsigned_p)
25372 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
25373 else
25374 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
25375 halfmode = V32QImode;
25376 extract
25377 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
25378 break;
25379 case E_V32QImode:
25380 if (unsigned_p)
25381 unpack = gen_avx2_zero_extendv16qiv16hi2;
25382 else
25383 unpack = gen_avx2_sign_extendv16qiv16hi2;
25384 halfmode = V16QImode;
25385 extract
25386 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
25387 break;
25388 case E_V32HImode:
25389 if (unsigned_p)
25390 unpack = gen_avx512f_zero_extendv16hiv16si2;
25391 else
25392 unpack = gen_avx512f_sign_extendv16hiv16si2;
25393 halfmode = V16HImode;
25394 extract
25395 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
25396 break;
25397 case E_V16HImode:
25398 if (unsigned_p)
25399 unpack = gen_avx2_zero_extendv8hiv8si2;
25400 else
25401 unpack = gen_avx2_sign_extendv8hiv8si2;
25402 halfmode = V8HImode;
25403 extract
25404 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
25405 break;
25406 case E_V16SImode:
25407 if (unsigned_p)
25408 unpack = gen_avx512f_zero_extendv8siv8di2;
25409 else
25410 unpack = gen_avx512f_sign_extendv8siv8di2;
25411 halfmode = V8SImode;
25412 extract
25413 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
25414 break;
25415 case E_V8SImode:
25416 if (unsigned_p)
25417 unpack = gen_avx2_zero_extendv4siv4di2;
25418 else
25419 unpack = gen_avx2_sign_extendv4siv4di2;
25420 halfmode = V4SImode;
25421 extract
25422 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
25423 break;
25424 case E_V16QImode:
25425 if (unsigned_p)
25426 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
25427 else
25428 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
25429 break;
25430 case E_V8HImode:
25431 if (unsigned_p)
25432 unpack = gen_sse4_1_zero_extendv4hiv4si2;
25433 else
25434 unpack = gen_sse4_1_sign_extendv4hiv4si2;
25435 break;
25436 case E_V4SImode:
25437 if (unsigned_p)
25438 unpack = gen_sse4_1_zero_extendv2siv2di2;
25439 else
25440 unpack = gen_sse4_1_sign_extendv2siv2di2;
25441 break;
25442 default:
25443 gcc_unreachable ();
25446 if (GET_MODE_SIZE (imode) >= 32)
25448 tmp = gen_reg_rtx (halfmode);
25449 emit_insn (extract (tmp, src));
25451 else if (high_p)
25453 /* Shift higher 8 bytes to lower 8 bytes. */
25454 tmp = gen_reg_rtx (V1TImode);
25455 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
25456 GEN_INT (64)));
25457 tmp = gen_lowpart (imode, tmp);
25459 else
25460 tmp = src;
25462 emit_insn (unpack (dest, tmp));
25464 else
25466 rtx (*unpack)(rtx, rtx, rtx);
25468 switch (imode)
25470 case E_V16QImode:
25471 if (high_p)
25472 unpack = gen_vec_interleave_highv16qi;
25473 else
25474 unpack = gen_vec_interleave_lowv16qi;
25475 break;
25476 case E_V8HImode:
25477 if (high_p)
25478 unpack = gen_vec_interleave_highv8hi;
25479 else
25480 unpack = gen_vec_interleave_lowv8hi;
25481 break;
25482 case E_V4SImode:
25483 if (high_p)
25484 unpack = gen_vec_interleave_highv4si;
25485 else
25486 unpack = gen_vec_interleave_lowv4si;
25487 break;
25488 default:
25489 gcc_unreachable ();
25492 if (unsigned_p)
25493 tmp = force_reg (imode, CONST0_RTX (imode));
25494 else
25495 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
25496 src, pc_rtx, pc_rtx);
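/* TMP now holds the value interleaved into the high halves of the widened
   elements: zero for zero extension, or a mask of the sign bits
   (0 > SRC, elementwise) for sign extension.  */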
25498 rtx tmp2 = gen_reg_rtx (imode);
25499 emit_insn (unpack (tmp2, src, tmp));
25500 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
25504 /* Expand conditional increment or decrement using adc/sbb instructions.
25505 The default case using setcc followed by the conditional move can be
25506 done by generic code. */
25507 bool
25508 ix86_expand_int_addcc (rtx operands[])
25510 enum rtx_code code = GET_CODE (operands[1]);
25511 rtx flags;
25512 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
25513 rtx compare_op;
25514 rtx val = const0_rtx;
25515 bool fpcmp = false;
25516 machine_mode mode;
25517 rtx op0 = XEXP (operands[1], 0);
25518 rtx op1 = XEXP (operands[1], 1);
25520 if (operands[3] != const1_rtx
25521 && operands[3] != constm1_rtx)
25522 return false;
25523 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
25524 return false;
25525 code = GET_CODE (compare_op);
25527 flags = XEXP (compare_op, 0);
25529 if (GET_MODE (flags) == CCFPmode)
25531 fpcmp = true;
25532 code = ix86_fp_compare_code_to_integer (code);
25535 if (code != LTU)
25537 val = constm1_rtx;
25538 if (fpcmp)
25539 PUT_CODE (compare_op,
25540 reverse_condition_maybe_unordered
25541 (GET_CODE (compare_op)));
25542 else
25543 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
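/* At this point the comparison has been reduced to a carry flag test: for
   LTU the carry is set exactly when the condition holds.  If the flag is
   instead clear when the condition holds, VAL was changed to -1 above so
   that the adc/sbb emitted below (x + (-1) + CF, resp. x - (-1) - CF) still
   adds or subtracts one exactly when the original condition is true.  */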
25546 mode = GET_MODE (operands[0]);
25548 /* Construct either adc or sbb insn. */
25549 if ((code == LTU) == (operands[3] == constm1_rtx))
25551 switch (mode)
25553 case E_QImode:
25554 insn = gen_subqi3_carry;
25555 break;
25556 case E_HImode:
25557 insn = gen_subhi3_carry;
25558 break;
25559 case E_SImode:
25560 insn = gen_subsi3_carry;
25561 break;
25562 case E_DImode:
25563 insn = gen_subdi3_carry;
25564 break;
25565 default:
25566 gcc_unreachable ();
25569 else
25571 switch (mode)
25573 case E_QImode:
25574 insn = gen_addqi3_carry;
25575 break;
25576 case E_HImode:
25577 insn = gen_addhi3_carry;
25578 break;
25579 case E_SImode:
25580 insn = gen_addsi3_carry;
25581 break;
25582 case E_DImode:
25583 insn = gen_adddi3_carry;
25584 break;
25585 default:
25586 gcc_unreachable ();
25589 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
25591 return true;
25595 /* Split OPERAND into half-mode parts stored in PARTS.  Similar to
25596 split_double_mode, but works for floating point parameters and
25597 non-offsettable memories.  For pushes, it returns just stack offsets;
25598 the values will be saved in the right order.  At most four parts are generated. */
25600 static int
25601 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
25603 int size;
25605 if (!TARGET_64BIT)
25606 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
25607 else
25608 size = (GET_MODE_SIZE (mode) + 4) / 8;
25610 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
25611 gcc_assert (size >= 2 && size <= 4);
25613 /* Optimize constant pool reference to immediates. This is used by fp
25614 moves, that force all constants to memory to allow combining. */
25615 if (MEM_P (operand) && MEM_READONLY_P (operand))
25616 operand = avoid_constant_pool_reference (operand);
25618 if (MEM_P (operand) && !offsettable_memref_p (operand))
25620 /* The only non-offsettable memories we handle are pushes. */
25621 int ok = push_operand (operand, VOIDmode);
25623 gcc_assert (ok);
25625 operand = copy_rtx (operand);
25626 PUT_MODE (operand, word_mode);
25627 parts[0] = parts[1] = parts[2] = parts[3] = operand;
25628 return size;
25631 if (GET_CODE (operand) == CONST_VECTOR)
25633 scalar_int_mode imode = int_mode_for_mode (mode).require ();
25634 /* Caution: if we looked through a constant pool memory above,
25635 the operand may actually have a different mode now. That's
25636 ok, since we want to pun this all the way back to an integer. */
25637 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
25638 gcc_assert (operand != NULL);
25639 mode = imode;
25642 if (!TARGET_64BIT)
25644 if (mode == DImode)
25645 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25646 else
25648 int i;
25650 if (REG_P (operand))
25652 gcc_assert (reload_completed);
25653 for (i = 0; i < size; i++)
25654 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
25656 else if (offsettable_memref_p (operand))
25658 operand = adjust_address (operand, SImode, 0);
25659 parts[0] = operand;
25660 for (i = 1; i < size; i++)
25661 parts[i] = adjust_address (operand, SImode, 4 * i);
25663 else if (CONST_DOUBLE_P (operand))
25665 const REAL_VALUE_TYPE *r;
25666 long l[4];
25668 r = CONST_DOUBLE_REAL_VALUE (operand);
25669 switch (mode)
25671 case E_TFmode:
25672 real_to_target (l, r, mode);
25673 parts[3] = gen_int_mode (l[3], SImode);
25674 parts[2] = gen_int_mode (l[2], SImode);
25675 break;
25676 case E_XFmode:
25677 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
25678 long double may not be 80-bit. */
25679 real_to_target (l, r, mode);
25680 parts[2] = gen_int_mode (l[2], SImode);
25681 break;
25682 case E_DFmode:
25683 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
25684 break;
25685 default:
25686 gcc_unreachable ();
25688 parts[1] = gen_int_mode (l[1], SImode);
25689 parts[0] = gen_int_mode (l[0], SImode);
25691 else
25692 gcc_unreachable ();
25695 else
25697 if (mode == TImode)
25698 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25699 if (mode == XFmode || mode == TFmode)
25701 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
25702 if (REG_P (operand))
25704 gcc_assert (reload_completed);
25705 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
25706 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
25708 else if (offsettable_memref_p (operand))
25710 operand = adjust_address (operand, DImode, 0);
25711 parts[0] = operand;
25712 parts[1] = adjust_address (operand, upper_mode, 8);
25714 else if (CONST_DOUBLE_P (operand))
25716 long l[4];
25718 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
25720 /* real_to_target puts 32-bit pieces in each long. */
25721 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
25722 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
25723 << 32), DImode);
25725 if (upper_mode == SImode)
25726 parts[1] = gen_int_mode (l[2], SImode);
25727 else
25728 parts[1]
25729 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
25730 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
25731 << 32), DImode);
25733 else
25734 gcc_unreachable ();
25738 return size;
25741 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25742 All required insns are emitted directly.  Slots 2 and up of OPERANDS
25743 are used internally to hold the split parts: first the output parts,
25744 then the input parts, in the correct order. */
25746 void
25747 ix86_split_long_move (rtx operands[])
25749 rtx part[2][4];
25750 int nparts, i, j;
25751 int push = 0;
25752 int collisions = 0;
25753 machine_mode mode = GET_MODE (operands[0]);
25754 bool collisionparts[4];
25756 /* The DFmode expanders may ask us to move a double.
25757 For a 64-bit target this is a single move.  By hiding that fact
25758 here we simplify the i386.md splitters. */
25759 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25761 /* Optimize constant pool reference to immediates. This is used by
25762 fp moves, that force all constants to memory to allow combining. */
25764 if (MEM_P (operands[1])
25765 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25766 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25767 operands[1] = get_pool_constant (XEXP (operands[1], 0));
25768 if (push_operand (operands[0], VOIDmode))
25770 operands[0] = copy_rtx (operands[0]);
25771 PUT_MODE (operands[0], word_mode);
25773 else
25774 operands[0] = gen_lowpart (DImode, operands[0]);
25775 operands[1] = gen_lowpart (DImode, operands[1]);
25776 emit_move_insn (operands[0], operands[1]);
25777 return;
25781 /* The only non-offsettable memory we handle is push. */
25781 if (push_operand (operands[0], VOIDmode))
25782 push = 1;
25783 else
25784 gcc_assert (!MEM_P (operands[0])
25785 || offsettable_memref_p (operands[0]));
25787 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25788 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25790 /* When emitting push, take care for source operands on the stack. */
25791 if (push && MEM_P (operands[1])
25792 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25794 rtx src_base = XEXP (part[1][nparts - 1], 0);
25796 /* Compensate for the stack decrement by 4. */
25797 if (!TARGET_64BIT && nparts == 3
25798 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25799 src_base = plus_constant (Pmode, src_base, 4);
25801 /* src_base refers to the stack pointer and is
25802 automatically decreased by emitted push. */
25803 for (i = 0; i < nparts; i++)
25804 part[1][i] = change_address (part[1][i],
25805 GET_MODE (part[1][i]), src_base);
25808 /* We need to do copy in the right order in case an address register
25809 of the source overlaps the destination. */
25810 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25812 rtx tmp;
25814 for (i = 0; i < nparts; i++)
25816 collisionparts[i]
25817 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25818 if (collisionparts[i])
25819 collisions++;
25822 /* Collision in the middle part can be handled by reordering. */
25823 if (collisions == 1 && nparts == 3 && collisionparts [1])
25825 std::swap (part[0][1], part[0][2]);
25826 std::swap (part[1][1], part[1][2]);
25828 else if (collisions == 1
25829 && nparts == 4
25830 && (collisionparts [1] || collisionparts [2]))
25832 if (collisionparts [1])
25834 std::swap (part[0][1], part[0][2]);
25835 std::swap (part[1][1], part[1][2]);
25837 else
25839 std::swap (part[0][2], part[0][3]);
25840 std::swap (part[1][2], part[1][3]);
25844 /* If there are more collisions, we can't handle it by reordering.
25845 Do an lea to the last part and use only one colliding move. */
25846 else if (collisions > 1)
25848 rtx base, addr;
25850 collisions = 1;
25852 base = part[0][nparts - 1];
25854 /* Handle the case when the last part isn't valid for lea.
25855 Happens in 64-bit mode storing the 12-byte XFmode. */
25856 if (GET_MODE (base) != Pmode)
25857 base = gen_rtx_REG (Pmode, REGNO (base));
25859 addr = XEXP (part[1][0], 0);
25860 if (TARGET_TLS_DIRECT_SEG_REFS)
25862 struct ix86_address parts;
25863 int ok = ix86_decompose_address (addr, &parts);
25864 gcc_assert (ok);
25865 /* It is not valid to use %gs: or %fs: in lea. */
25866 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25868 emit_insn (gen_rtx_SET (base, addr));
25869 part[1][0] = replace_equiv_address (part[1][0], base);
25870 for (i = 1; i < nparts; i++)
25872 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25873 part[1][i] = replace_equiv_address (part[1][i], tmp);
25878 if (push)
25880 if (!TARGET_64BIT)
25882 if (nparts == 3)
25884 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25885 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25886 stack_pointer_rtx, GEN_INT (-4)));
25887 emit_move_insn (part[0][2], part[1][2]);
25889 else if (nparts == 4)
25891 emit_move_insn (part[0][3], part[1][3]);
25892 emit_move_insn (part[0][2], part[1][2]);
25895 else
25897 /* In 64-bit mode we don't have a 32-bit push available.  In case this is
25898 a register, it is OK - we will just use the larger counterpart.  We also
25899 retype memory - this comes from an attempt to avoid the REX prefix on
25900 moving the second half of a TFmode value. */
25901 if (GET_MODE (part[1][1]) == SImode)
25903 switch (GET_CODE (part[1][1]))
25905 case MEM:
25906 part[1][1] = adjust_address (part[1][1], DImode, 0);
25907 break;
25909 case REG:
25910 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25911 break;
25913 default:
25914 gcc_unreachable ();
25917 if (GET_MODE (part[1][0]) == SImode)
25918 part[1][0] = part[1][1];
25921 emit_move_insn (part[0][1], part[1][1]);
25922 emit_move_insn (part[0][0], part[1][0]);
25923 return;
25926 /* Choose correct order to not overwrite the source before it is copied. */
25927 if ((REG_P (part[0][0])
25928 && REG_P (part[1][1])
25929 && (REGNO (part[0][0]) == REGNO (part[1][1])
25930 || (nparts == 3
25931 && REGNO (part[0][0]) == REGNO (part[1][2]))
25932 || (nparts == 4
25933 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25934 || (collisions > 0
25935 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25937 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25939 operands[2 + i] = part[0][j];
25940 operands[6 + i] = part[1][j];
25943 else
25945 for (i = 0; i < nparts; i++)
25947 operands[2 + i] = part[0][i];
25948 operands[6 + i] = part[1][i];
25952 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25953 if (optimize_insn_for_size_p ())
25955 for (j = 0; j < nparts - 1; j++)
25956 if (CONST_INT_P (operands[6 + j])
25957 && operands[6 + j] != const0_rtx
25958 && REG_P (operands[2 + j]))
25959 for (i = j; i < nparts - 1; i++)
25960 if (CONST_INT_P (operands[7 + i])
25961 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25962 operands[7 + i] = operands[2 + j];
25965 for (i = 0; i < nparts; i++)
25966 emit_move_insn (operands[2 + i], operands[6 + i]);
25968 return;
25971 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25972 left shift by a constant, either using a single shift or
25973 a sequence of add instructions. */
25975 static void
25976 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25978 rtx (*insn)(rtx, rtx, rtx);
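/* MODE is the mode of the original double-word shift; OPERAND is one
   half-mode part of it.  Hence for a DImode shift the parts are SImode and
   the SImode add/shift patterns are used (and the DImode ones for a TImode
   shift).  */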
25980 if (count == 1
25981 || (count * ix86_cost->add <= ix86_cost->shift_const
25982 && !optimize_insn_for_size_p ()))
25984 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25985 while (count-- > 0)
25986 emit_insn (insn (operand, operand, operand));
25988 else
25990 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25991 emit_insn (insn (operand, operand, GEN_INT (count)));
25995 void
25996 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25998 rtx (*gen_ashl3)(rtx, rtx, rtx);
25999 rtx (*gen_shld)(rtx, rtx, rtx);
26000 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26002 rtx low[2], high[2];
26003 int count;
26005 if (CONST_INT_P (operands[2]))
26007 split_double_mode (mode, operands, 2, low, high);
26008 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26010 if (count >= half_width)
26012 emit_move_insn (high[0], low[1]);
26013 emit_move_insn (low[0], const0_rtx);
26015 if (count > half_width)
26016 ix86_expand_ashl_const (high[0], count - half_width, mode);
26018 else
26020 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
26022 if (!rtx_equal_p (operands[0], operands[1]))
26023 emit_move_insn (operands[0], operands[1]);
26025 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
26026 ix86_expand_ashl_const (low[0], count, mode);
26028 return;
26031 split_double_mode (mode, operands, 1, low, high);
26033 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
26035 if (operands[1] == const1_rtx)
26037 /* Assuming we've chosen QImode-capable registers, then 1 << N
26038 can be done with two 32/64-bit shifts, no branches, no cmoves. */
26039 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
26041 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
26043 ix86_expand_clear (low[0]);
26044 ix86_expand_clear (high[0]);
26045 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
26047 d = gen_lowpart (QImode, low[0]);
26048 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26049 s = gen_rtx_EQ (QImode, flags, const0_rtx);
26050 emit_insn (gen_rtx_SET (d, s));
26052 d = gen_lowpart (QImode, high[0]);
26053 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26054 s = gen_rtx_NE (QImode, flags, const0_rtx);
26055 emit_insn (gen_rtx_SET (d, s));
26058 /* Otherwise, we can get the same results by manually performing
26059 a bit extract operation on bit 5/6, and then performing the two
26060 shifts. The two methods of getting 0/1 into low/high are exactly
26061 the same size. Avoiding the shift in the bit extract case helps
26062 pentium4 a bit; no one else seems to care much either way. */
26063 else
26065 machine_mode half_mode;
26066 rtx (*gen_lshr3)(rtx, rtx, rtx);
26067 rtx (*gen_and3)(rtx, rtx, rtx);
26068 rtx (*gen_xor3)(rtx, rtx, rtx);
26069 HOST_WIDE_INT bits;
26070 rtx x;
26072 if (mode == DImode)
26074 half_mode = SImode;
26075 gen_lshr3 = gen_lshrsi3;
26076 gen_and3 = gen_andsi3;
26077 gen_xor3 = gen_xorsi3;
26078 bits = 5;
26080 else
26082 half_mode = DImode;
26083 gen_lshr3 = gen_lshrdi3;
26084 gen_and3 = gen_anddi3;
26085 gen_xor3 = gen_xordi3;
26086 bits = 6;
26089 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
26090 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
26091 else
26092 x = gen_lowpart (half_mode, operands[2]);
26093 emit_insn (gen_rtx_SET (high[0], x));
26095 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
26096 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
26097 emit_move_insn (low[0], high[0]);
26098 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
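/* In both variants LOW now holds 1 and HIGH holds 0 when the HALF_WIDTH bit
   of the shift count is clear (i.e. for counts below HALF_WIDTH), and the
   other way around otherwise.  The shifts below, which use the count modulo
   HALF_WIDTH in hardware, then move that single set bit into its final
   position.  */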
26101 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26102 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
26103 return;
26106 if (operands[1] == constm1_rtx)
26108 /* For -1 << N, we can avoid the shld instruction, because we
26109 know that we're shifting 0...31/63 ones into a -1. */
26110 emit_move_insn (low[0], constm1_rtx);
26111 if (optimize_insn_for_size_p ())
26112 emit_move_insn (high[0], low[0]);
26113 else
26114 emit_move_insn (high[0], constm1_rtx);
26116 else
26118 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
26120 if (!rtx_equal_p (operands[0], operands[1]))
26121 emit_move_insn (operands[0], operands[1]);
26123 split_double_mode (mode, operands, 1, low, high);
26124 emit_insn (gen_shld (high[0], low[0], operands[2]));
26127 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26129 if (TARGET_CMOVE && scratch)
26131 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26132 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26134 ix86_expand_clear (scratch);
26135 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
26137 else
26139 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26140 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26142 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
26146 void
26147 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
26149 rtx (*gen_ashr3)(rtx, rtx, rtx)
26150 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
26151 rtx (*gen_shrd)(rtx, rtx, rtx);
26152 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26154 rtx low[2], high[2];
26155 int count;
26157 if (CONST_INT_P (operands[2]))
26159 split_double_mode (mode, operands, 2, low, high);
26160 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
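/* A shift by WIDTH-1 replicates the sign bit into every bit of both halves,
   so it is handled first below as a single arithmetic shift of the high
   input by HALF_WIDTH-1 whose result is copied to both output halves.  */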
26162 if (count == GET_MODE_BITSIZE (mode) - 1)
26164 emit_move_insn (high[0], high[1]);
26165 emit_insn (gen_ashr3 (high[0], high[0],
26166 GEN_INT (half_width - 1)));
26167 emit_move_insn (low[0], high[0]);
26170 else if (count >= half_width)
26172 emit_move_insn (low[0], high[1]);
26173 emit_move_insn (high[0], low[0]);
26174 emit_insn (gen_ashr3 (high[0], high[0],
26175 GEN_INT (half_width - 1)));
26177 if (count > half_width)
26178 emit_insn (gen_ashr3 (low[0], low[0],
26179 GEN_INT (count - half_width)));
26181 else
26183 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26185 if (!rtx_equal_p (operands[0], operands[1]))
26186 emit_move_insn (operands[0], operands[1]);
26188 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26189 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
26192 else
26194 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26196 if (!rtx_equal_p (operands[0], operands[1]))
26197 emit_move_insn (operands[0], operands[1]);
26199 split_double_mode (mode, operands, 1, low, high);
26201 emit_insn (gen_shrd (low[0], high[0], operands[2]));
26202 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
26204 if (TARGET_CMOVE && scratch)
26206 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26207 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26209 emit_move_insn (scratch, high[0]);
26210 emit_insn (gen_ashr3 (scratch, scratch,
26211 GEN_INT (half_width - 1)));
26212 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26213 scratch));
26215 else
26217 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
26218 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
26220 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
26225 void
26226 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
26228 rtx (*gen_lshr3)(rtx, rtx, rtx)
26229 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
26230 rtx (*gen_shrd)(rtx, rtx, rtx);
26231 int half_width = GET_MODE_BITSIZE (mode) >> 1;
26233 rtx low[2], high[2];
26234 int count;
26236 if (CONST_INT_P (operands[2]))
26238 split_double_mode (mode, operands, 2, low, high);
26239 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26241 if (count >= half_width)
26243 emit_move_insn (low[0], high[1]);
26244 ix86_expand_clear (high[0]);
26246 if (count > half_width)
26247 emit_insn (gen_lshr3 (low[0], low[0],
26248 GEN_INT (count - half_width)));
26250 else
26252 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26254 if (!rtx_equal_p (operands[0], operands[1]))
26255 emit_move_insn (operands[0], operands[1]);
26257 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26258 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
26261 else
26263 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26265 if (!rtx_equal_p (operands[0], operands[1]))
26266 emit_move_insn (operands[0], operands[1]);
26268 split_double_mode (mode, operands, 1, low, high);
26270 emit_insn (gen_shrd (low[0], high[0], operands[2]));
26271 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
26273 if (TARGET_CMOVE && scratch)
26275 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26276 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26278 ix86_expand_clear (scratch);
26279 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26280 scratch));
26282 else
26284 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26285 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26287 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
26292 /* Predict just emitted jump instruction to be taken with probability PROB. */
26293 static void
26294 predict_jump (int prob)
26296 rtx_insn *insn = get_last_insn ();
26297 gcc_assert (JUMP_P (insn));
26298 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
26301 /* Helper function for the string operations below.  Test VARIABLE for whether
26302 it is aligned to VALUE bytes.  If it is, jump to the returned label. */
26303 static rtx_code_label *
26304 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
26306 rtx_code_label *label = gen_label_rtx ();
26307 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
26308 if (GET_MODE (variable) == DImode)
26309 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
26310 else
26311 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
26312 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
26313 1, label);
26314 if (epilogue)
26315 predict_jump (REG_BR_PROB_BASE * 50 / 100);
26316 else
26317 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26318 return label;
26321 /* Adjust COUNTER by the VALUE. */
26322 static void
26323 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
26325 rtx (*gen_add)(rtx, rtx, rtx)
26326 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
26328 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
26331 /* Zero extend possibly SImode EXP to Pmode register. */
26333 ix86_zero_extend_to_Pmode (rtx exp)
26335 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
26338 /* Divide COUNTREG by SCALE. */
26339 static rtx
26340 scale_counter (rtx countreg, int scale)
26342 rtx sc;
26344 if (scale == 1)
26345 return countreg;
26346 if (CONST_INT_P (countreg))
26347 return GEN_INT (INTVAL (countreg) / scale);
26348 gcc_assert (REG_P (countreg));
26350 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
26351 GEN_INT (exact_log2 (scale)),
26352 NULL, 1, OPTAB_DIRECT);
26353 return sc;
26356 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
26357 DImode for constant loop counts. */
26359 static machine_mode
26360 counter_mode (rtx count_exp)
26362 if (GET_MODE (count_exp) != VOIDmode)
26363 return GET_MODE (count_exp);
26364 if (!CONST_INT_P (count_exp))
26365 return Pmode;
26366 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
26367 return DImode;
26368 return SImode;
26371 /* Copy the address to a Pmode register. This is used for x32 to
26372 truncate DImode TLS address to a SImode register. */
26374 static rtx
26375 ix86_copy_addr_to_reg (rtx addr)
26377 rtx reg;
26378 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
26380 reg = copy_addr_to_reg (addr);
26381 REG_POINTER (reg) = 1;
26382 return reg;
26384 else
26386 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
26387 reg = copy_to_mode_reg (DImode, addr);
26388 REG_POINTER (reg) = 1;
26389 return gen_rtx_SUBREG (SImode, reg, 0);
26393 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
26394 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
26395 is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the equivalent
26396 loop to set memory to VALUE (supposed to be in MODE).
26398 The size is rounded down to a whole number of chunks moved at once.
26399 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
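/* Roughly, the emitted code (for the !ISSETMEM case) has this shape:

     size = COUNT & -(GET_MODE_SIZE (MODE) * UNROLL);
     if (size == 0) goto out;   <- guard emitted only for single-byte chunks
     iter = 0;
   top:
     copy UNROLL chunks of MODE from SRCMEM + iter to DESTMEM + iter;
     iter += GET_MODE_SIZE (MODE) * UNROLL;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;
   out:

   The ISSETMEM variant stores VALUE instead of loading from SRCMEM.  */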
26402 static void
26403 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
26404 rtx destptr, rtx srcptr, rtx value,
26405 rtx count, machine_mode mode, int unroll,
26406 int expected_size, bool issetmem)
26408 rtx_code_label *out_label, *top_label;
26409 rtx iter, tmp;
26410 machine_mode iter_mode = counter_mode (count);
26411 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
26412 rtx piece_size = GEN_INT (piece_size_n);
26413 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
26414 rtx size;
26415 int i;
26417 top_label = gen_label_rtx ();
26418 out_label = gen_label_rtx ();
26419 iter = gen_reg_rtx (iter_mode);
26421 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
26422 NULL, 1, OPTAB_DIRECT);
26423 /* Those two (the AND above and the compare below) should combine. */
26424 if (piece_size == const1_rtx)
26426 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
26427 true, out_label);
26428 predict_jump (REG_BR_PROB_BASE * 10 / 100);
26430 emit_move_insn (iter, const0_rtx);
26432 emit_label (top_label);
26434 tmp = convert_modes (Pmode, iter_mode, iter, true);
26436 /* This assert could be relaxed - in that case we'll need to compute
26437 the smallest power of two containing PIECE_SIZE_N and pass it to
26438 offset_address. */
26439 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
26440 destmem = offset_address (destmem, tmp, piece_size_n);
26441 destmem = adjust_address (destmem, mode, 0);
26443 if (!issetmem)
26445 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
26446 srcmem = adjust_address (srcmem, mode, 0);
26448 /* When unrolling for chips that reorder memory reads and writes,
26449 we can save registers by using single temporary.
26450 Also using 4 temporaries is overkill in 32bit mode. */
26451 if (!TARGET_64BIT && 0)
26453 for (i = 0; i < unroll; i++)
26455 if (i)
26457 destmem =
26458 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26459 srcmem =
26460 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26462 emit_move_insn (destmem, srcmem);
26465 else
26467 rtx tmpreg[4];
26468 gcc_assert (unroll <= 4);
26469 for (i = 0; i < unroll; i++)
26471 tmpreg[i] = gen_reg_rtx (mode);
26472 if (i)
26474 srcmem =
26475 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26477 emit_move_insn (tmpreg[i], srcmem);
26479 for (i = 0; i < unroll; i++)
26481 if (i)
26483 destmem =
26484 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26486 emit_move_insn (destmem, tmpreg[i]);
26490 else
26491 for (i = 0; i < unroll; i++)
26493 if (i)
26494 destmem =
26495 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26496 emit_move_insn (destmem, value);
26499 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
26500 true, OPTAB_LIB_WIDEN);
26501 if (tmp != iter)
26502 emit_move_insn (iter, tmp);
26504 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
26505 true, top_label);
26506 if (expected_size != -1)
26508 expected_size /= GET_MODE_SIZE (mode) * unroll;
26509 if (expected_size == 0)
26510 predict_jump (0);
26511 else if (expected_size > REG_BR_PROB_BASE)
26512 predict_jump (REG_BR_PROB_BASE - 1);
26513 else
26514 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
26516 else
26517 predict_jump (REG_BR_PROB_BASE * 80 / 100);
26518 iter = ix86_zero_extend_to_Pmode (iter);
26519 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
26520 true, OPTAB_LIB_WIDEN);
26521 if (tmp != destptr)
26522 emit_move_insn (destptr, tmp);
26523 if (!issetmem)
26525 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
26526 true, OPTAB_LIB_WIDEN);
26527 if (tmp != srcptr)
26528 emit_move_insn (srcptr, tmp);
26530 emit_label (out_label);
26533 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
26534 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
26535 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
26536 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
26537 ORIG_VALUE is the original value passed to memset to fill the memory with.
26538 Other arguments have the same meaning as for the previous function. */
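/* Note that DESTEXP and SRCEXP computed below describe the final pointer
   values (pointer plus the number of bytes processed); they are part of the
   rep_mov/rep_stos insn patterns so that the pointer updates done by the
   string instruction are represented in the RTL.  */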
26540 static void
26541 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
26542 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
26543 rtx count,
26544 machine_mode mode, bool issetmem)
26546 rtx destexp;
26547 rtx srcexp;
26548 rtx countreg;
26549 HOST_WIDE_INT rounded_count;
26551 /* If possible, it is shorter to use rep movs.
26552 TODO: Maybe it is better to move this logic to decide_alg. */
26553 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
26554 && (!issetmem || orig_value == const0_rtx))
26555 mode = SImode;
26557 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
26558 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
26560 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
26561 GET_MODE_SIZE (mode)));
26562 if (mode != QImode)
26564 destexp = gen_rtx_ASHIFT (Pmode, countreg,
26565 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26566 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
26568 else
26569 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
26570 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
26572 rounded_count
26573 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26574 destmem = shallow_copy_rtx (destmem);
26575 set_mem_size (destmem, rounded_count);
26577 else if (MEM_SIZE_KNOWN_P (destmem))
26578 clear_mem_size (destmem);
26580 if (issetmem)
26582 value = force_reg (mode, gen_lowpart (mode, value));
26583 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
26585 else
26587 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
26588 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
26589 if (mode != QImode)
26591 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
26592 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26593 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
26595 else
26596 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
26597 if (CONST_INT_P (count))
26599 rounded_count
26600 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26601 srcmem = shallow_copy_rtx (srcmem);
26602 set_mem_size (srcmem, rounded_count);
26604 else
26606 if (MEM_SIZE_KNOWN_P (srcmem))
26607 clear_mem_size (srcmem);
26609 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
26610 destexp, srcexp));
26614 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
26615 DESTMEM.
26616 SRCMEM is passed by pointer and is updated on return.
26617 The return value is the updated DESTMEM. */
26618 static rtx
26619 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
26620 HOST_WIDE_INT size_to_move)
26622 rtx dst = destmem, src = *srcmem, adjust, tempreg;
26623 enum insn_code code;
26624 machine_mode move_mode;
26625 int piece_size, i;
26627 /* Find the widest mode in which we could perform moves.
26628 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
26629 it until a move of that size is supported. */
26630 piece_size = 1 << floor_log2 (size_to_move);
26631 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
26632 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26634 gcc_assert (piece_size > 1);
26635 piece_size >>= 1;
26638 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26639 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26640 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26642 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26643 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
26644 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26646 move_mode = word_mode;
26647 piece_size = GET_MODE_SIZE (move_mode);
26648 code = optab_handler (mov_optab, move_mode);
26651 gcc_assert (code != CODE_FOR_nothing);
26653 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26654 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
26656 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
26657 gcc_assert (size_to_move % piece_size == 0);
26658 adjust = GEN_INT (piece_size);
26659 for (i = 0; i < size_to_move; i += piece_size)
26661 /* We move from memory to memory, so we'll need to do it via
26662 a temporary register. */
26663 tempreg = gen_reg_rtx (move_mode);
26664 emit_insn (GEN_FCN (code) (tempreg, src));
26665 emit_insn (GEN_FCN (code) (dst, tempreg));
26667 emit_move_insn (destptr,
26668 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26669 emit_move_insn (srcptr,
26670 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
26672 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26673 piece_size);
26674 src = adjust_automodify_address_nv (src, move_mode, srcptr,
26675 piece_size);
26678 /* Update DST and SRC rtx. */
26679 *srcmem = src;
26680 return dst;
26683 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
26684 static void
26685 expand_movmem_epilogue (rtx destmem, rtx srcmem,
26686 rtx destptr, rtx srcptr, rtx count, int max_size)
26688 rtx src, dest;
26689 if (CONST_INT_P (count))
26691 HOST_WIDE_INT countval = INTVAL (count);
26692 HOST_WIDE_INT epilogue_size = countval % max_size;
26693 int i;
26695 /* For now MAX_SIZE should be a power of 2. This assert could be
26696 relaxed, but it'll require a bit more complicated epilogue
26697 expanding. */
26698 gcc_assert ((max_size & (max_size - 1)) == 0);
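/* For example, with MAX_SIZE == 16 and a remaining count of 13 (= 8 + 4 + 1)
   this emits one 8-byte, one 4-byte and one 1-byte move.  */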
26699 for (i = max_size; i >= 1; i >>= 1)
26701 if (epilogue_size & i)
26702 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26704 return;
26706 if (max_size > 8)
26708 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
26709 count, 1, OPTAB_DIRECT);
26710 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
26711 count, QImode, 1, 4, false);
26712 return;
26715 /* When there are stringops, we can cheaply increase dest and src pointers.
26716 Otherwise we save code size by maintaining offset (zero is readily
26717 available from preceding rep operation) and using x86 addressing modes.
26719 if (TARGET_SINGLE_STRINGOP)
26721 if (max_size > 4)
26723 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26724 src = change_address (srcmem, SImode, srcptr);
26725 dest = change_address (destmem, SImode, destptr);
26726 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26727 emit_label (label);
26728 LABEL_NUSES (label) = 1;
26730 if (max_size > 2)
26732 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26733 src = change_address (srcmem, HImode, srcptr);
26734 dest = change_address (destmem, HImode, destptr);
26735 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26736 emit_label (label);
26737 LABEL_NUSES (label) = 1;
26739 if (max_size > 1)
26741 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26742 src = change_address (srcmem, QImode, srcptr);
26743 dest = change_address (destmem, QImode, destptr);
26744 emit_insn (gen_strmov (destptr, dest, srcptr, src));
26745 emit_label (label);
26746 LABEL_NUSES (label) = 1;
26749 else
26751 rtx offset = force_reg (Pmode, const0_rtx);
26752 rtx tmp;
26754 if (max_size > 4)
26756 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26757 src = change_address (srcmem, SImode, srcptr);
26758 dest = change_address (destmem, SImode, destptr);
26759 emit_move_insn (dest, src);
26760 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26761 true, OPTAB_LIB_WIDEN);
26762 if (tmp != offset)
26763 emit_move_insn (offset, tmp);
26764 emit_label (label);
26765 LABEL_NUSES (label) = 1;
26767 if (max_size > 2)
26769 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26770 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26771 src = change_address (srcmem, HImode, tmp);
26772 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26773 dest = change_address (destmem, HImode, tmp);
26774 emit_move_insn (dest, src);
26775 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26776 true, OPTAB_LIB_WIDEN);
26777 if (tmp != offset)
26778 emit_move_insn (offset, tmp);
26779 emit_label (label);
26780 LABEL_NUSES (label) = 1;
26782 if (max_size > 1)
26784 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26785 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26786 src = change_address (srcmem, QImode, tmp);
26787 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26788 dest = change_address (destmem, QImode, tmp);
26789 emit_move_insn (dest, src);
26790 emit_label (label);
26791 LABEL_NUSES (label) = 1;
26796 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
26797 with value PROMOTED_VAL.
26798 The return value is the updated DESTMEM. */
26800 static rtx
26801 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26802 HOST_WIDE_INT size_to_move)
26804 rtx dst = destmem, adjust;
26805 enum insn_code code;
26806 machine_mode move_mode;
26807 int piece_size, i;
26809 /* Find the widest mode in which we could perform moves: normally the mode
26810 of PROMOTED_VAL, narrowed to an integer mode if SIZE_TO_MOVE is smaller
26811 than that mode. */
26812 move_mode = GET_MODE (promoted_val);
26813 if (move_mode == VOIDmode)
26814 move_mode = QImode;
26815 if (size_to_move < GET_MODE_SIZE (move_mode))
26817 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26818 move_mode = int_mode_for_size (move_bits, 0).require ();
26819 promoted_val = gen_lowpart (move_mode, promoted_val);
26821 piece_size = GET_MODE_SIZE (move_mode);
26822 code = optab_handler (mov_optab, move_mode);
26823 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26825 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26827 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
26828 gcc_assert (size_to_move % piece_size == 0);
26829 adjust = GEN_INT (piece_size);
26830 for (i = 0; i < size_to_move; i += piece_size)
26832 if (piece_size <= GET_MODE_SIZE (word_mode))
26834 emit_insn (gen_strset (destptr, dst, promoted_val));
26835 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26836 piece_size);
26837 continue;
26840 emit_insn (GEN_FCN (code) (dst, promoted_val));
26842 emit_move_insn (destptr,
26843 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26845 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26846 piece_size);
26849 /* Update DST rtx. */
26850 return dst;
26852 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26853 static void
26854 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26855 rtx count, int max_size)
26857 count =
26858 expand_simple_binop (counter_mode (count), AND, count,
26859 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26860 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26861 gen_lowpart (QImode, value), count, QImode,
26862 1, max_size / 2, true);
26865 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
26866 static void
26867 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26868 rtx count, int max_size)
26870 rtx dest;
26872 if (CONST_INT_P (count))
26874 HOST_WIDE_INT countval = INTVAL (count);
26875 HOST_WIDE_INT epilogue_size = countval % max_size;
26876 int i;
26878 /* For now MAX_SIZE should be a power of 2. This assert could be
26879 relaxed, but it'll require a bit more complicated epilogue
26880 expanding. */
26881 gcc_assert ((max_size & (max_size - 1)) == 0);
26882 for (i = max_size; i >= 1; i >>= 1)
26884 if (epilogue_size & i)
26886 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26887 destmem = emit_memset (destmem, destptr, vec_value, i);
26888 else
26889 destmem = emit_memset (destmem, destptr, value, i);
26892 return;
26894 if (max_size > 32)
26896 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26897 return;
26899 if (max_size > 16)
26901 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26902 if (TARGET_64BIT)
26904 dest = change_address (destmem, DImode, destptr);
26905 emit_insn (gen_strset (destptr, dest, value));
26906 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26907 emit_insn (gen_strset (destptr, dest, value));
26909 else
26911 dest = change_address (destmem, SImode, destptr);
26912 emit_insn (gen_strset (destptr, dest, value));
26913 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26914 emit_insn (gen_strset (destptr, dest, value));
26915 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26916 emit_insn (gen_strset (destptr, dest, value));
26917 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26918 emit_insn (gen_strset (destptr, dest, value));
26920 emit_label (label);
26921 LABEL_NUSES (label) = 1;
26923 if (max_size > 8)
26925 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26926 if (TARGET_64BIT)
26928 dest = change_address (destmem, DImode, destptr);
26929 emit_insn (gen_strset (destptr, dest, value));
26931 else
26933 dest = change_address (destmem, SImode, destptr);
26934 emit_insn (gen_strset (destptr, dest, value));
26935 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26936 emit_insn (gen_strset (destptr, dest, value));
26938 emit_label (label);
26939 LABEL_NUSES (label) = 1;
26941 if (max_size > 4)
26943 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26944 dest = change_address (destmem, SImode, destptr);
26945 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26946 emit_label (label);
26947 LABEL_NUSES (label) = 1;
26949 if (max_size > 2)
26951 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26952 dest = change_address (destmem, HImode, destptr);
26953 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26954 emit_label (label);
26955 LABEL_NUSES (label) = 1;
26957 if (max_size > 1)
26959 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26960 dest = change_address (destmem, QImode, destptr);
26961 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26962 emit_label (label);
26963 LABEL_NUSES (label) = 1;
26967 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
26968 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
26969 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
26970 ignored.
26971 Return value is updated DESTMEM. */
26972 static rtx
26973 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26974 rtx destptr, rtx srcptr, rtx value,
26975 rtx vec_value, rtx count, int align,
26976 int desired_alignment, bool issetmem)
26978 int i;
26979 for (i = 1; i < desired_alignment; i <<= 1)
26981 if (align <= i)
26983 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26984 if (issetmem)
26986 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26987 destmem = emit_memset (destmem, destptr, vec_value, i);
26988 else
26989 destmem = emit_memset (destmem, destptr, value, i);
26991 else
26992 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26993 ix86_adjust_counter (count, i);
26994 emit_label (label);
26995 LABEL_NUSES (label) = 1;
26996 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26999 return destmem;
27002 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
27003 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
27004 and jump to DONE_LABEL. */
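/* The emitted sequence copies (or stores) SIZE bytes at the start of the
   block and SIZE bytes ending at its last byte; for the SIZE..2*SIZE-1 byte
   counts handled here these two possibly overlapping ranges cover the whole
   block without needing a loop.  */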
27005 static void
27006 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
27007 rtx destptr, rtx srcptr,
27008 rtx value, rtx vec_value,
27009 rtx count, int size,
27010 rtx done_label, bool issetmem)
27012 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
27013 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
27014 rtx modesize;
27015 int n;
27017 /* If we do not have vector value to copy, we must reduce size. */
27018 if (issetmem)
27020 if (!vec_value)
27022 if (GET_MODE (value) == VOIDmode && size > 8)
27023 mode = Pmode;
27024 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
27025 mode = GET_MODE (value);
27027 else
27028 mode = GET_MODE (vec_value), value = vec_value;
27030 else
27032 /* Choose appropriate vector mode. */
27033 if (size >= 32)
27034 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
27035 else if (size >= 16)
27036 mode = TARGET_SSE ? V16QImode : DImode;
27037 srcmem = change_address (srcmem, mode, srcptr);
27039 destmem = change_address (destmem, mode, destptr);
27040 modesize = GEN_INT (GET_MODE_SIZE (mode));
27041 gcc_assert (GET_MODE_SIZE (mode) <= size);
27042 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27044 if (issetmem)
27045 emit_move_insn (destmem, gen_lowpart (mode, value));
27046 else
27048 emit_move_insn (destmem, srcmem);
27049 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27051 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27054 destmem = offset_address (destmem, count, 1);
27055 destmem = offset_address (destmem, GEN_INT (-2 * size),
27056 GET_MODE_SIZE (mode));
27057 if (!issetmem)
27059 srcmem = offset_address (srcmem, count, 1);
27060 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
27061 GET_MODE_SIZE (mode));
27063 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27065 if (issetmem)
27066 emit_move_insn (destmem, gen_lowpart (mode, value));
27067 else
27069 emit_move_insn (destmem, srcmem);
27070 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27072 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27074 emit_jump_insn (gen_jump (done_label));
27075 emit_barrier ();
27077 emit_label (label);
27078 LABEL_NUSES (label) = 1;
27081 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
27082 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
27083 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
27084 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
27085 DONE_LABEL is a label after the whole copying sequence. The label is created
27086 on demand if *DONE_LABEL is NULL.
27087 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
27088 bounds after the initial copies.
27090 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
27091 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
27092 we will dispatch to a library call for large blocks.
27094 In pseudocode we do:
27096 if (COUNT < SIZE)
27098 Assume that SIZE is 4. Bigger sizes are handled analogously
27099 if (COUNT & 4)
27101 copy 4 bytes from SRCPTR to DESTPTR
27102 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
27103 goto done_label
27105 if (!COUNT)
27106 goto done_label;
27107 copy 1 byte from SRCPTR to DESTPTR
27108 if (COUNT & 2)
27110 copy 2 bytes from SRCPTR to DESTPTR
27111 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
27114 else
27116 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
27117 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
27119 OLD_DESTPTR = DESTPTR;
27120 Align DESTPTR up to DESIRED_ALIGN
27121 SRCPTR += DESTPTR - OLD_DESTPTR
27122 COUNT -= DESTPTR - OLD_DESTPTR
27123 if (DYNAMIC_CHECK)
27124 Round COUNT down to multiple of SIZE
27125 << optional caller supplied zero size guard is here >>
27126 << optional caller supplied dynamic check is here >>
27127 << caller supplied main copy loop is here >>
27129 done_label:
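/* As a concrete example, with SIZE == 16, DESIRED_ALIGN == 16, ALIGN == 1
and a 100-byte block: the small-block branch is skipped, one 16-byte move
covers bytes 0..15, another covers bytes 84..99, DESTPTR is then rounded up
to the next 16-byte boundary (SRCPTR and COUNT adjusted to match), and the
caller-supplied main loop handles the remaining aligned middle.  */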
27131 static void
27132 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
27133 rtx *destptr, rtx *srcptr,
27134 machine_mode mode,
27135 rtx value, rtx vec_value,
27136 rtx *count,
27137 rtx_code_label **done_label,
27138 int size,
27139 int desired_align,
27140 int align,
27141 unsigned HOST_WIDE_INT *min_size,
27142 bool dynamic_check,
27143 bool issetmem)
27145 rtx_code_label *loop_label = NULL, *label;
27146 int n;
27147 rtx modesize;
27148 int prolog_size = 0;
27149 rtx mode_value;
27151 /* Choose the proper value to copy. */
27152 if (issetmem && VECTOR_MODE_P (mode))
27153 mode_value = vec_value;
27154 else
27155 mode_value = value;
27156 gcc_assert (GET_MODE_SIZE (mode) <= size);
27158 /* See if block is big or small, handle small blocks. */
27159 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
27161 int size2 = size;
27162 loop_label = gen_label_rtx ();
27164 if (!*done_label)
27165 *done_label = gen_label_rtx ();
27167 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
27168 1, loop_label);
27169 size2 >>= 1;
27171 /* Handle sizes > 3. */
27172 for (;size2 > 2; size2 >>= 1)
27173 expand_small_movmem_or_setmem (destmem, srcmem,
27174 *destptr, *srcptr,
27175 value, vec_value,
27176 *count,
27177 size2, *done_label, issetmem);
27178 /* Nothing to copy? Jump to DONE_LABEL if so */
27179 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
27180 1, *done_label);
27182 /* Do a byte copy. */
27183 destmem = change_address (destmem, QImode, *destptr);
27184 if (issetmem)
27185 emit_move_insn (destmem, gen_lowpart (QImode, value));
27186 else
27188 srcmem = change_address (srcmem, QImode, *srcptr);
27189 emit_move_insn (destmem, srcmem);
27192 /* Handle sizes 2 and 3. */
27193 label = ix86_expand_aligntest (*count, 2, false);
27194 destmem = change_address (destmem, HImode, *destptr);
27195 destmem = offset_address (destmem, *count, 1);
27196 destmem = offset_address (destmem, GEN_INT (-2), 2);
27197 if (issetmem)
27198 emit_move_insn (destmem, gen_lowpart (HImode, value));
27199 else
27201 srcmem = change_address (srcmem, HImode, *srcptr);
27202 srcmem = offset_address (srcmem, *count, 1);
27203 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
27204 emit_move_insn (destmem, srcmem);
27207 emit_label (label);
27208 LABEL_NUSES (label) = 1;
27209 emit_jump_insn (gen_jump (*done_label));
27210 emit_barrier ();
27212 else
27213 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
27214 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
27216 /* Start memcpy for COUNT >= SIZE. */
27217 if (loop_label)
27219 emit_label (loop_label);
27220 LABEL_NUSES (loop_label) = 1;
27223 /* Copy first desired_align bytes. */
27224 if (!issetmem)
27225 srcmem = change_address (srcmem, mode, *srcptr);
27226 destmem = change_address (destmem, mode, *destptr);
27227 modesize = GEN_INT (GET_MODE_SIZE (mode));
27228 for (n = 0; prolog_size < desired_align - align; n++)
27230 if (issetmem)
27231 emit_move_insn (destmem, mode_value);
27232 else
27234 emit_move_insn (destmem, srcmem);
27235 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27237 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27238 prolog_size += GET_MODE_SIZE (mode);
27242 /* Copy last SIZE bytes. */
27243 destmem = offset_address (destmem, *count, 1);
27244 destmem = offset_address (destmem,
27245 GEN_INT (-size - prolog_size),
27247 if (issetmem)
27248 emit_move_insn (destmem, mode_value);
27249 else
27251 srcmem = offset_address (srcmem, *count, 1);
27252 srcmem = offset_address (srcmem,
27253 GEN_INT (-size - prolog_size),
27255 emit_move_insn (destmem, srcmem);
27257 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
27259 destmem = offset_address (destmem, modesize, 1);
27260 if (issetmem)
27261 emit_move_insn (destmem, mode_value);
27262 else
27264 srcmem = offset_address (srcmem, modesize, 1);
27265 emit_move_insn (destmem, srcmem);
27269 /* Align destination. */
27270 if (desired_align > 1 && desired_align > align)
27272 rtx saveddest = *destptr;
27274 gcc_assert (desired_align <= size);
27275 /* Align destptr up, place it to new register. */
27276 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
27277 GEN_INT (prolog_size),
27278 NULL_RTX, 1, OPTAB_DIRECT);
27279 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
27280 REG_POINTER (*destptr) = 1;
27281 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
27282 GEN_INT (-desired_align),
27283 *destptr, 1, OPTAB_DIRECT);
27284 /* See how many bytes we skipped. */
27285 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
27286 *destptr,
27287 saveddest, 1, OPTAB_DIRECT);
27288 /* Adjust srcptr and count. */
27289 if (!issetmem)
27290 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
27291 saveddest, *srcptr, 1, OPTAB_DIRECT);
27292 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27293 saveddest, *count, 1, OPTAB_DIRECT);
27294 /* We copied at most size + prolog_size. */
27295 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
27296 *min_size
27297 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
27298 else
27299 *min_size = 0;
27301 /* Our loops always round down the block size, but for dispatch to
27302 the library we need the precise value. */
27303 if (dynamic_check)
27304 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
27305 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
27307 else
27309 gcc_assert (prolog_size == 0);
27310 /* Decrease count, so we won't end up copying last word twice. */
27311 if (!CONST_INT_P (*count))
27312 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27313 constm1_rtx, *count, 1, OPTAB_DIRECT);
27314 else
27315 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
27316 (unsigned HOST_WIDE_INT)size));
27317 if (*min_size)
27318 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
27323 /* This function is like the previous one, except here we know how many bytes
27324 need to be copied. That allows us to update alignment not only of DST, which
27325 is returned, but also of SRC, which is passed as a pointer for that
27326 reason. */
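/* The loop below copies one piece per set bit of ALIGN_BYTES, in increasing
power-of-two sizes; e.g. for ALIGN_BYTES == 6 and DESIRED_ALIGN == 8 it
emits a 2-byte piece followed by a 4-byte piece.  */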
27327 static rtx
27328 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
27329 rtx srcreg, rtx value, rtx vec_value,
27330 int desired_align, int align_bytes,
27331 bool issetmem)
27333 rtx src = NULL;
27334 rtx orig_dst = dst;
27335 rtx orig_src = NULL;
27336 int piece_size = 1;
27337 int copied_bytes = 0;
27339 if (!issetmem)
27341 gcc_assert (srcp != NULL);
27342 src = *srcp;
27343 orig_src = src;
27346 for (piece_size = 1;
27347 piece_size <= desired_align && copied_bytes < align_bytes;
27348 piece_size <<= 1)
27350 if (align_bytes & piece_size)
27352 if (issetmem)
27354 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
27355 dst = emit_memset (dst, destreg, vec_value, piece_size);
27356 else
27357 dst = emit_memset (dst, destreg, value, piece_size);
27359 else
27360 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
27361 copied_bytes += piece_size;
27364 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
27365 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27366 if (MEM_SIZE_KNOWN_P (orig_dst))
27367 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
27369 if (!issetmem)
27371 int src_align_bytes = get_mem_align_offset (src, desired_align
27372 * BITS_PER_UNIT);
27373 if (src_align_bytes >= 0)
27374 src_align_bytes = desired_align - src_align_bytes;
27375 if (src_align_bytes >= 0)
27377 unsigned int src_align;
27378 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
27380 if ((src_align_bytes & (src_align - 1))
27381 == (align_bytes & (src_align - 1)))
27382 break;
27384 if (src_align > (unsigned int) desired_align)
27385 src_align = desired_align;
27386 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
27387 set_mem_align (src, src_align * BITS_PER_UNIT);
27389 if (MEM_SIZE_KNOWN_P (orig_src))
27390 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
27391 *srcp = src;
27394 return dst;
27397 /* Return true if ALG can be used in current context.
27398 Assume we expand memset if MEMSET is true. */
27399 static bool
27400 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
27402 if (alg == no_stringop)
27403 return false;
27404 if (alg == vector_loop)
27405 return TARGET_SSE || TARGET_AVX;
27406 /* Algorithms using the rep prefix want at least edi and ecx;
27407 additionally, memset wants eax and memcpy wants esi. Don't
27408 consider such algorithms if the user has appropriated those
27409 registers for their own purposes, or if we have a non-default
27410 address space, since some string insns cannot override the segment. */
27411 if (alg == rep_prefix_1_byte
27412 || alg == rep_prefix_4_byte
27413 || alg == rep_prefix_8_byte)
27415 if (have_as)
27416 return false;
27417 if (fixed_regs[CX_REG]
27418 || fixed_regs[DI_REG]
27419 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
27420 return false;
27422 return true;
27425 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
27426 static enum stringop_alg
27427 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
27428 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
27429 bool memset, bool zero_memset, bool have_as,
27430 int *dynamic_check, bool *noalign, bool recur)
27432 const struct stringop_algs *algs;
27433 bool optimize_for_speed;
27434 int max = 0;
27435 const struct processor_costs *cost;
27436 int i;
27437 bool any_alg_usable_p = false;
27439 *noalign = false;
27440 *dynamic_check = -1;
27442 /* Even if the string operation call is cold, we still might spend a lot
27443 of time processing large blocks. */
27444 if (optimize_function_for_size_p (cfun)
27445 || (optimize_insn_for_size_p ()
27446 && (max_size < 256
27447 || (expected_size != -1 && expected_size < 256))))
27448 optimize_for_speed = false;
27449 else
27450 optimize_for_speed = true;
27452 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
27453 if (memset)
27454 algs = &cost->memset[TARGET_64BIT != 0];
27455 else
27456 algs = &cost->memcpy[TARGET_64BIT != 0];
27458 /* See maximal size for user defined algorithm. */
27459 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27461 enum stringop_alg candidate = algs->size[i].alg;
27462 bool usable = alg_usable_p (candidate, memset, have_as);
27463 any_alg_usable_p |= usable;
27465 if (candidate != libcall && candidate && usable)
27466 max = algs->size[i].max;
27469 /* If the expected size is not known but the max size is small enough
27470 that the inline version is a win, set the expected size into
27471 the range. */
27472 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
27473 && expected_size == -1)
27474 expected_size = min_size / 2 + max_size / 2;
27476 /* If user specified the algorithm, honor it if possible. */
27477 if (ix86_stringop_alg != no_stringop
27478 && alg_usable_p (ix86_stringop_alg, memset, have_as))
27479 return ix86_stringop_alg;
27480 /* rep; movq or rep; movl is the smallest variant. */
27481 else if (!optimize_for_speed)
27483 *noalign = true;
27484 if (!count || (count & 3) || (memset && !zero_memset))
27485 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
27486 ? rep_prefix_1_byte : loop_1_byte;
27487 else
27488 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
27489 ? rep_prefix_4_byte : loop;
27491 /* Very tiny blocks are best handled via the loop; REP is expensive to
27492 set up. */
27493 else if (expected_size != -1 && expected_size < 4)
27494 return loop_1_byte;
27495 else if (expected_size != -1)
27497 enum stringop_alg alg = libcall;
27498 bool alg_noalign = false;
27499 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27501 /* We get here if the algorithms that were not libcall-based
27502 were rep-prefix based and we are unable to use rep prefixes
27503 based on global register usage. Break out of the loop and
27504 use the heuristic below. */
27505 if (algs->size[i].max == 0)
27506 break;
27507 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
27509 enum stringop_alg candidate = algs->size[i].alg;
27511 if (candidate != libcall
27512 && alg_usable_p (candidate, memset, have_as))
27514 alg = candidate;
27515 alg_noalign = algs->size[i].noalign;
27517 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
27518 last non-libcall inline algorithm. */
27519 if (TARGET_INLINE_ALL_STRINGOPS)
27521 /* When the current size is best copied by a libcall,
27522 but we are still forced to inline, run the heuristic below
27523 that will pick code for medium-sized blocks. */
27524 if (alg != libcall)
27526 *noalign = alg_noalign;
27527 return alg;
27529 else if (!any_alg_usable_p)
27530 break;
27532 else if (alg_usable_p (candidate, memset, have_as))
27534 *noalign = algs->size[i].noalign;
27535 return candidate;
27540 /* When asked to inline the call anyway, try to pick a meaningful choice.
27541 We look for the maximal size of block that is faster to copy by hand and
27542 take blocks of at most that size, guessing that the average size will
27543 be roughly half of the maximum.
27545 If this turns out to be bad, we might simply specify the preferred
27546 choice in ix86_costs. */
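/* For example, if the largest non-libcall entry in the cost table covers
blocks of up to 1024 bytes, we recurse below with an expected size of 512
and return whatever algorithm that smaller size selects.  */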
27547 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27548 && (algs->unknown_size == libcall
27549 || !alg_usable_p (algs->unknown_size, memset, have_as)))
27551 enum stringop_alg alg;
27552 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
27554 /* If there aren't any usable algorithms or if recursing already,
27555 then recursing on smaller sizes or same size isn't going to
27556 find anything. Just return the simple byte-at-a-time copy loop. */
27557 if (!any_alg_usable_p || recur)
27559 /* Pick something reasonable. */
27560 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
27561 *dynamic_check = 128;
27562 return loop_1_byte;
27564 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
27565 zero_memset, have_as, dynamic_check, noalign, true);
27566 gcc_assert (*dynamic_check == -1);
27567 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27568 *dynamic_check = max;
27569 else
27570 gcc_assert (alg != libcall);
27571 return alg;
27573 return (alg_usable_p (algs->unknown_size, memset, have_as)
27574 ? algs->unknown_size : libcall);
27577 /* Decide on alignment. We know that the operand is already aligned to ALIGN
27578 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
27579 static int
27580 decide_alignment (int align,
27581 enum stringop_alg alg,
27582 int expected_size,
27583 machine_mode move_mode)
27585 int desired_align = 0;
27587 gcc_assert (alg != no_stringop);
27589 if (alg == libcall)
27590 return 0;
27591 if (move_mode == VOIDmode)
27592 return 0;
27594 desired_align = GET_MODE_SIZE (move_mode);
27595 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
27596 copying a whole cache line at once. */
27597 if (TARGET_PENTIUMPRO
27598 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
27599 desired_align = 8;
27601 if (optimize_size)
27602 desired_align = 1;
27603 if (desired_align < align)
27604 desired_align = align;
27605 if (expected_size != -1 && expected_size < 4)
27606 desired_align = align;
27608 return desired_align;
27612 /* Helper function for memset. For a QImode value 0xXY produce
27613 0xXYXYXYXY of the width specified by MODE. This is essentially
27614 a multiplication by 0x01010101, but we can do slightly better than
27615 synth_mult by unwinding the sequence by hand on CPUs with
27616 a slow multiply. */
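/* For example, broadcasting 0x5A to SImode yields 0x5A5A5A5A: starting from
v = 0x5A, v |= v << 8 gives 0x5A5A and v |= v << 16 gives 0x5A5A5A5A, which
equals 0x5A * 0x01010101; DImode needs one more 32-bit shift-and-or.  */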
27617 static rtx
27618 promote_duplicated_reg (machine_mode mode, rtx val)
27620 machine_mode valmode = GET_MODE (val);
27621 rtx tmp;
27622 int nops = mode == DImode ? 3 : 2;
27624 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
27625 if (val == const0_rtx)
27626 return copy_to_mode_reg (mode, CONST0_RTX (mode));
27627 if (CONST_INT_P (val))
27629 HOST_WIDE_INT v = INTVAL (val) & 255;
27631 v |= v << 8;
27632 v |= v << 16;
27633 if (mode == DImode)
27634 v |= (v << 16) << 16;
27635 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
27638 if (valmode == VOIDmode)
27639 valmode = QImode;
27640 if (valmode != QImode)
27641 val = gen_lowpart (QImode, val);
27642 if (mode == QImode)
27643 return val;
27644 if (!TARGET_PARTIAL_REG_STALL)
27645 nops--;
27646 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27647 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27648 <= (ix86_cost->shift_const + ix86_cost->add) * nops
27649 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27651 rtx reg = convert_modes (mode, QImode, val, true);
27652 tmp = promote_duplicated_reg (mode, const1_rtx);
27653 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27654 OPTAB_DIRECT);
27656 else
27658 rtx reg = convert_modes (mode, QImode, val, true);
27660 if (!TARGET_PARTIAL_REG_STALL)
27661 if (mode == SImode)
27662 emit_insn (gen_insvsi_1 (reg, reg));
27663 else
27664 emit_insn (gen_insvdi_1 (reg, reg));
27665 else
27667 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27668 NULL, 1, OPTAB_DIRECT);
27669 reg =
27670 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27672 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27673 NULL, 1, OPTAB_DIRECT);
27674 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27675 if (mode == SImode)
27676 return reg;
27677 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27678 NULL, 1, OPTAB_DIRECT);
27679 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27680 return reg;
27684 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
27685 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
27686 alignment from ALIGN to DESIRED_ALIGN. */
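/* For example, a 64-bit memset whose main loop stores 8-byte chunks
(SIZE_NEEDED == 8) gets a DImode broadcast, while one that only needs 2-byte
stores and no extra alignment gets an HImode broadcast.  */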
27687 static rtx
27688 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27689 int align)
27691 rtx promoted_val;
27693 if (TARGET_64BIT
27694 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27695 promoted_val = promote_duplicated_reg (DImode, val);
27696 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27697 promoted_val = promote_duplicated_reg (SImode, val);
27698 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27699 promoted_val = promote_duplicated_reg (HImode, val);
27700 else
27701 promoted_val = val;
27703 return promoted_val;
27706 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
27707 operations when profitable. The code depends upon architecture, block size
27708 and alignment, but always has one of the following overall structures:
27710 Aligned move sequence:
27712 1) Prologue guard: Conditional that jumps up to epilogues for small
27713 blocks that can be handled by epilogue alone. This is faster
27714 but also needed for correctness, since the prologue assumes the block
27715 is larger than the desired alignment.
27717 Optional dynamic check for size and libcall for large
27718 blocks is emitted here too, with -minline-stringops-dynamically.
27720 2) Prologue: copy first few bytes in order to get destination
27721 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
27722 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27723 copied. We emit either a jump tree on power of two sized
27724 blocks, or a byte loop.
27726 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27727 with specified algorithm.
27729 4) Epilogue: code copying tail of the block that is too small to be
27730 handled by main body (or up to size guarded by prologue guard).
27732 Misaligned move sequence
27734 1) misaligned move prologue/epilogue containing:
27735 a) Prologue handling small memory blocks and jumping to done_label
27736 (skipped if blocks are known to be large enough)
27737 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
27738 needed by single possibly misaligned move
27739 (skipped if alignment is not needed)
27740 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27742 2) Zero size guard dispatching to done_label, if needed
27744 3) dispatch to library call, if needed,
27746 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27747 with specified algorithm. */
27748 bool
27749 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27750 rtx align_exp, rtx expected_align_exp,
27751 rtx expected_size_exp, rtx min_size_exp,
27752 rtx max_size_exp, rtx probable_max_size_exp,
27753 bool issetmem)
27755 rtx destreg;
27756 rtx srcreg = NULL;
27757 rtx_code_label *label = NULL;
27758 rtx tmp;
27759 rtx_code_label *jump_around_label = NULL;
27760 HOST_WIDE_INT align = 1;
27761 unsigned HOST_WIDE_INT count = 0;
27762 HOST_WIDE_INT expected_size = -1;
27763 int size_needed = 0, epilogue_size_needed;
27764 int desired_align = 0, align_bytes = 0;
27765 enum stringop_alg alg;
27766 rtx promoted_val = NULL;
27767 rtx vec_promoted_val = NULL;
27768 bool force_loopy_epilogue = false;
27769 int dynamic_check;
27770 bool need_zero_guard = false;
27771 bool noalign;
27772 machine_mode move_mode = VOIDmode;
27773 machine_mode wider_mode;
27774 int unroll_factor = 1;
27775 /* TODO: Once value ranges are available, fill in proper data. */
27776 unsigned HOST_WIDE_INT min_size = 0;
27777 unsigned HOST_WIDE_INT max_size = -1;
27778 unsigned HOST_WIDE_INT probable_max_size = -1;
27779 bool misaligned_prologue_used = false;
27780 bool have_as;
27782 if (CONST_INT_P (align_exp))
27783 align = INTVAL (align_exp);
27784 /* i386 can do misaligned access at a reasonably increased cost. */
27785 if (CONST_INT_P (expected_align_exp)
27786 && INTVAL (expected_align_exp) > align)
27787 align = INTVAL (expected_align_exp);
27788 /* ALIGN is the minimum of destination and source alignment, but we care here
27789 just about destination alignment. */
27790 else if (!issetmem
27791 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27792 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27794 if (CONST_INT_P (count_exp))
27796 min_size = max_size = probable_max_size = count = expected_size
27797 = INTVAL (count_exp);
27798 /* When COUNT is 0, there is nothing to do. */
27799 if (!count)
27800 return true;
27802 else
27804 if (min_size_exp)
27805 min_size = INTVAL (min_size_exp);
27806 if (max_size_exp)
27807 max_size = INTVAL (max_size_exp);
27808 if (probable_max_size_exp)
27809 probable_max_size = INTVAL (probable_max_size_exp);
27810 if (CONST_INT_P (expected_size_exp))
27811 expected_size = INTVAL (expected_size_exp);
27814 /* Make sure we don't need to care about overflow later on. */
27815 if (count > (HOST_WIDE_INT_1U << 30))
27816 return false;
27818 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27819 if (!issetmem)
27820 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27822 /* Step 0: Decide on preferred algorithm, desired alignment and
27823 size of chunks to be copied by main loop. */
27824 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27825 issetmem,
27826 issetmem && val_exp == const0_rtx, have_as,
27827 &dynamic_check, &noalign, false);
27828 if (alg == libcall)
27829 return false;
27830 gcc_assert (alg != no_stringop);
27832 /* For now the vector version of memset is generated only for memory zeroing, as
27833 creating the promoted vector value is very cheap in this case. */
27834 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27835 alg = unrolled_loop;
27837 if (!count)
27838 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27839 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27840 if (!issetmem)
27841 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27843 unroll_factor = 1;
27844 move_mode = word_mode;
27845 switch (alg)
27847 case libcall:
27848 case no_stringop:
27849 case last_alg:
27850 gcc_unreachable ();
27851 case loop_1_byte:
27852 need_zero_guard = true;
27853 move_mode = QImode;
27854 break;
27855 case loop:
27856 need_zero_guard = true;
27857 break;
27858 case unrolled_loop:
27859 need_zero_guard = true;
27860 unroll_factor = (TARGET_64BIT ? 4 : 2);
27861 break;
27862 case vector_loop:
27863 need_zero_guard = true;
27864 unroll_factor = 4;
27865 /* Find the widest supported mode. */
27866 move_mode = word_mode;
27867 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27868 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27869 move_mode = wider_mode;
27871 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27872 move_mode = TImode;
27874 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27875 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27876 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27878 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27879 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27880 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27881 move_mode = word_mode;
27883 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27884 break;
27885 case rep_prefix_8_byte:
27886 move_mode = DImode;
27887 break;
27888 case rep_prefix_4_byte:
27889 move_mode = SImode;
27890 break;
27891 case rep_prefix_1_byte:
27892 move_mode = QImode;
27893 break;
27895 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27896 epilogue_size_needed = size_needed;
27898 /* If we are going to call any library calls conditionally, make sure any
27899 pending stack adjustments happen before the first conditional branch,
27900 otherwise they will be emitted before the library call only and won't
27901 happen from the other branches. */
27902 if (dynamic_check != -1)
27903 do_pending_stack_adjust ();
27905 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27906 if (!TARGET_ALIGN_STRINGOPS || noalign)
27907 align = desired_align;
27909 /* Step 1: Prologue guard. */
27911 /* Alignment code needs count to be in register. */
27912 if (CONST_INT_P (count_exp) && desired_align > align)
27914 if (INTVAL (count_exp) > desired_align
27915 && INTVAL (count_exp) > size_needed)
27917 align_bytes
27918 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27919 if (align_bytes <= 0)
27920 align_bytes = 0;
27921 else
27922 align_bytes = desired_align - align_bytes;
27924 if (align_bytes == 0)
27925 count_exp = force_reg (counter_mode (count_exp), count_exp);
27927 gcc_assert (desired_align >= 1 && align >= 1);
27929 /* Misaligned move sequences handle both prologue and epilogue at once.
27930 Default code generation results in smaller code for large alignments
27931 and also avoids redundant work when sizes are known precisely. */
27932 misaligned_prologue_used
27933 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27934 && MAX (desired_align, epilogue_size_needed) <= 32
27935 && desired_align <= epilogue_size_needed
27936 && ((desired_align > align && !align_bytes)
27937 || (!count && epilogue_size_needed > 1)));
27939 /* Do the cheap promotion to allow better CSE across the
27940 main loop and epilogue (i.e. one load of the big constant in
27941 front of all code).
27942 For now the misaligned move sequences do not have a fast path
27943 without broadcasting. */
27944 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27946 if (alg == vector_loop)
27948 gcc_assert (val_exp == const0_rtx);
27949 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27950 promoted_val = promote_duplicated_reg_to_size (val_exp,
27951 GET_MODE_SIZE (word_mode),
27952 desired_align, align);
27954 else
27956 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27957 desired_align, align);
27960 /* Misaligned move sequences handle both prologues and epilogues at once.
27961 Default code generation results in smaller code for large alignments and
27962 also avoids redundant work when sizes are known precisely. */
27963 if (misaligned_prologue_used)
27965 /* The misaligned move prologue handles small blocks by itself. */
27966 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27967 (dst, src, &destreg, &srcreg,
27968 move_mode, promoted_val, vec_promoted_val,
27969 &count_exp,
27970 &jump_around_label,
27971 desired_align < align
27972 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27973 desired_align, align, &min_size, dynamic_check, issetmem);
27974 if (!issetmem)
27975 src = change_address (src, BLKmode, srcreg);
27976 dst = change_address (dst, BLKmode, destreg);
27977 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27978 epilogue_size_needed = 0;
27979 if (need_zero_guard
27980 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27982 /* It is possible that we copied enough so the main loop will not
27983 execute. */
27984 gcc_assert (size_needed > 1);
27985 if (jump_around_label == NULL_RTX)
27986 jump_around_label = gen_label_rtx ();
27987 emit_cmp_and_jump_insns (count_exp,
27988 GEN_INT (size_needed),
27989 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27990 if (expected_size == -1
27991 || expected_size < (desired_align - align) / 2 + size_needed)
27992 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27993 else
27994 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27997 /* Ensure that alignment prologue won't copy past end of block. */
27998 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
28000 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
28001 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
28002 Make sure it is power of 2. */
28003 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
28005 /* To improve the performance of small blocks, we jump around the VAL
28006 promoting code. This means that if the promoted VAL is not constant,
28007 we might not use it in the epilogue and have to use the byte
28008 loop variant. */
28009 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
28010 force_loopy_epilogue = true;
28011 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28012 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28014 /* If main algorithm works on QImode, no epilogue is needed.
28015 For small sizes just don't align anything. */
28016 if (size_needed == 1)
28017 desired_align = align;
28018 else
28019 goto epilogue;
28021 else if (!count
28022 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
28024 label = gen_label_rtx ();
28025 emit_cmp_and_jump_insns (count_exp,
28026 GEN_INT (epilogue_size_needed),
28027 LTU, 0, counter_mode (count_exp), 1, label);
28028 if (expected_size == -1 || expected_size < epilogue_size_needed)
28029 predict_jump (REG_BR_PROB_BASE * 60 / 100);
28030 else
28031 predict_jump (REG_BR_PROB_BASE * 20 / 100);
28035 /* Emit code to decide at runtime whether a library call or inline code should be
28036 used. */
28037 if (dynamic_check != -1)
28039 if (!issetmem && CONST_INT_P (count_exp))
28041 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
28043 emit_block_copy_via_libcall (dst, src, count_exp);
28044 count_exp = const0_rtx;
28045 goto epilogue;
28048 else
28050 rtx_code_label *hot_label = gen_label_rtx ();
28051 if (jump_around_label == NULL_RTX)
28052 jump_around_label = gen_label_rtx ();
28053 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
28054 LEU, 0, counter_mode (count_exp),
28055 1, hot_label);
28056 predict_jump (REG_BR_PROB_BASE * 90 / 100);
28057 if (issetmem)
28058 set_storage_via_libcall (dst, count_exp, val_exp);
28059 else
28060 emit_block_copy_via_libcall (dst, src, count_exp);
28061 emit_jump (jump_around_label);
28062 emit_label (hot_label);
28066 /* Step 2: Alignment prologue. */
28067 /* Do the expensive promotion once we branched off the small blocks. */
28068 if (issetmem && !promoted_val)
28069 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
28070 desired_align, align);
28072 if (desired_align > align && !misaligned_prologue_used)
28074 if (align_bytes == 0)
28076 /* Except for the first move in the prologue, we no longer know
28077 the constant offset in the aliasing info. It doesn't seem worth
28078 the pain to maintain it for the first move, so throw away
28079 the info early. */
28080 dst = change_address (dst, BLKmode, destreg);
28081 if (!issetmem)
28082 src = change_address (src, BLKmode, srcreg);
28083 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
28084 promoted_val, vec_promoted_val,
28085 count_exp, align, desired_align,
28086 issetmem);
28087 /* At most desired_align - align bytes are copied. */
28088 if (min_size < (unsigned)(desired_align - align))
28089 min_size = 0;
28090 else
28091 min_size -= desired_align - align;
28093 else
28095 /* If we know how many bytes need to be stored before dst is
28096 sufficiently aligned, maintain aliasing info accurately. */
28097 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
28098 srcreg,
28099 promoted_val,
28100 vec_promoted_val,
28101 desired_align,
28102 align_bytes,
28103 issetmem);
28105 count_exp = plus_constant (counter_mode (count_exp),
28106 count_exp, -align_bytes);
28107 count -= align_bytes;
28108 min_size -= align_bytes;
28109 max_size -= align_bytes;
28111 if (need_zero_guard
28112 && min_size < (unsigned HOST_WIDE_INT) size_needed
28113 && (count < (unsigned HOST_WIDE_INT) size_needed
28114 || (align_bytes == 0
28115 && count < ((unsigned HOST_WIDE_INT) size_needed
28116 + desired_align - align))))
28118 /* It is possible that we copied enough so the main loop will not
28119 execute. */
28120 gcc_assert (size_needed > 1);
28121 if (label == NULL_RTX)
28122 label = gen_label_rtx ();
28123 emit_cmp_and_jump_insns (count_exp,
28124 GEN_INT (size_needed),
28125 LTU, 0, counter_mode (count_exp), 1, label);
28126 if (expected_size == -1
28127 || expected_size < (desired_align - align) / 2 + size_needed)
28128 predict_jump (REG_BR_PROB_BASE * 20 / 100);
28129 else
28130 predict_jump (REG_BR_PROB_BASE * 60 / 100);
28133 if (label && size_needed == 1)
28135 emit_label (label);
28136 LABEL_NUSES (label) = 1;
28137 label = NULL;
28138 epilogue_size_needed = 1;
28139 if (issetmem)
28140 promoted_val = val_exp;
28142 else if (label == NULL_RTX && !misaligned_prologue_used)
28143 epilogue_size_needed = size_needed;
28145 /* Step 3: Main loop. */
28147 switch (alg)
28149 case libcall:
28150 case no_stringop:
28151 case last_alg:
28152 gcc_unreachable ();
28153 case loop_1_byte:
28154 case loop:
28155 case unrolled_loop:
28156 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
28157 count_exp, move_mode, unroll_factor,
28158 expected_size, issetmem);
28159 break;
28160 case vector_loop:
28161 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
28162 vec_promoted_val, count_exp, move_mode,
28163 unroll_factor, expected_size, issetmem);
28164 break;
28165 case rep_prefix_8_byte:
28166 case rep_prefix_4_byte:
28167 case rep_prefix_1_byte:
28168 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
28169 val_exp, count_exp, move_mode, issetmem);
28170 break;
28172 /* Properly adjust the offset of src and dest memory for aliasing. */
28173 if (CONST_INT_P (count_exp))
28175 if (!issetmem)
28176 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
28177 (count / size_needed) * size_needed);
28178 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
28179 (count / size_needed) * size_needed);
28181 else
28183 if (!issetmem)
28184 src = change_address (src, BLKmode, srcreg);
28185 dst = change_address (dst, BLKmode, destreg);
28188 /* Step 4: Epilogue to copy the remaining bytes. */
28189 epilogue:
28190 if (label)
28192 /* When the main loop is done, COUNT_EXP might hold original count,
28193 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
28194 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
28195 bytes. Compensate if needed. */
28197 if (size_needed < epilogue_size_needed)
28199 tmp =
28200 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
28201 GEN_INT (size_needed - 1), count_exp, 1,
28202 OPTAB_DIRECT);
28203 if (tmp != count_exp)
28204 emit_move_insn (count_exp, tmp);
28206 emit_label (label);
28207 LABEL_NUSES (label) = 1;
28210 if (count_exp != const0_rtx && epilogue_size_needed > 1)
28212 if (force_loopy_epilogue)
28213 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
28214 epilogue_size_needed);
28215 else
28217 if (issetmem)
28218 expand_setmem_epilogue (dst, destreg, promoted_val,
28219 vec_promoted_val, count_exp,
28220 epilogue_size_needed);
28221 else
28222 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
28223 epilogue_size_needed);
28226 if (jump_around_label)
28227 emit_label (jump_around_label);
28228 return true;
28232 /* Expand the appropriate insns for doing strlen if not just doing
28233 repnz; scasb
28235 out = result, initialized with the start address
28236 align_rtx = alignment of the address.
28237 scratch = scratch register, initialized with the start address when
28238 not aligned, otherwise undefined
28240 This is just the body. It needs the initializations mentioned above and
28241 some address computing at the end. These things are done in i386.md. */
28243 static void
28244 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
28246 int align;
28247 rtx tmp;
28248 rtx_code_label *align_2_label = NULL;
28249 rtx_code_label *align_3_label = NULL;
28250 rtx_code_label *align_4_label = gen_label_rtx ();
28251 rtx_code_label *end_0_label = gen_label_rtx ();
28252 rtx mem;
28253 rtx tmpreg = gen_reg_rtx (SImode);
28254 rtx scratch = gen_reg_rtx (SImode);
28255 rtx cmp;
28257 align = 0;
28258 if (CONST_INT_P (align_rtx))
28259 align = INTVAL (align_rtx);
28261 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
28263 /* Is there a known alignment and is it less than 4? */
28264 if (align < 4)
28266 rtx scratch1 = gen_reg_rtx (Pmode);
28267 emit_move_insn (scratch1, out);
28268 /* Is there a known alignment and is it not 2? */
28269 if (align != 2)
28271 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
28272 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
28274 /* Leave just the 3 lower bits. */
28275 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
28276 NULL_RTX, 0, OPTAB_WIDEN);
28278 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28279 Pmode, 1, align_4_label);
28280 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
28281 Pmode, 1, align_2_label);
28282 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
28283 Pmode, 1, align_3_label);
28285 else
28287 /* Since the alignment is 2, we have to check 2 or 0 bytes;
28288 check if it is aligned to 4 bytes. */
28290 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
28291 NULL_RTX, 0, OPTAB_WIDEN);
28293 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28294 Pmode, 1, align_4_label);
28297 mem = change_address (src, QImode, out);
28299 /* Now compare the bytes. */
28301 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
28302 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
28303 QImode, 1, end_0_label);
28305 /* Increment the address. */
28306 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28308 /* Not needed with an alignment of 2 */
28309 if (align != 2)
28311 emit_label (align_2_label);
28313 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28314 end_0_label);
28316 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28318 emit_label (align_3_label);
28321 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28322 end_0_label);
28324 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28327 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
28328 align this loop; doing so only makes the program bigger and does not
28329 speed it up. */
28330 emit_label (align_4_label);
28332 mem = change_address (src, SImode, out);
28333 emit_move_insn (scratch, mem);
28334 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
28336 /* This formula yields a nonzero result iff one of the bytes is zero.
28337 This saves three branches inside the loop and many cycles. */
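/* In scalar terms the insns below compute
tmpreg = (SCRATCH - 0x01010101) & ~SCRATCH & 0x80808080,
which is nonzero exactly when the loaded word contains a zero byte; the
least significant set bit marks the first zero byte, which the fixup code
further down relies on.  For example, 0x11002233 gives 0x00800000, while a
word with no zero byte gives 0.  */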
28339 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
28340 emit_insn (gen_one_cmplsi2 (scratch, scratch));
28341 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
28342 emit_insn (gen_andsi3 (tmpreg, tmpreg,
28343 gen_int_mode (0x80808080, SImode)));
28344 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
28345 align_4_label);
28347 if (TARGET_CMOVE)
28349 rtx reg = gen_reg_rtx (SImode);
28350 rtx reg2 = gen_reg_rtx (Pmode);
28351 emit_move_insn (reg, tmpreg);
28352 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
28354 /* If zero is not in the first two bytes, move two bytes forward. */
28355 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28356 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28357 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28358 emit_insn (gen_rtx_SET (tmpreg,
28359 gen_rtx_IF_THEN_ELSE (SImode, tmp,
28360 reg,
28361 tmpreg)));
28362 /* Emit lea manually to avoid clobbering of flags. */
28363 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
28365 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28366 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28367 emit_insn (gen_rtx_SET (out,
28368 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
28369 reg2,
28370 out)));
28372 else
28374 rtx_code_label *end_2_label = gen_label_rtx ();
28375 /* Is zero in the first two bytes? */
28377 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28378 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28379 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
28380 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28381 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
28382 pc_rtx);
28383 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
28384 JUMP_LABEL (tmp) = end_2_label;
28386 /* Not in the first two. Move two bytes forward. */
28387 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
28388 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
28390 emit_label (end_2_label);
28394 /* Avoid branch in fixing the byte. */
28395 tmpreg = gen_lowpart (QImode, tmpreg);
28396 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
28397 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
28398 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
28399 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
28401 emit_label (end_0_label);
28404 /* Expand strlen. */
28406 bool
28407 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
28409 rtx addr, scratch1, scratch2, scratch3, scratch4;
28411 /* The generic case of the strlen expander is long. Avoid its
28412 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
28414 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28415 && !TARGET_INLINE_ALL_STRINGOPS
28416 && !optimize_insn_for_size_p ()
28417 && (!CONST_INT_P (align) || INTVAL (align) < 4))
28418 return false;
28420 addr = force_reg (Pmode, XEXP (src, 0));
28421 scratch1 = gen_reg_rtx (Pmode);
28423 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28424 && !optimize_insn_for_size_p ())
28426 /* Well it seems that some optimizer does not combine a call like
28427 foo(strlen(bar), strlen(bar));
28428 when the move and the subtraction are done here. It does calculate
28429 the length just once when these instructions are done inside of
28430 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
28431 often used and I use one fewer register for the lifetime of
28432 output_strlen_unroll() this is better. */
28434 emit_move_insn (out, addr);
28436 ix86_expand_strlensi_unroll_1 (out, src, align);
28438 /* strlensi_unroll_1 returns the address of the zero at the end of
28439 the string, like memchr(), so compute the length by subtracting
28440 the start address. */
28441 emit_insn (ix86_gen_sub3 (out, out, addr));
28443 else
28445 rtx unspec;
28447 /* Can't use this if the user has appropriated eax, ecx, or edi. */
28448 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
28449 return false;
28450 /* Can't use this for non-default address spaces. */
28451 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
28452 return false;
28454 scratch2 = gen_reg_rtx (Pmode);
28455 scratch3 = gen_reg_rtx (Pmode);
28456 scratch4 = force_reg (Pmode, constm1_rtx);
28458 emit_move_insn (scratch3, addr);
28459 eoschar = force_reg (QImode, eoschar);
28461 src = replace_equiv_address_nv (src, scratch3);
28463 /* If .md starts supporting :P, this can be done in .md. */
28464 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
28465 scratch4), UNSPEC_SCAS);
28466 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
28467 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
28468 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
28470 return true;
28473 /* For a given symbol (function) construct code to compute the address of its PLT
28474 entry in the large x86-64 PIC model. */
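/* The sequence below loads (const (unspec [symbol] UNSPEC_PLTOFF)) into a
fresh register and adds the PIC register, so the result is the GOT base plus
the symbol's PLT offset; in assembly this typically becomes a movabs of the
sym@PLTOFF constant followed by an add of the PIC register.  */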
28475 static rtx
28476 construct_plt_address (rtx symbol)
28478 rtx tmp, unspec;
28480 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
28481 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
28482 gcc_assert (Pmode == DImode);
28484 tmp = gen_reg_rtx (Pmode);
28485 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
28487 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
28488 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
28489 return tmp;
28493 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
28494 rtx callarg2,
28495 rtx pop, bool sibcall)
28497 rtx vec[3];
28498 rtx use = NULL, call;
28499 unsigned int vec_len = 0;
28500 tree fndecl;
28502 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28504 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
28505 if (fndecl
28506 && (lookup_attribute ("interrupt",
28507 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
28508 error ("interrupt service routine can't be called directly");
28510 else
28511 fndecl = NULL_TREE;
28513 if (pop == const0_rtx)
28514 pop = NULL;
28515 gcc_assert (!TARGET_64BIT || !pop);
28517 if (TARGET_MACHO && !TARGET_64BIT)
28519 #if TARGET_MACHO
28520 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28521 fnaddr = machopic_indirect_call_target (fnaddr);
28522 #endif
28524 else
28526 /* Static functions and indirect calls don't need the pic register. Also,
28527 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
28528 it an indirect call. */
28529 rtx addr = XEXP (fnaddr, 0);
28530 if (flag_pic
28531 && GET_CODE (addr) == SYMBOL_REF
28532 && !SYMBOL_REF_LOCAL_P (addr))
28534 if (flag_plt
28535 && (SYMBOL_REF_DECL (addr) == NULL_TREE
28536 || !lookup_attribute ("noplt",
28537 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
28539 if (!TARGET_64BIT
28540 || (ix86_cmodel == CM_LARGE_PIC
28541 && DEFAULT_ABI != MS_ABI))
28543 use_reg (&use, gen_rtx_REG (Pmode,
28544 REAL_PIC_OFFSET_TABLE_REGNUM));
28545 if (ix86_use_pseudo_pic_reg ())
28546 emit_move_insn (gen_rtx_REG (Pmode,
28547 REAL_PIC_OFFSET_TABLE_REGNUM),
28548 pic_offset_table_rtx);
28551 else if (!TARGET_PECOFF && !TARGET_MACHO)
28553 if (TARGET_64BIT)
28555 fnaddr = gen_rtx_UNSPEC (Pmode,
28556 gen_rtvec (1, addr),
28557 UNSPEC_GOTPCREL);
28558 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28560 else
28562 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
28563 UNSPEC_GOT);
28564 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28565 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
28566 fnaddr);
28568 fnaddr = gen_const_mem (Pmode, fnaddr);
28569 /* Pmode may not be the same as word_mode for x32, which
28570 doesn't support indirect branch via 32-bit memory slot.
28571 Since x32 GOT slot is 64 bit with zero upper 32 bits,
28572 indirect branch via x32 GOT slot is OK. */
28573 if (GET_MODE (fnaddr) != word_mode)
28574 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
28575 fnaddr = gen_rtx_MEM (QImode, fnaddr);
28580 /* Skip setting up RAX register for -mskip-rax-setup when there are no
28581 parameters passed in vector registers. */
28582 if (TARGET_64BIT
28583 && (INTVAL (callarg2) > 0
28584 || (INTVAL (callarg2) == 0
28585 && (TARGET_SSE || !flag_skip_rax_setup))))
28587 rtx al = gen_rtx_REG (QImode, AX_REG);
28588 emit_move_insn (al, callarg2);
28589 use_reg (&use, al);
28592 if (ix86_cmodel == CM_LARGE_PIC
28593 && !TARGET_PECOFF
28594 && MEM_P (fnaddr)
28595 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
28596 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
28597 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
28598 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
28599 branch via x32 GOT slot is OK. */
28600 else if (!(TARGET_X32
28601 && MEM_P (fnaddr)
28602 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
28603 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
28604 && (sibcall
28605 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
28606 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
28608 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
28609 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
28612 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
28614 if (retval)
28616 /* We should add the bound registers as destinations in case
28617 a pointer with bounds may be returned. */
28618 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
28620 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
28621 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
28622 if (GET_CODE (retval) == PARALLEL)
28624 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
28625 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
28626 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
28627 retval = chkp_join_splitted_slot (retval, par);
28629 else
28631 retval = gen_rtx_PARALLEL (VOIDmode,
28632 gen_rtvec (3, retval, b0, b1));
28633 chkp_put_regs_to_expr_list (retval);
28637 call = gen_rtx_SET (retval, call);
28639 vec[vec_len++] = call;
28641 if (pop)
28643 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28644 pop = gen_rtx_SET (stack_pointer_rtx, pop);
28645 vec[vec_len++] = pop;
28648 if (cfun->machine->no_caller_saved_registers
28649 && (!fndecl
28650 || (!TREE_THIS_VOLATILE (fndecl)
28651 && !lookup_attribute ("no_caller_saved_registers",
28652 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28654 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28655 bool is_64bit_ms_abi = (TARGET_64BIT
28656 && ix86_function_abi (fndecl) == MS_ABI);
28657 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28659 /* If there are no caller-saved registers, add all registers
28660 that are clobbered by the call which returns. */
28661 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28662 if (!fixed_regs[i]
28663 && (ix86_call_used_regs[i] == 1
28664 || (ix86_call_used_regs[i] & c_mask))
28665 && !STACK_REGNO_P (i)
28666 && !MMX_REGNO_P (i))
28667 clobber_reg (&use,
28668 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28670 else if (TARGET_64BIT_MS_ABI
28671 && (!callarg2 || INTVAL (callarg2) != -2))
28673 unsigned i;
28675 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
28677 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28678 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28680 clobber_reg (&use, gen_rtx_REG (mode, regno));
28683 /* Set here, but it may get cleared later. */
28684 if (TARGET_CALL_MS2SYSV_XLOGUES)
28686 if (!TARGET_SSE)
28689 /* Don't break hot-patched functions. */
28690 else if (ix86_function_ms_hook_prologue (current_function_decl))
28693 /* TODO: Cases not yet examined. */
28694 else if (flag_split_stack)
28695 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
28697 else
28699 gcc_assert (!reload_completed);
28700 cfun->machine->call_ms2sysv = true;
28705 if (vec_len > 1)
28706 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28707 call = emit_call_insn (call);
28708 if (use)
28709 CALL_INSN_FUNCTION_USAGE (call) = use;
28711 return call;
28714 /* Return true if the function being called was marked with attribute
28715 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
28716 to handle the non-PIC case in the backend because there is no easy
28717 interface for the front-end to force non-PLT calls to use the GOT.
28718 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28719 to call the function marked "noplt" indirectly. */
28721 static bool
28722 ix86_nopic_noplt_attribute_p (rtx call_op)
28724 if (flag_pic || ix86_cmodel == CM_LARGE
28725 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28726 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28727 || SYMBOL_REF_LOCAL_P (call_op))
28728 return false;
28730 tree symbol_decl = SYMBOL_REF_DECL (call_op);
28732 if (!flag_plt
28733 || (symbol_decl != NULL_TREE
28734 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28735 return true;
28737 return false;
28740 /* Output indirect branch via a call and return thunk. CALL_OP is a
28741 register which contains the branch target. XASM is the assembly
28742 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
28743 A normal call is converted to:
28745 call __x86_indirect_thunk_reg
28747 and a tail call is converted to:
28749 jmp __x86_indirect_thunk_reg
28752 static void
28753 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
28755 char thunk_name_buf[32];
28756 char *thunk_name;
28757 enum indirect_thunk_prefix need_prefix
28758 = indirect_thunk_need_prefix (current_output_insn);
28759 int regno = REGNO (call_op);
28761 if (cfun->machine->indirect_branch_type
28762 != indirect_branch_thunk_inline)
28764 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28766 int i = regno;
28767 if (i >= FIRST_REX_INT_REG)
28768 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
28769 if (need_prefix == indirect_thunk_prefix_bnd)
28770 indirect_thunks_bnd_used |= 1 << i;
28771 else
28772 indirect_thunks_used |= 1 << i;
28774 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28775 thunk_name = thunk_name_buf;
28777 else
28778 thunk_name = NULL;
28780 if (sibcall_p)
28782 if (thunk_name != NULL)
28784 if (need_prefix == indirect_thunk_prefix_bnd)
28785 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28786 else
28787 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28789 else
28790 output_indirect_thunk (need_prefix, regno);
28792 else
28794 if (thunk_name != NULL)
28796 if (need_prefix == indirect_thunk_prefix_bnd)
28797 fprintf (asm_out_file, "\tbnd call\t%s\n", thunk_name);
28798 else
28799 fprintf (asm_out_file, "\tcall\t%s\n", thunk_name);
28800 return;
28803 char indirectlabel1[32];
28804 char indirectlabel2[32];
28806 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28807 INDIRECT_LABEL,
28808 indirectlabelno++);
28809 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28810 INDIRECT_LABEL,
28811 indirectlabelno++);
28813 /* Jump. */
28814 if (need_prefix == indirect_thunk_prefix_bnd)
28815 fputs ("\tbnd jmp\t", asm_out_file);
28816 else
28817 fputs ("\tjmp\t", asm_out_file);
28818 assemble_name_raw (asm_out_file, indirectlabel2);
28819 fputc ('\n', asm_out_file);
28821 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28823 if (thunk_name != NULL)
28825 if (need_prefix == indirect_thunk_prefix_bnd)
28826 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28827 else
28828 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28830 else
28831 output_indirect_thunk (need_prefix, regno);
28833 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28835 /* Call. */
28836 if (need_prefix == indirect_thunk_prefix_bnd)
28837 fputs ("\tbnd call\t", asm_out_file);
28838 else
28839 fputs ("\tcall\t", asm_out_file);
28840 assemble_name_raw (asm_out_file, indirectlabel1);
28841 fputc ('\n', asm_out_file);
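/* Rough illustration: with -mindirect-branch=thunk an indirect call through
   a register, e.g. "call *%rax", is emitted above as a direct call to the
   per-register thunk,

	call	__x86_indirect_thunk_rax

   and a sibling call "jmp *%rax" becomes

	jmp	__x86_indirect_thunk_rax

   With -mindirect-branch=thunk-inline the thunk body is expanded in place
   between the two internal labels generated above, and the "call" targets
   the first label instead of a named thunk.  */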
28845 /* Output indirect branch via a call and return thunk. CALL_OP is
28846 the branch target. XASM is the assembly template for CALL_OP.
28847 Branch is a tail call if SIBCALL_P is true. A normal call is
28848 converted to:
28850 jmp L2
28851 L1:
28852 push CALL_OP
28853 jmp __x86_indirect_thunk
28854 L2:
28855 call L1
28857 and a tail call is converted to:
28859 push CALL_OP
28860 jmp __x86_indirect_thunk
28863 static void
28864 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
28865 bool sibcall_p)
28867 char thunk_name_buf[32];
28868 char *thunk_name;
28869 char push_buf[64];
28870 enum indirect_thunk_prefix need_prefix
28871 = indirect_thunk_need_prefix (current_output_insn);
28872 int regno = -1;
28874 if (cfun->machine->indirect_branch_type
28875 != indirect_branch_thunk_inline)
28877 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28879 if (need_prefix == indirect_thunk_prefix_bnd)
28880 indirect_thunk_bnd_needed = true;
28881 else
28882 indirect_thunk_needed = true;
28884 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28885 thunk_name = thunk_name_buf;
28887 else
28888 thunk_name = NULL;
28890 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
28891 TARGET_64BIT ? 'q' : 'l', xasm);
28893 if (sibcall_p)
28895 output_asm_insn (push_buf, &call_op);
28896 if (thunk_name != NULL)
28898 if (need_prefix == indirect_thunk_prefix_bnd)
28899 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28900 else
28901 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28903 else
28904 output_indirect_thunk (need_prefix, regno);
28906 else
28908 char indirectlabel1[32];
28909 char indirectlabel2[32];
28911 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28912 INDIRECT_LABEL,
28913 indirectlabelno++);
28914 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28915 INDIRECT_LABEL,
28916 indirectlabelno++);
28918 /* Jump. */
28919 if (need_prefix == indirect_thunk_prefix_bnd)
28920 fputs ("\tbnd jmp\t", asm_out_file);
28921 else
28922 fputs ("\tjmp\t", asm_out_file);
28923 assemble_name_raw (asm_out_file, indirectlabel2);
28924 fputc ('\n', asm_out_file);
28926 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28928 /* An external function may be called via GOT, instead of PLT. */
28929 if (MEM_P (call_op))
28931 struct ix86_address parts;
28932 rtx addr = XEXP (call_op, 0);
28933 if (ix86_decompose_address (addr, &parts)
28934 && parts.base == stack_pointer_rtx)
28936 /* Since the call will adjust the stack by -UNITS_PER_WORD,
28937 we must convert "disp(stack, index, scale)" to
28938 "disp+UNITS_PER_WORD(stack, index, scale)". */
28939 if (parts.index)
28941 addr = gen_rtx_MULT (Pmode, parts.index,
28942 GEN_INT (parts.scale));
28943 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28944 addr);
28946 else
28947 addr = stack_pointer_rtx;
28949 rtx disp;
28950 if (parts.disp != NULL_RTX)
28951 disp = plus_constant (Pmode, parts.disp,
28952 UNITS_PER_WORD);
28953 else
28954 disp = GEN_INT (UNITS_PER_WORD);
28956 addr = gen_rtx_PLUS (Pmode, addr, disp);
28957 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
28961 output_asm_insn (push_buf, &call_op);
28963 if (thunk_name != NULL)
28965 if (need_prefix == indirect_thunk_prefix_bnd)
28966 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28967 else
28968 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28970 else
28971 output_indirect_thunk (need_prefix, regno);
28973 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28975 /* Call. */
28976 if (need_prefix == indirect_thunk_prefix_bnd)
28977 fputs ("\tbnd call\t", asm_out_file);
28978 else
28979 fputs ("\tcall\t", asm_out_file);
28980 assemble_name_raw (asm_out_file, indirectlabel1);
28981 fputc ('\n', asm_out_file);
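/* Rough example of the displacement fixup above: for a 32-bit indirect call
   whose operand lives at 4(%esp), the sequence emitted under
   -mindirect-branch=thunk looks roughly like

	jmp	.LIND1
.LIND0:
	pushl	8(%esp)		# was 4(%esp); the call below moved %esp down
	jmp	__x86_indirect_thunk
.LIND1:
	call	.LIND0

   The label names are illustrative; the real ones come from
   ASM_GENERATE_INTERNAL_LABEL with the INDIRECT_LABEL prefix.  */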
28985 /* Output indirect branch via a call and return thunk. CALL_OP is
28986 the branch target. XASM is the assembly template for CALL_OP.
28987 Branch is a tail call if SIBCALL_P is true. */
28989 static void
28990 ix86_output_indirect_branch (rtx call_op, const char *xasm,
28991 bool sibcall_p)
28993 if (REG_P (call_op))
28994 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
28995 else
28996 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
28999 /* Output indirect jump. CALL_OP is the jump target. */
29001 const char *
29002 ix86_output_indirect_jmp (rtx call_op)
29004 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
29006 /* We can't have a red zone, since the "call" in the indirect thunk
29007 pushes the return address onto the stack, destroying the red zone. */
29008 if (ix86_red_zone_size != 0)
29009 gcc_unreachable ();
29011 ix86_output_indirect_branch (call_op, "%0", true);
29012 return "";
29014 else
29015 return "%!jmp\t%A0";
29018 /* Output a function return. Add a REP prefix to RET if LONG_P is true
29019 and the function return is kept. */
29021 const char *
29022 ix86_output_function_return (bool long_p)
29024 if (cfun->machine->function_return_type != indirect_branch_keep)
29026 char thunk_name[32];
29027 enum indirect_thunk_prefix need_prefix
29028 = indirect_thunk_need_prefix (current_output_insn);
29030 if (cfun->machine->function_return_type
29031 != indirect_branch_thunk_inline)
29033 bool need_thunk = (cfun->machine->function_return_type
29034 == indirect_branch_thunk);
29035 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
29036 true);
29037 if (need_prefix == indirect_thunk_prefix_bnd)
29039 indirect_return_bnd_needed |= need_thunk;
29040 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29042 else
29044 indirect_return_needed |= need_thunk;
29045 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29048 else
29049 output_indirect_thunk (need_prefix, INVALID_REGNUM);
29051 return "";
29054 if (!long_p || ix86_bnd_prefixed_insn_p (current_output_insn))
29055 return "%!ret";
29057 return "rep%; ret";
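/* Rough illustration: with -mfunction-return=thunk the "ret" emitted here is
   replaced by a jump to the return thunk,

	jmp	__x86_return_thunk

   while -mfunction-return=thunk-inline expands the thunk body in place via
   output_indirect_thunk.  With the default -mfunction-return=keep the
   function ends in "ret", or "rep ret" when LONG_P requests the longer
   encoding.  */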
29060 /* Output indirect function return. RET_OP is the function return
29061 target. */
29063 const char *
29064 ix86_output_indirect_function_return (rtx ret_op)
29066 if (cfun->machine->function_return_type != indirect_branch_keep)
29068 char thunk_name[32];
29069 enum indirect_thunk_prefix need_prefix
29070 = indirect_thunk_need_prefix (current_output_insn);
29071 unsigned int regno = REGNO (ret_op);
29072 gcc_assert (regno == CX_REG);
29074 if (cfun->machine->function_return_type
29075 != indirect_branch_thunk_inline)
29077 bool need_thunk = (cfun->machine->function_return_type
29078 == indirect_branch_thunk);
29079 indirect_thunk_name (thunk_name, regno, need_prefix, true);
29080 if (need_prefix == indirect_thunk_prefix_bnd)
29082 if (need_thunk)
29084 indirect_return_via_cx_bnd = true;
29085 indirect_thunks_bnd_used |= 1 << CX_REG;
29087 fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29089 else
29091 if (need_thunk)
29093 indirect_return_via_cx = true;
29094 indirect_thunks_used |= 1 << CX_REG;
29096 fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29099 else
29100 output_indirect_thunk (need_prefix, regno);
29102 return "";
29104 else
29105 return "%!jmp\t%A0";
29108 /* Split a simple return that pops POPC bytes from the stack into an
29109 indirect branch with a stack adjustment. */
29111 void
29112 ix86_split_simple_return_pop_internal (rtx popc)
29114 struct machine_function *m = cfun->machine;
29115 rtx ecx = gen_rtx_REG (SImode, CX_REG);
29116 rtx_insn *insn;
29118 /* There is no "pascal" calling convention in any 64-bit ABI. */
29119 gcc_assert (!TARGET_64BIT);
29121 insn = emit_insn (gen_pop (ecx));
29122 m->fs.cfa_offset -= UNITS_PER_WORD;
29123 m->fs.sp_offset -= UNITS_PER_WORD;
29125 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
29126 x = gen_rtx_SET (stack_pointer_rtx, x);
29127 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29128 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
29129 RTX_FRAME_RELATED_P (insn) = 1;
29131 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc);
29132 x = gen_rtx_SET (stack_pointer_rtx, x);
29133 insn = emit_insn (x);
29134 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29135 RTX_FRAME_RELATED_P (insn) = 1;
29137 /* Now return address is in ECX. */
29138 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
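/* Rough example: on ia32 a stdcall-style "ret $12", which pops 12 bytes of
   arguments, is split by the function above into

	popl	%ecx		# return address -> %ecx
	addl	$12, %esp	# drop the callee-popped argument bytes
	jmp	*%ecx		# return (possibly via an indirect-branch thunk)

   hence the !TARGET_64BIT assertion and the CFA notes on both stack
   adjustments.  */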
29141 /* Output the assembly for a call instruction. */
29143 const char *
29144 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
29146 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
29147 bool output_indirect_p
29148 = (!TARGET_SEH
29149 && cfun->machine->indirect_branch_type != indirect_branch_keep);
29150 bool seh_nop_p = false;
29151 const char *xasm;
29153 if (SIBLING_CALL_P (insn))
29155 if (direct_p)
29157 if (ix86_nopic_noplt_attribute_p (call_op))
29159 direct_p = false;
29160 if (TARGET_64BIT)
29162 if (output_indirect_p)
29163 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29164 else
29165 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29167 else
29169 if (output_indirect_p)
29170 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29171 else
29172 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29175 else
29176 xasm = "%!jmp\t%P0";
29178 /* SEH epilogue detection requires the indirect branch case
29179 to include REX.W. */
29180 else if (TARGET_SEH)
29181 xasm = "%!rex.W jmp\t%A0";
29182 else
29184 if (output_indirect_p)
29185 xasm = "%0";
29186 else
29187 xasm = "%!jmp\t%A0";
29190 if (output_indirect_p && !direct_p)
29191 ix86_output_indirect_branch (call_op, xasm, true);
29192 else
29193 output_asm_insn (xasm, &call_op);
29194 return "";
29197 /* SEH unwinding can require an extra nop to be emitted in several
29198 circumstances. Determine if we have one of those. */
29199 if (TARGET_SEH)
29201 rtx_insn *i;
29203 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
29205 /* Prevent a catch region from being adjacent to a jump that would
29206 be interpreted as an epilogue sequence by the unwinder. */
29207 if (JUMP_P(i) && CROSSING_JUMP_P (i))
29209 seh_nop_p = true;
29210 break;
29213 /* If we get to another real insn, we don't need the nop. */
29214 if (INSN_P (i))
29215 break;
29217 /* If we get to the epilogue note, prevent a catch region from
29218 being adjacent to the standard epilogue sequence. With
29219 -fnon-call-exceptions, we'll have done this during epilogue emission. */
29220 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
29221 && !flag_non_call_exceptions
29222 && !can_throw_internal (insn))
29224 seh_nop_p = true;
29225 break;
29229 /* If we didn't find a real insn following the call, prevent the
29230 unwinder from looking into the next function. */
29231 if (i == NULL)
29232 seh_nop_p = true;
29235 if (direct_p)
29237 if (ix86_nopic_noplt_attribute_p (call_op))
29239 direct_p = false;
29240 if (TARGET_64BIT)
29242 if (output_indirect_p)
29243 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29244 else
29245 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29247 else
29249 if (output_indirect_p)
29250 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29251 else
29252 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29255 else
29256 xasm = "%!call\t%P0";
29258 else
29260 if (output_indirect_p)
29261 xasm = "%0";
29262 else
29263 xasm = "%!call\t%A0";
29266 if (output_indirect_p && !direct_p)
29267 ix86_output_indirect_branch (call_op, xasm, false);
29268 else
29269 output_asm_insn (xasm, &call_op);
29271 if (seh_nop_p)
29272 return "nop";
29274 return "";
29277 /* Clear stack slot assignments remembered from previous functions.
29278 This is called from INIT_EXPANDERS once before RTL is emitted for each
29279 function. */
29281 static struct machine_function *
29282 ix86_init_machine_status (void)
29284 struct machine_function *f;
29286 f = ggc_cleared_alloc<machine_function> ();
29287 f->call_abi = ix86_abi;
29289 return f;
29292 /* Return a MEM corresponding to a stack slot with mode MODE.
29293 Allocate a new slot if necessary.
29295 The RTL for a function can have several slots available: N is
29296 which slot to use. */
29299 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
29301 struct stack_local_entry *s;
29303 gcc_assert (n < MAX_386_STACK_LOCALS);
29305 for (s = ix86_stack_locals; s; s = s->next)
29306 if (s->mode == mode && s->n == n)
29307 return validize_mem (copy_rtx (s->rtl));
29309 s = ggc_alloc<stack_local_entry> ();
29310 s->n = n;
29311 s->mode = mode;
29312 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
29314 s->next = ix86_stack_locals;
29315 ix86_stack_locals = s;
29316 return validize_mem (copy_rtx (s->rtl));
29319 static void
29320 ix86_instantiate_decls (void)
29322 struct stack_local_entry *s;
29324 for (s = ix86_stack_locals; s; s = s->next)
29325 if (s->rtl != NULL_RTX)
29326 instantiate_decl_rtl (s->rtl);
29329 /* Return the number used for encoding REG, in the range 0..7. */
29331 static int
29332 reg_encoded_number (rtx reg)
29334 unsigned regno = REGNO (reg);
29335 switch (regno)
29337 case AX_REG:
29338 return 0;
29339 case CX_REG:
29340 return 1;
29341 case DX_REG:
29342 return 2;
29343 case BX_REG:
29344 return 3;
29345 case SP_REG:
29346 return 4;
29347 case BP_REG:
29348 return 5;
29349 case SI_REG:
29350 return 6;
29351 case DI_REG:
29352 return 7;
29353 default:
29354 break;
29356 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
29357 return regno - FIRST_STACK_REG;
29358 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
29359 return regno - FIRST_SSE_REG;
29360 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
29361 return regno - FIRST_MMX_REG;
29362 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
29363 return regno - FIRST_REX_SSE_REG;
29364 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
29365 return regno - FIRST_REX_INT_REG;
29366 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
29367 return regno - FIRST_MASK_REG;
29368 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
29369 return regno - FIRST_BND_REG;
29370 return -1;
29373 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
29374 in its encoding if it could be relevant for ROP mitigation, otherwise
29375 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
29376 used for calculating it into them. */
29378 static int
29379 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
29380 int *popno0 = 0, int *popno1 = 0)
29382 if (asm_noperands (PATTERN (insn)) >= 0)
29383 return -1;
29384 int has_modrm = get_attr_modrm (insn);
29385 if (!has_modrm)
29386 return -1;
29387 enum attr_modrm_class cls = get_attr_modrm_class (insn);
29388 rtx op0, op1;
29389 switch (cls)
29391 case MODRM_CLASS_OP02:
29392 gcc_assert (noperands >= 3);
29393 if (popno0)
29395 *popno0 = 0;
29396 *popno1 = 2;
29398 op0 = operands[0];
29399 op1 = operands[2];
29400 break;
29401 case MODRM_CLASS_OP01:
29402 gcc_assert (noperands >= 2);
29403 if (popno0)
29405 *popno0 = 0;
29406 *popno1 = 1;
29408 op0 = operands[0];
29409 op1 = operands[1];
29410 break;
29411 default:
29412 return -1;
29414 if (REG_P (op0) && REG_P (op1))
29416 int enc0 = reg_encoded_number (op0);
29417 int enc1 = reg_encoded_number (op1);
29418 return 0xc0 + (enc1 << 3) + enc0;
29420 return -1;
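/* Rough worked example for the register/register case above: if op0 is %ecx
   (encoding 1) and op1 is %edx (encoding 2), the result is
   0xc0 + (2 << 3) + 1 = 0xd1, i.e. a modrm byte with mod = 11, reg = 2 and
   r/m = 1.  */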
29423 /* Check whether x86 address PARTS is a pc-relative address. */
29425 bool
29426 ix86_rip_relative_addr_p (struct ix86_address *parts)
29428 rtx base, index, disp;
29430 base = parts->base;
29431 index = parts->index;
29432 disp = parts->disp;
29434 if (disp && !base && !index)
29436 if (TARGET_64BIT)
29438 rtx symbol = disp;
29440 if (GET_CODE (disp) == CONST)
29441 symbol = XEXP (disp, 0);
29442 if (GET_CODE (symbol) == PLUS
29443 && CONST_INT_P (XEXP (symbol, 1)))
29444 symbol = XEXP (symbol, 0);
29446 if (GET_CODE (symbol) == LABEL_REF
29447 || (GET_CODE (symbol) == SYMBOL_REF
29448 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
29449 || (GET_CODE (symbol) == UNSPEC
29450 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
29451 || XINT (symbol, 1) == UNSPEC_PCREL
29452 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
29453 return true;
29456 return false;
29459 /* Calculate the length of the memory address in the instruction encoding.
29460 Includes the addr32 prefix; it does not include the one-byte modrm, opcode,
29461 or other prefixes. We never generate the addr32 prefix for an LEA insn. */
29464 memory_address_length (rtx addr, bool lea)
29466 struct ix86_address parts;
29467 rtx base, index, disp;
29468 int len;
29469 int ok;
29471 if (GET_CODE (addr) == PRE_DEC
29472 || GET_CODE (addr) == POST_INC
29473 || GET_CODE (addr) == PRE_MODIFY
29474 || GET_CODE (addr) == POST_MODIFY)
29475 return 0;
29477 ok = ix86_decompose_address (addr, &parts);
29478 gcc_assert (ok);
29480 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
29482 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
29483 if (TARGET_64BIT && !lea
29484 && (SImode_address_operand (addr, VOIDmode)
29485 || (parts.base && GET_MODE (parts.base) == SImode)
29486 || (parts.index && GET_MODE (parts.index) == SImode)))
29487 len++;
29489 base = parts.base;
29490 index = parts.index;
29491 disp = parts.disp;
29493 if (base && SUBREG_P (base))
29494 base = SUBREG_REG (base);
29495 if (index && SUBREG_P (index))
29496 index = SUBREG_REG (index);
29498 gcc_assert (base == NULL_RTX || REG_P (base));
29499 gcc_assert (index == NULL_RTX || REG_P (index));
29501 /* Rule of thumb:
29502 - esp as the base always wants an index,
29503 - ebp as the base always wants a displacement,
29504 - r12 as the base always wants an index,
29505 - r13 as the base always wants a displacement. */
29507 /* Register Indirect. */
29508 if (base && !index && !disp)
29510 /* esp (for its index) and ebp (for its displacement) need
29511 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
29512 code. */
29513 if (base == arg_pointer_rtx
29514 || base == frame_pointer_rtx
29515 || REGNO (base) == SP_REG
29516 || REGNO (base) == BP_REG
29517 || REGNO (base) == R12_REG
29518 || REGNO (base) == R13_REG)
29519 len++;
29522 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
29523 is not disp32, but disp32(%rip), so for disp32
29524 SIB byte is needed, unless print_operand_address
29525 optimizes it into disp32(%rip) or (%rip) is implied
29526 by UNSPEC. */
29527 else if (disp && !base && !index)
29529 len += 4;
29530 if (!ix86_rip_relative_addr_p (&parts))
29531 len++;
29533 else
29535 /* Find the length of the displacement constant. */
29536 if (disp)
29538 if (base && satisfies_constraint_K (disp))
29539 len += 1;
29540 else
29541 len += 4;
29543 /* ebp always wants a displacement. Similarly r13. */
29544 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
29545 len++;
29547 /* An index requires the two-byte modrm form.... */
29548 if (index
29549 /* ...like esp (or r12), which always wants an index. */
29550 || base == arg_pointer_rtx
29551 || base == frame_pointer_rtx
29552 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
29553 len++;
29556 return len;
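/* Rough worked examples for -m32 with the default address space (the modrm
   byte itself is never counted here):

     (%eax)		-> 0	plain register indirect
     (%esp)		-> 1	%esp as the base needs a SIB byte
     (%ebp)		-> 1	%ebp as the base needs a disp8 of zero
     4(%esp)		-> 2	disp8 plus the SIB byte
     sym(,%eax,4)	-> 5	disp32 plus the SIB byte required by the index  */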
29559 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
29560 is set, expect that the insn has an 8-bit immediate alternative. */
29562 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
29564 int len = 0;
29565 int i;
29566 extract_insn_cached (insn);
29567 for (i = recog_data.n_operands - 1; i >= 0; --i)
29568 if (CONSTANT_P (recog_data.operand[i]))
29570 enum attr_mode mode = get_attr_mode (insn);
29572 gcc_assert (!len);
29573 if (shortform && CONST_INT_P (recog_data.operand[i]))
29575 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
29576 switch (mode)
29578 case MODE_QI:
29579 len = 1;
29580 continue;
29581 case MODE_HI:
29582 ival = trunc_int_for_mode (ival, HImode);
29583 break;
29584 case MODE_SI:
29585 ival = trunc_int_for_mode (ival, SImode);
29586 break;
29587 default:
29588 break;
29590 if (IN_RANGE (ival, -128, 127))
29592 len = 1;
29593 continue;
29596 switch (mode)
29598 case MODE_QI:
29599 len = 1;
29600 break;
29601 case MODE_HI:
29602 len = 2;
29603 break;
29604 case MODE_SI:
29605 len = 4;
29606 break;
29607 /* Immediates for DImode instructions are encoded
29608 as 32-bit sign-extended values. */
29609 case MODE_DI:
29610 len = 4;
29611 break;
29612 default:
29613 fatal_insn ("unknown insn mode", insn);
29616 return len;
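/* Rough examples: with SHORTFORM set, "addl $100, %eax" counts 1 byte (the
   value fits a sign-extended imm8) while "addl $1000, %eax" counts 4;
   without a short form the full width of the mode is used, so HImode
   immediates count 2 bytes and SImode/DImode immediates count 4.  */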
29619 /* Compute default value for "length_address" attribute. */
29621 ix86_attr_length_address_default (rtx_insn *insn)
29623 int i;
29625 if (get_attr_type (insn) == TYPE_LEA)
29627 rtx set = PATTERN (insn), addr;
29629 if (GET_CODE (set) == PARALLEL)
29630 set = XVECEXP (set, 0, 0);
29632 gcc_assert (GET_CODE (set) == SET);
29634 addr = SET_SRC (set);
29636 return memory_address_length (addr, true);
29639 extract_insn_cached (insn);
29640 for (i = recog_data.n_operands - 1; i >= 0; --i)
29642 rtx op = recog_data.operand[i];
29643 if (MEM_P (op))
29645 constrain_operands_cached (insn, reload_completed);
29646 if (which_alternative != -1)
29648 const char *constraints = recog_data.constraints[i];
29649 int alt = which_alternative;
29651 while (*constraints == '=' || *constraints == '+')
29652 constraints++;
29653 while (alt-- > 0)
29654 while (*constraints++ != ',')
29656 /* Skip ignored operands. */
29657 if (*constraints == 'X')
29658 continue;
29661 int len = memory_address_length (XEXP (op, 0), false);
29663 /* Account for segment prefix for non-default addr spaces. */
29664 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
29665 len++;
29667 return len;
29670 return 0;
29673 /* Compute the default value for the "length_vex" attribute. It includes the
29674 2- or 3-byte VEX prefix and 1 opcode byte. */
29677 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
29678 bool has_vex_w)
29680 int i;
29682 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
29683 requires the 3-byte VEX prefix. */
29684 if (!has_0f_opcode || has_vex_w)
29685 return 3 + 1;
29687 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
29688 if (!TARGET_64BIT)
29689 return 2 + 1;
29691 extract_insn_cached (insn);
29693 for (i = recog_data.n_operands - 1; i >= 0; --i)
29694 if (REG_P (recog_data.operand[i]))
29696 /* REX.W bit uses 3 byte VEX prefix. */
29697 if (GET_MODE (recog_data.operand[i]) == DImode
29698 && GENERAL_REG_P (recog_data.operand[i]))
29699 return 3 + 1;
29701 else
29703 /* REX.X or REX.B bits use 3 byte VEX prefix. */
29704 if (MEM_P (recog_data.operand[i])
29705 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
29706 return 3 + 1;
29709 return 2 + 1;
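/* Rough examples (the result always includes one opcode byte): a non-0f
   opcode map or the VEX.W bit forces the 3-byte prefix, giving 3 + 1 = 4;
   in 64-bit code the same holds for a DImode general-register operand
   (REX.W) or a memory operand whose base or index is one of %r8-%r15
   (REX.X/REX.B); otherwise the 2-byte prefix suffices, giving 2 + 1 = 3.  */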
29713 static bool
29714 ix86_class_likely_spilled_p (reg_class_t);
29716 /* Return true if the lhs of INSN is a HW function argument register; set
29717 *IS_SPILLED to true if it is a likely-spilled HW register. */
29718 static bool
29719 insn_is_function_arg (rtx insn, bool* is_spilled)
29721 rtx dst;
29723 if (!NONDEBUG_INSN_P (insn))
29724 return false;
29725 /* Call instructions are not movable; ignore them. */
29726 if (CALL_P (insn))
29727 return false;
29728 insn = PATTERN (insn);
29729 if (GET_CODE (insn) == PARALLEL)
29730 insn = XVECEXP (insn, 0, 0);
29731 if (GET_CODE (insn) != SET)
29732 return false;
29733 dst = SET_DEST (insn);
29734 if (REG_P (dst) && HARD_REGISTER_P (dst)
29735 && ix86_function_arg_regno_p (REGNO (dst)))
29737 /* Is it a likely-spilled HW register? */
29738 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
29739 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
29740 *is_spilled = true;
29741 return true;
29743 return false;
29746 /* Add output dependencies for the chain of function arguments adjacent to
29747 the call, but only if there is a move to a likely-spilled HW register.
29748 Return the first argument if at least one dependence was added, or NULL otherwise. */
29749 static rtx_insn *
29750 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
29752 rtx_insn *insn;
29753 rtx_insn *last = call;
29754 rtx_insn *first_arg = NULL;
29755 bool is_spilled = false;
29757 head = PREV_INSN (head);
29759 /* Find the argument-passing instruction nearest to the call. */
29760 while (true)
29762 last = PREV_INSN (last);
29763 if (last == head)
29764 return NULL;
29765 if (!NONDEBUG_INSN_P (last))
29766 continue;
29767 if (insn_is_function_arg (last, &is_spilled))
29768 break;
29769 return NULL;
29772 first_arg = last;
29773 while (true)
29775 insn = PREV_INSN (last);
29776 if (!INSN_P (insn))
29777 break;
29778 if (insn == head)
29779 break;
29780 if (!NONDEBUG_INSN_P (insn))
29782 last = insn;
29783 continue;
29785 if (insn_is_function_arg (insn, &is_spilled))
29787 /* Add an output dependence between two function arguments if the chain
29788 of output arguments contains likely-spilled HW registers. */
29789 if (is_spilled)
29790 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29791 first_arg = last = insn;
29793 else
29794 break;
29796 if (!is_spilled)
29797 return NULL;
29798 return first_arg;
29801 /* Add output or anti dependency from insn to first_arg to restrict its code
29802 motion. */
29803 static void
29804 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
29806 rtx set;
29807 rtx tmp;
29809 /* Add anti dependencies for bounds stores. */
29810 if (INSN_P (insn)
29811 && GET_CODE (PATTERN (insn)) == PARALLEL
29812 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
29813 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
29815 add_dependence (first_arg, insn, REG_DEP_ANTI);
29816 return;
29819 set = single_set (insn);
29820 if (!set)
29821 return;
29822 tmp = SET_DEST (set);
29823 if (REG_P (tmp))
29825 /* Add output dependency to the first function argument. */
29826 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29827 return;
29829 /* Add anti dependency. */
29830 add_dependence (first_arg, insn, REG_DEP_ANTI);
29833 /* Avoid cross-block motion of a function argument by adding a dependency
29834 from the first non-jump instruction in BB. */
29835 static void
29836 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
29838 rtx_insn *insn = BB_END (bb);
29840 while (insn)
29842 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
29844 rtx set = single_set (insn);
29845 if (set)
29847 avoid_func_arg_motion (arg, insn);
29848 return;
29851 if (insn == BB_HEAD (bb))
29852 return;
29853 insn = PREV_INSN (insn);
29857 /* Hook for the pre-reload scheduler - avoid motion of function arguments
29858 passed in likely-spilled HW registers. */
29859 static void
29860 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
29862 rtx_insn *insn;
29863 rtx_insn *first_arg = NULL;
29864 if (reload_completed)
29865 return;
29866 while (head != tail && DEBUG_INSN_P (head))
29867 head = NEXT_INSN (head);
29868 for (insn = tail; insn != head; insn = PREV_INSN (insn))
29869 if (INSN_P (insn) && CALL_P (insn))
29871 first_arg = add_parameter_dependencies (insn, head);
29872 if (first_arg)
29874 /* Add a dependee for the first argument to predecessors, but only if the
29875 region contains more than one block. */
29876 basic_block bb = BLOCK_FOR_INSN (insn);
29877 int rgn = CONTAINING_RGN (bb->index);
29878 int nr_blks = RGN_NR_BLOCKS (rgn);
29879 /* Skip trivial regions and region head blocks that can have
29880 predecessors outside of region. */
29881 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
29883 edge e;
29884 edge_iterator ei;
29886 /* Regions are SCCs with the exception of selective
29887 scheduling with pipelining of outer blocks enabled.
29888 So also check that immediate predecessors of a non-head
29889 block are in the same region. */
29890 FOR_EACH_EDGE (e, ei, bb->preds)
29892 /* Avoid creating loop-carried dependencies by using the
29893 topological ordering in the region. */
29894 if (rgn == CONTAINING_RGN (e->src->index)
29895 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
29896 add_dependee_for_func_arg (first_arg, e->src);
29899 insn = first_arg;
29900 if (insn == head)
29901 break;
29904 else if (first_arg)
29905 avoid_func_arg_motion (first_arg, insn);
29908 /* Hook for the pre-reload scheduler - set the priority of moves from
29909 likely-spilled HW registers to the maximum, to schedule them as soon as
29910 possible. These are moves from function argument registers at the top of
29911 the function entry and moves from function return value registers after a call. */
29912 static int
29913 ix86_adjust_priority (rtx_insn *insn, int priority)
29915 rtx set;
29917 if (reload_completed)
29918 return priority;
29920 if (!NONDEBUG_INSN_P (insn))
29921 return priority;
29923 set = single_set (insn);
29924 if (set)
29926 rtx tmp = SET_SRC (set);
29927 if (REG_P (tmp)
29928 && HARD_REGISTER_P (tmp)
29929 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
29930 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
29931 return current_sched_info->sched_max_insns_priority;
29934 return priority;
29937 /* Prepare for scheduling pass. */
29938 static void
29939 ix86_sched_init_global (FILE *, int, int)
29941 /* Install scheduling hooks for current CPU. Some of these hooks are used
29942 in time-critical parts of the scheduler, so we only set them up when
29943 they are actually used. */
29944 switch (ix86_tune)
29946 case PROCESSOR_CORE2:
29947 case PROCESSOR_NEHALEM:
29948 case PROCESSOR_SANDYBRIDGE:
29949 case PROCESSOR_HASWELL:
29950 case PROCESSOR_GENERIC:
29951 /* Do not perform multipass scheduling for pre-reload schedule
29952 to save compile time. */
29953 if (reload_completed)
29955 ix86_core2i7_init_hooks ();
29956 break;
29958 /* Fall through. */
29959 default:
29960 targetm.sched.dfa_post_advance_cycle = NULL;
29961 targetm.sched.first_cycle_multipass_init = NULL;
29962 targetm.sched.first_cycle_multipass_begin = NULL;
29963 targetm.sched.first_cycle_multipass_issue = NULL;
29964 targetm.sched.first_cycle_multipass_backtrack = NULL;
29965 targetm.sched.first_cycle_multipass_end = NULL;
29966 targetm.sched.first_cycle_multipass_fini = NULL;
29967 break;
29972 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
29974 static HOST_WIDE_INT
29975 ix86_static_rtx_alignment (machine_mode mode)
29977 if (mode == DFmode)
29978 return 64;
29979 if (ALIGN_MODE_128 (mode))
29980 return MAX (128, GET_MODE_ALIGNMENT (mode));
29981 return GET_MODE_ALIGNMENT (mode);
29984 /* Implement TARGET_CONSTANT_ALIGNMENT. */
29986 static HOST_WIDE_INT
29987 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
29989 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29990 || TREE_CODE (exp) == INTEGER_CST)
29992 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
29993 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
29994 return MAX (mode_align, align);
29996 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29997 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29998 return BITS_PER_WORD;
30000 return align;
30003 /* Implement TARGET_EMPTY_RECORD_P. */
30005 static bool
30006 ix86_is_empty_record (const_tree type)
30008 if (!TARGET_64BIT)
30009 return false;
30010 return default_is_empty_record (type);
30013 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
30015 static void
30016 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
30018 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
30020 if (!cum->warn_empty)
30021 return;
30023 if (!TYPE_EMPTY_P (type))
30024 return;
30026 const_tree ctx = get_ultimate_context (cum->decl);
30027 if (ctx != NULL_TREE
30028 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
30029 return;
30031 /* If the actual size of the type is zero, then there is no change
30032 in how objects of this size are passed. */
30033 if (int_size_in_bytes (type) == 0)
30034 return;
30036 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
30037 "changes in -fabi-version=12 (GCC 8)", type);
30039 /* Only warn once. */
30040 cum->warn_empty = false;
30043 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
30044 the data type, and ALIGN is the alignment that the object would
30045 ordinarily have. */
30047 static int
30048 iamcu_alignment (tree type, int align)
30050 machine_mode mode;
30052 if (align < 32 || TYPE_USER_ALIGN (type))
30053 return align;
30055 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
30056 aligned to 4 bytes. */
30057 mode = TYPE_MODE (strip_array_types (type));
30058 switch (GET_MODE_CLASS (mode))
30060 case MODE_INT:
30061 case MODE_COMPLEX_INT:
30062 case MODE_COMPLEX_FLOAT:
30063 case MODE_FLOAT:
30064 case MODE_DECIMAL_FLOAT:
30065 return 32;
30066 default:
30067 return align;
30071 /* Compute the alignment for a static variable.
30072 TYPE is the data type, and ALIGN is the alignment that
30073 the object would ordinarily have. The value of this function is used
30074 instead of that alignment to align the object. */
30077 ix86_data_alignment (tree type, int align, bool opt)
30079 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
30080 for symbols from other compilation units or symbols that don't need
30081 to bind locally. In order to preserve some ABI compatibility with
30082 those compilers, ensure we don't decrease alignment from what we
30083 used to assume. */
30085 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
30087 /* A data structure equal to or larger than the size of a cache line
30088 (64 bytes in the Pentium 4 and other recent Intel processors, including
30089 processors based on Intel Core microarchitecture) should be aligned
30090 so that its base address is a multiple of the cache line size. */
30092 int max_align
30093 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
30095 if (max_align < BITS_PER_WORD)
30096 max_align = BITS_PER_WORD;
30098 switch (ix86_align_data_type)
30100 case ix86_align_data_type_abi: opt = false; break;
30101 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
30102 case ix86_align_data_type_cacheline: break;
30105 if (TARGET_IAMCU)
30106 align = iamcu_alignment (type, align);
30108 if (opt
30109 && AGGREGATE_TYPE_P (type)
30110 && TYPE_SIZE (type)
30111 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
30113 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
30114 && align < max_align_compat)
30115 align = max_align_compat;
30116 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
30117 && align < max_align)
30118 align = max_align;
30121 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
30122 to a 16-byte boundary. */
30123 if (TARGET_64BIT)
30125 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
30126 && TYPE_SIZE (type)
30127 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30128 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30129 && align < 128)
30130 return 128;
30133 if (!opt)
30134 return align;
30136 if (TREE_CODE (type) == ARRAY_TYPE)
30138 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30139 return 64;
30140 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30141 return 128;
30143 else if (TREE_CODE (type) == COMPLEX_TYPE)
30146 if (TYPE_MODE (type) == DCmode && align < 64)
30147 return 64;
30148 if ((TYPE_MODE (type) == XCmode
30149 || TYPE_MODE (type) == TCmode) && align < 128)
30150 return 128;
30152 else if ((TREE_CODE (type) == RECORD_TYPE
30153 || TREE_CODE (type) == UNION_TYPE
30154 || TREE_CODE (type) == QUAL_UNION_TYPE)
30155 && TYPE_FIELDS (type))
30157 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30158 return 64;
30159 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30160 return 128;
30162 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30163 || TREE_CODE (type) == INTEGER_TYPE)
30165 if (TYPE_MODE (type) == DFmode && align < 64)
30166 return 64;
30167 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30168 return 128;
30171 return align;
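/* Rough example of the effect, assuming the default 64-byte prefetch block:
   with optimization, a static "char buf[32]" on x86-64 reaches the 256-bit
   compatibility cap above and is aligned to 32 bytes, and any array of
   16 bytes or more gets at least the 16-byte psABI alignment; with
   -malign-data=abi the cache-line and compatibility bumps are skipped and
   only the psABI rule remains.  */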
30174 /* Compute the alignment for a local variable or a stack slot. EXP is
30175 the data type or decl itself, MODE is the widest mode available and
30176 ALIGN is the alignment that the object would ordinarily have. The
30177 value of this macro is used instead of that alignment to align the
30178 object. */
30180 unsigned int
30181 ix86_local_alignment (tree exp, machine_mode mode,
30182 unsigned int align)
30184 tree type, decl;
30186 if (exp && DECL_P (exp))
30188 type = TREE_TYPE (exp);
30189 decl = exp;
30191 else
30193 type = exp;
30194 decl = NULL;
30197 /* Don't do dynamic stack realignment for long long objects with
30198 -mpreferred-stack-boundary=2. */
30199 if (!TARGET_64BIT
30200 && align == 64
30201 && ix86_preferred_stack_boundary < 64
30202 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
30203 && (!type || !TYPE_USER_ALIGN (type))
30204 && (!decl || !DECL_USER_ALIGN (decl)))
30205 align = 32;
30207 /* If TYPE is NULL, we are allocating a stack slot for caller-save
30208 register in MODE. We will return the largest alignment of XF
30209 and DF. */
30210 if (!type)
30212 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
30213 align = GET_MODE_ALIGNMENT (DFmode);
30214 return align;
30217 /* Don't increase alignment for Intel MCU psABI. */
30218 if (TARGET_IAMCU)
30219 return align;
30221 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
30222 to a 16-byte boundary. The exact wording is:
30224 An array uses the same alignment as its elements, except that a local or
30225 global array variable of length at least 16 bytes or
30226 a C99 variable-length array variable always has alignment of at least 16 bytes.
30228 This was added to allow use of aligned SSE instructions on arrays. The
30229 rule is meant for static storage (where the compiler cannot do the analysis
30230 by itself). We follow it for automatic variables only when convenient;
30231 we fully control everything in the function being compiled, and functions
30232 from other units cannot rely on the alignment.
30234 Exclude the va_list type. It is the common case of a local array where
30235 we cannot benefit from the alignment.
30237 TODO: Probably one should optimize for size only when the variable does not escape. */
30238 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
30239 && TARGET_SSE)
30241 if (AGGREGATE_TYPE_P (type)
30242 && (va_list_type_node == NULL_TREE
30243 || (TYPE_MAIN_VARIANT (type)
30244 != TYPE_MAIN_VARIANT (va_list_type_node)))
30245 && TYPE_SIZE (type)
30246 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30247 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30248 && align < 128)
30249 return 128;
30251 if (TREE_CODE (type) == ARRAY_TYPE)
30253 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30254 return 64;
30255 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30256 return 128;
30258 else if (TREE_CODE (type) == COMPLEX_TYPE)
30260 if (TYPE_MODE (type) == DCmode && align < 64)
30261 return 64;
30262 if ((TYPE_MODE (type) == XCmode
30263 || TYPE_MODE (type) == TCmode) && align < 128)
30264 return 128;
30266 else if ((TREE_CODE (type) == RECORD_TYPE
30267 || TREE_CODE (type) == UNION_TYPE
30268 || TREE_CODE (type) == QUAL_UNION_TYPE)
30269 && TYPE_FIELDS (type))
30271 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30272 return 64;
30273 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30274 return 128;
30276 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30277 || TREE_CODE (type) == INTEGER_TYPE)
30280 if (TYPE_MODE (type) == DFmode && align < 64)
30281 return 64;
30282 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30283 return 128;
30285 return align;
30288 /* Compute the minimum required alignment for dynamic stack realignment
30289 purposes for a local variable, parameter or a stack slot. EXP is
30290 the data type or decl itself, MODE is its mode and ALIGN is the
30291 alignment that the object would ordinarily have. */
30293 unsigned int
30294 ix86_minimum_alignment (tree exp, machine_mode mode,
30295 unsigned int align)
30297 tree type, decl;
30299 if (exp && DECL_P (exp))
30301 type = TREE_TYPE (exp);
30302 decl = exp;
30304 else
30306 type = exp;
30307 decl = NULL;
30310 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
30311 return align;
30313 /* Don't do dynamic stack realignment for long long objects with
30314 -mpreferred-stack-boundary=2. */
30315 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
30316 && (!type || !TYPE_USER_ALIGN (type))
30317 && (!decl || !DECL_USER_ALIGN (decl)))
30319 gcc_checking_assert (!TARGET_STV);
30320 return 32;
30323 return align;
30326 /* Find a location for the static chain incoming to a nested function.
30327 This is a register, unless all free registers are used by arguments. */
30329 static rtx
30330 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
30332 unsigned regno;
30334 if (TARGET_64BIT)
30336 /* We always use R10 in 64-bit mode. */
30337 regno = R10_REG;
30339 else
30341 const_tree fntype, fndecl;
30342 unsigned int ccvt;
30344 /* By default in 32-bit mode we use ECX to pass the static chain. */
30345 regno = CX_REG;
30347 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
30349 fntype = TREE_TYPE (fndecl_or_type);
30350 fndecl = fndecl_or_type;
30352 else
30354 fntype = fndecl_or_type;
30355 fndecl = NULL;
30358 ccvt = ix86_get_callcvt (fntype);
30359 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30361 /* Fastcall functions use ecx/edx for arguments, which leaves
30362 us with EAX for the static chain.
30363 Thiscall functions use ecx for arguments, which also
30364 leaves us with EAX for the static chain. */
30365 regno = AX_REG;
30367 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30369 /* Thiscall functions use ecx for arguments, which leaves
30370 us with EAX and EDX for the static chain.
30371 We use EAX for ABI compatibility. */
30372 regno = AX_REG;
30374 else if (ix86_function_regparm (fntype, fndecl) == 3)
30376 /* For regparm 3, we have no free call-clobbered registers in
30377 which to store the static chain. In order to implement this,
30378 we have the trampoline push the static chain to the stack.
30379 However, we can't push a value below the return address when
30380 we call the nested function directly, so we have to use an
30381 alternate entry point. For this we use ESI, and have the
30382 alternate entry point push ESI, so that things appear the
30383 same once we're executing the nested function. */
30384 if (incoming_p)
30386 if (fndecl == current_function_decl
30387 && !ix86_static_chain_on_stack)
30389 gcc_assert (!reload_completed);
30390 ix86_static_chain_on_stack = true;
30392 return gen_frame_mem (SImode,
30393 plus_constant (Pmode,
30394 arg_pointer_rtx, -8));
30396 regno = SI_REG;
30400 return gen_rtx_REG (Pmode, regno);
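/* Rough summary of the cases handled above: 64-bit code always uses %r10;
   32-bit code defaults to %ecx, falls back to %eax for fastcall and
   thiscall functions (whose arguments occupy %ecx), and for regparm(3)
   functions the incoming static chain lives on the stack, with %esi and an
   alternate entry point used when such a function is called directly.  */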
30403 /* Emit RTL insns to initialize the variable parts of a trampoline.
30404 FNDECL is the decl of the target address; M_TRAMP is a MEM for
30405 the trampoline, and CHAIN_VALUE is an RTX for the static chain
30406 to be passed to the target function. */
30408 static void
30409 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
30411 rtx mem, fnaddr;
30412 int opcode;
30413 int offset = 0;
30415 fnaddr = XEXP (DECL_RTL (fndecl), 0);
30417 if (TARGET_64BIT)
30419 int size;
30421 /* Load the function address into r11. Try to load the address using
30422 the shorter movl instead of movabs. We may want to support
30423 movq for kernel mode, but the kernel does not use trampolines at
30424 the moment. FNADDR is a 32-bit address and may not be in
30425 DImode when ptr_mode == SImode. Always use movl in this
30426 case. */
30427 if (ptr_mode == SImode
30428 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
30430 fnaddr = copy_addr_to_reg (fnaddr);
30432 mem = adjust_address (m_tramp, HImode, offset);
30433 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
30435 mem = adjust_address (m_tramp, SImode, offset + 2);
30436 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
30437 offset += 6;
30439 else
30441 mem = adjust_address (m_tramp, HImode, offset);
30442 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
30444 mem = adjust_address (m_tramp, DImode, offset + 2);
30445 emit_move_insn (mem, fnaddr);
30446 offset += 10;
30449 /* Load static chain using movabs to r10. Use the shorter movl
30450 instead of movabs when ptr_mode == SImode. */
30451 if (ptr_mode == SImode)
30453 opcode = 0xba41;
30454 size = 6;
30456 else
30458 opcode = 0xba49;
30459 size = 10;
30462 mem = adjust_address (m_tramp, HImode, offset);
30463 emit_move_insn (mem, gen_int_mode (opcode, HImode));
30465 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
30466 emit_move_insn (mem, chain_value);
30467 offset += size;
30469 /* Jump to r11; the last (unused) byte is a nop, only there to
30470 pad the write out to a single 32-bit store. */
30471 mem = adjust_address (m_tramp, SImode, offset);
30472 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
30473 offset += 4;
30475 else
30477 rtx disp, chain;
30479 /* Depending on the static chain location, either load a register
30480 with a constant, or push the constant to the stack. All of the
30481 instructions are the same size. */
30482 chain = ix86_static_chain (fndecl, true);
30483 if (REG_P (chain))
30485 switch (REGNO (chain))
30487 case AX_REG:
30488 opcode = 0xb8; break;
30489 case CX_REG:
30490 opcode = 0xb9; break;
30491 default:
30492 gcc_unreachable ();
30495 else
30496 opcode = 0x68;
30498 mem = adjust_address (m_tramp, QImode, offset);
30499 emit_move_insn (mem, gen_int_mode (opcode, QImode));
30501 mem = adjust_address (m_tramp, SImode, offset + 1);
30502 emit_move_insn (mem, chain_value);
30503 offset += 5;
30505 mem = adjust_address (m_tramp, QImode, offset);
30506 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
30508 mem = adjust_address (m_tramp, SImode, offset + 1);
30510 /* Compute offset from the end of the jmp to the target function.
30511 In the case in which the trampoline stores the static chain on
30512 the stack, we need to skip the first insn which pushes the
30513 (call-saved) register static chain; this push is 1 byte. */
30514 offset += 5;
30515 disp = expand_binop (SImode, sub_optab, fnaddr,
30516 plus_constant (Pmode, XEXP (m_tramp, 0),
30517 offset - (MEM_P (chain) ? 1 : 0)),
30518 NULL_RTX, 1, OPTAB_DIRECT);
30519 emit_move_insn (mem, disp);
30522 gcc_assert (offset <= TRAMPOLINE_SIZE);
30524 #ifdef HAVE_ENABLE_EXECUTE_STACK
30525 #ifdef CHECK_EXECUTE_STACK_ENABLED
30526 if (CHECK_EXECUTE_STACK_ENABLED)
30527 #endif
30528 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
30529 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
30530 #endif
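/* Rough sketch of the 64-bit trampoline laid out above, decoding the
   little-endian opcode constants (which pair is used depends on ptr_mode and
   on whether FNADDR fits in 32 bits):

     41 bb <imm32>	movl    $fnaddr, %r11d		(0xbb41)
  or 49 bb <imm64>	movabsq $fnaddr, %r11		(0xbb49)
     49 ba <imm64>	movabsq $chain_value, %r10	(0xba49; 41 ba when ptr_mode == SImode)
     49 ff e3 90	jmpq    *%r11; nop		(0x90e3ff49)

   The 32-bit trampoline instead emits a one-byte "mov" to %eax/%ecx or a
   "push" of the static chain, followed by an 0xe9 relative jump to the
   target.  */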
30533 static bool
30534 ix86_allocate_stack_slots_for_args (void)
30536 /* Naked functions should not allocate stack slots for arguments. */
30537 return !ix86_function_naked (current_function_decl);
30540 static bool
30541 ix86_warn_func_return (tree decl)
30543 /* Naked functions are implemented entirely in assembly, including the
30544 return sequence, so suppress warnings about this. */
30545 return !ix86_function_naked (decl);
30548 /* The following file contains several enumerations and data structures
30549 built from the definitions in i386-builtin-types.def. */
30551 #include "i386-builtin-types.inc"
30553 /* Table for the ix86 builtin non-function types. */
30554 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
30556 /* Retrieve an element from the above table, building some of
30557 the types lazily. */
30559 static tree
30560 ix86_get_builtin_type (enum ix86_builtin_type tcode)
30562 unsigned int index;
30563 tree type, itype;
30565 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
30567 type = ix86_builtin_type_tab[(int) tcode];
30568 if (type != NULL)
30569 return type;
30571 gcc_assert (tcode > IX86_BT_LAST_PRIM);
30572 if (tcode <= IX86_BT_LAST_VECT)
30574 machine_mode mode;
30576 index = tcode - IX86_BT_LAST_PRIM - 1;
30577 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
30578 mode = ix86_builtin_type_vect_mode[index];
30580 type = build_vector_type_for_mode (itype, mode);
30582 else
30584 int quals;
30586 index = tcode - IX86_BT_LAST_VECT - 1;
30587 if (tcode <= IX86_BT_LAST_PTR)
30588 quals = TYPE_UNQUALIFIED;
30589 else
30590 quals = TYPE_QUAL_CONST;
30592 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
30593 if (quals != TYPE_UNQUALIFIED)
30594 itype = build_qualified_type (itype, quals);
30596 type = build_pointer_type (itype);
30599 ix86_builtin_type_tab[(int) tcode] = type;
30600 return type;
30603 /* Table for the ix86 builtin function types. */
30604 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
30606 /* Retrieve an element from the above table, building some of
30607 the types lazily. */
30609 static tree
30610 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
30612 tree type;
30614 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
30616 type = ix86_builtin_func_type_tab[(int) tcode];
30617 if (type != NULL)
30618 return type;
30620 if (tcode <= IX86_BT_LAST_FUNC)
30622 unsigned start = ix86_builtin_func_start[(int) tcode];
30623 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
30624 tree rtype, atype, args = void_list_node;
30625 unsigned i;
30627 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
30628 for (i = after - 1; i > start; --i)
30630 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
30631 args = tree_cons (NULL, atype, args);
30634 type = build_function_type (rtype, args);
30636 else
30638 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
30639 enum ix86_builtin_func_type icode;
30641 icode = ix86_builtin_func_alias_base[index];
30642 type = ix86_get_builtin_func_type (icode);
30645 ix86_builtin_func_type_tab[(int) tcode] = type;
30646 return type;
30650 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
30651 bdesc_* arrays below should come first, then builtins for each bdesc_*
30652 array in ascending order, so that we can use direct array accesses. */
30653 enum ix86_builtins
30655 IX86_BUILTIN_MASKMOVQ,
30656 IX86_BUILTIN_LDMXCSR,
30657 IX86_BUILTIN_STMXCSR,
30658 IX86_BUILTIN_MASKMOVDQU,
30659 IX86_BUILTIN_PSLLDQ128,
30660 IX86_BUILTIN_CLFLUSH,
30661 IX86_BUILTIN_MONITOR,
30662 IX86_BUILTIN_MWAIT,
30663 IX86_BUILTIN_CLZERO,
30664 IX86_BUILTIN_VEC_INIT_V2SI,
30665 IX86_BUILTIN_VEC_INIT_V4HI,
30666 IX86_BUILTIN_VEC_INIT_V8QI,
30667 IX86_BUILTIN_VEC_EXT_V2DF,
30668 IX86_BUILTIN_VEC_EXT_V2DI,
30669 IX86_BUILTIN_VEC_EXT_V4SF,
30670 IX86_BUILTIN_VEC_EXT_V4SI,
30671 IX86_BUILTIN_VEC_EXT_V8HI,
30672 IX86_BUILTIN_VEC_EXT_V2SI,
30673 IX86_BUILTIN_VEC_EXT_V4HI,
30674 IX86_BUILTIN_VEC_EXT_V16QI,
30675 IX86_BUILTIN_VEC_SET_V2DI,
30676 IX86_BUILTIN_VEC_SET_V4SF,
30677 IX86_BUILTIN_VEC_SET_V4SI,
30678 IX86_BUILTIN_VEC_SET_V8HI,
30679 IX86_BUILTIN_VEC_SET_V4HI,
30680 IX86_BUILTIN_VEC_SET_V16QI,
30681 IX86_BUILTIN_GATHERSIV2DF,
30682 IX86_BUILTIN_GATHERSIV4DF,
30683 IX86_BUILTIN_GATHERDIV2DF,
30684 IX86_BUILTIN_GATHERDIV4DF,
30685 IX86_BUILTIN_GATHERSIV4SF,
30686 IX86_BUILTIN_GATHERSIV8SF,
30687 IX86_BUILTIN_GATHERDIV4SF,
30688 IX86_BUILTIN_GATHERDIV8SF,
30689 IX86_BUILTIN_GATHERSIV2DI,
30690 IX86_BUILTIN_GATHERSIV4DI,
30691 IX86_BUILTIN_GATHERDIV2DI,
30692 IX86_BUILTIN_GATHERDIV4DI,
30693 IX86_BUILTIN_GATHERSIV4SI,
30694 IX86_BUILTIN_GATHERSIV8SI,
30695 IX86_BUILTIN_GATHERDIV4SI,
30696 IX86_BUILTIN_GATHERDIV8SI,
30697 IX86_BUILTIN_VFMSUBSD3_MASK3,
30698 IX86_BUILTIN_VFMSUBSS3_MASK3,
30699 IX86_BUILTIN_GATHER3SIV8SF,
30700 IX86_BUILTIN_GATHER3SIV4SF,
30701 IX86_BUILTIN_GATHER3SIV4DF,
30702 IX86_BUILTIN_GATHER3SIV2DF,
30703 IX86_BUILTIN_GATHER3DIV8SF,
30704 IX86_BUILTIN_GATHER3DIV4SF,
30705 IX86_BUILTIN_GATHER3DIV4DF,
30706 IX86_BUILTIN_GATHER3DIV2DF,
30707 IX86_BUILTIN_GATHER3SIV8SI,
30708 IX86_BUILTIN_GATHER3SIV4SI,
30709 IX86_BUILTIN_GATHER3SIV4DI,
30710 IX86_BUILTIN_GATHER3SIV2DI,
30711 IX86_BUILTIN_GATHER3DIV8SI,
30712 IX86_BUILTIN_GATHER3DIV4SI,
30713 IX86_BUILTIN_GATHER3DIV4DI,
30714 IX86_BUILTIN_GATHER3DIV2DI,
30715 IX86_BUILTIN_SCATTERSIV8SF,
30716 IX86_BUILTIN_SCATTERSIV4SF,
30717 IX86_BUILTIN_SCATTERSIV4DF,
30718 IX86_BUILTIN_SCATTERSIV2DF,
30719 IX86_BUILTIN_SCATTERDIV8SF,
30720 IX86_BUILTIN_SCATTERDIV4SF,
30721 IX86_BUILTIN_SCATTERDIV4DF,
30722 IX86_BUILTIN_SCATTERDIV2DF,
30723 IX86_BUILTIN_SCATTERSIV8SI,
30724 IX86_BUILTIN_SCATTERSIV4SI,
30725 IX86_BUILTIN_SCATTERSIV4DI,
30726 IX86_BUILTIN_SCATTERSIV2DI,
30727 IX86_BUILTIN_SCATTERDIV8SI,
30728 IX86_BUILTIN_SCATTERDIV4SI,
30729 IX86_BUILTIN_SCATTERDIV4DI,
30730 IX86_BUILTIN_SCATTERDIV2DI,
30731 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30732 where all operands are 32-byte or 64-byte wide respectively. */
30733 IX86_BUILTIN_GATHERALTSIV4DF,
30734 IX86_BUILTIN_GATHERALTDIV8SF,
30735 IX86_BUILTIN_GATHERALTSIV4DI,
30736 IX86_BUILTIN_GATHERALTDIV8SI,
30737 IX86_BUILTIN_GATHER3ALTDIV16SF,
30738 IX86_BUILTIN_GATHER3ALTDIV16SI,
30739 IX86_BUILTIN_GATHER3ALTSIV4DF,
30740 IX86_BUILTIN_GATHER3ALTDIV8SF,
30741 IX86_BUILTIN_GATHER3ALTSIV4DI,
30742 IX86_BUILTIN_GATHER3ALTDIV8SI,
30743 IX86_BUILTIN_GATHER3ALTSIV8DF,
30744 IX86_BUILTIN_GATHER3ALTSIV8DI,
30745 IX86_BUILTIN_GATHER3DIV16SF,
30746 IX86_BUILTIN_GATHER3DIV16SI,
30747 IX86_BUILTIN_GATHER3DIV8DF,
30748 IX86_BUILTIN_GATHER3DIV8DI,
30749 IX86_BUILTIN_GATHER3SIV16SF,
30750 IX86_BUILTIN_GATHER3SIV16SI,
30751 IX86_BUILTIN_GATHER3SIV8DF,
30752 IX86_BUILTIN_GATHER3SIV8DI,
30753 IX86_BUILTIN_SCATTERALTSIV8DF,
30754 IX86_BUILTIN_SCATTERALTDIV16SF,
30755 IX86_BUILTIN_SCATTERALTSIV8DI,
30756 IX86_BUILTIN_SCATTERALTDIV16SI,
30757 IX86_BUILTIN_SCATTERDIV16SF,
30758 IX86_BUILTIN_SCATTERDIV16SI,
30759 IX86_BUILTIN_SCATTERDIV8DF,
30760 IX86_BUILTIN_SCATTERDIV8DI,
30761 IX86_BUILTIN_SCATTERSIV16SF,
30762 IX86_BUILTIN_SCATTERSIV16SI,
30763 IX86_BUILTIN_SCATTERSIV8DF,
30764 IX86_BUILTIN_SCATTERSIV8DI,
30765 IX86_BUILTIN_GATHERPFQPD,
30766 IX86_BUILTIN_GATHERPFDPS,
30767 IX86_BUILTIN_GATHERPFDPD,
30768 IX86_BUILTIN_GATHERPFQPS,
30769 IX86_BUILTIN_SCATTERPFDPD,
30770 IX86_BUILTIN_SCATTERPFDPS,
30771 IX86_BUILTIN_SCATTERPFQPD,
30772 IX86_BUILTIN_SCATTERPFQPS,
30773 IX86_BUILTIN_CLWB,
30774 IX86_BUILTIN_CLFLUSHOPT,
30775 IX86_BUILTIN_INFQ,
30776 IX86_BUILTIN_HUGE_VALQ,
30777 IX86_BUILTIN_NANQ,
30778 IX86_BUILTIN_NANSQ,
30779 IX86_BUILTIN_XABORT,
30780 IX86_BUILTIN_ADDCARRYX32,
30781 IX86_BUILTIN_ADDCARRYX64,
30782 IX86_BUILTIN_SBB32,
30783 IX86_BUILTIN_SBB64,
30784 IX86_BUILTIN_RDRAND16_STEP,
30785 IX86_BUILTIN_RDRAND32_STEP,
30786 IX86_BUILTIN_RDRAND64_STEP,
30787 IX86_BUILTIN_RDSEED16_STEP,
30788 IX86_BUILTIN_RDSEED32_STEP,
30789 IX86_BUILTIN_RDSEED64_STEP,
30790 IX86_BUILTIN_MONITORX,
30791 IX86_BUILTIN_MWAITX,
30792 IX86_BUILTIN_CFSTRING,
30793 IX86_BUILTIN_CPU_INIT,
30794 IX86_BUILTIN_CPU_IS,
30795 IX86_BUILTIN_CPU_SUPPORTS,
30796 IX86_BUILTIN_READ_FLAGS,
30797 IX86_BUILTIN_WRITE_FLAGS,
30799 /* All the remaining builtins are tracked in bdesc_* arrays in
30800 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
30801 this point. */
30802 #define BDESC(mask, icode, name, code, comparison, flag) \
30803 code,
30804 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30805 code, \
30806 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
30807 #define BDESC_END(kind, next_kind)
30809 #include "i386-builtin.def"
30811 #undef BDESC
30812 #undef BDESC_FIRST
30813 #undef BDESC_END
30815 IX86_BUILTIN_MAX,
30817 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
30819 /* Now just the aliases for bdesc_* start/end. */
30820 #define BDESC(mask, icode, name, code, comparison, flag)
30821 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
30822 #define BDESC_END(kind, next_kind) \
30823 IX86_BUILTIN__BDESC_##kind##_LAST \
30824 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
30826 #include "i386-builtin.def"
30828 #undef BDESC
30829 #undef BDESC_FIRST
30830 #undef BDESC_END
30832 /* Just to make sure there is no comma after the last enumerator. */
30833 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
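/* Expansion sketch (IX86_BUILTIN_FOO is a hypothetical placeholder, not an
   actual entry of i386-builtin.def): a line such as
     BDESC_FIRST (args, ARGS, OPTION_MASK_ISA_SSE2, CODE_FOR_nothing,
                  "__builtin_ia32_foo", IX86_BUILTIN_FOO, UNKNOWN, 0)
   contributes, in the first #include above,
     IX86_BUILTIN_FOO,
     IX86_BUILTIN__BDESC_ARGS_FIRST = IX86_BUILTIN_FOO,
   while the second #include expands only BDESC_END, producing the matching
     IX86_BUILTIN__BDESC_ARGS_LAST = IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST - 1,
   (assuming ROUND_ARGS is the kind that follows ARGS, as the BDESC_VERIFYS
   checks further down indicate).  */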
30836 /* Table for the ix86 builtin decls. */
30837 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30839 /* Table of all of the builtin functions that are possible with different ISAs
30840 but are waiting to be built until a function is declared to use that
30841 ISA. */
30842 struct builtin_isa {
30843 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30844 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
30845 const char *name; /* function name */
30846 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30847 unsigned char const_p:1; /* true if the declaration is constant */
30848 unsigned char pure_p:1; /* true if the declaration has pure attribute */
30849 bool leaf_p; /* true if the declaration has leaf attribute */
30850 bool nothrow_p; /* true if the declaration has nothrow attribute */
30851 bool set_and_not_built_p; /* true if the builtin was deferred and its decl has not been built yet */
30854 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30856 /* Bits that can still enable any inclusion of a builtin. */
30857 static HOST_WIDE_INT deferred_isa_values = 0;
30858 static HOST_WIDE_INT deferred_isa_values2 = 0;
30860 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30861 of which isa_flags to use in the ix86_builtins_isa array. Store the
30862 function decl in the ix86_builtins array. Return the function decl or
30863 NULL_TREE if the builtin was not added.
30865 If the front end has a special hook for builtin functions, delay adding
30866 builtin functions that aren't in the current ISA until the ISA is changed
30867 with function specific optimization. Doing so can save about 300K for the
30868 default compiler. When the builtin is expanded, check at that time whether
30869 it is valid.
30871 If the front end doesn't have a special hook, record all builtins, even
30872 those whose instruction set isn't in the current ISA, in case the user uses
30873 function specific options for a different ISA, so that we don't get scope
30874 errors if a builtin is added in the middle of a function scope. */
30876 static inline tree
30877 def_builtin (HOST_WIDE_INT mask, const char *name,
30878 enum ix86_builtin_func_type tcode,
30879 enum ix86_builtins code)
30881 tree decl = NULL_TREE;
30883 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30885 ix86_builtins_isa[(int) code].isa = mask;
30887 mask &= ~OPTION_MASK_ISA_64BIT;
30889 /* Filter out the masks most often ORed together with others. */
30890 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30891 && mask != OPTION_MASK_ISA_AVX512VL)
30892 mask &= ~OPTION_MASK_ISA_AVX512VL;
30893 if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW)
30894 && mask != OPTION_MASK_ISA_AVX512BW)
30895 mask &= ~OPTION_MASK_ISA_AVX512BW;
30897 if (mask == 0
30898 || (mask & ix86_isa_flags) != 0
30899 || (lang_hooks.builtin_function
30900 == lang_hooks.builtin_function_ext_scope))
30902 tree type = ix86_get_builtin_func_type (tcode);
30903 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30904 NULL, NULL_TREE);
30905 ix86_builtins[(int) code] = decl;
30906 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30908 else
30910 /* Just a MASK where set_and_not_built_p == true can potentially
30911 include a builtin. */
30912 deferred_isa_values |= mask;
30913 ix86_builtins[(int) code] = NULL_TREE;
30914 ix86_builtins_isa[(int) code].tcode = tcode;
30915 ix86_builtins_isa[(int) code].name = name;
30916 ix86_builtins_isa[(int) code].leaf_p = false;
30917 ix86_builtins_isa[(int) code].nothrow_p = false;
30918 ix86_builtins_isa[(int) code].const_p = false;
30919 ix86_builtins_isa[(int) code].pure_p = false;
30920 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30924 return decl;
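/* Usage sketch, mirroring calls made further down in this file:
     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
                  VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
   If SSE2 is not enabled in ix86_isa_flags (and the front end permits
   deferral), no decl is built yet; the mask is instead ORed into
   deferred_isa_values so that ix86_add_new_builtins can create the decl
   once the ISA becomes available.  */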
30927 /* Like def_builtin, but also marks the function decl "const". */
30929 static inline tree
30930 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30931 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30933 tree decl = def_builtin (mask, name, tcode, code);
30934 if (decl)
30935 TREE_READONLY (decl) = 1;
30936 else
30937 ix86_builtins_isa[(int) code].const_p = true;
30939 return decl;
30942 /* Like def_builtin, but also marks the function decl "pure". */
30944 static inline tree
30945 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
30946 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30948 tree decl = def_builtin (mask, name, tcode, code);
30949 if (decl)
30950 DECL_PURE_P (decl) = 1;
30951 else
30952 ix86_builtins_isa[(int) code].pure_p = true;
30954 return decl;
30957 /* Like def_builtin, but for additional isa2 flags. */
30959 static inline tree
30960 def_builtin2 (HOST_WIDE_INT mask, const char *name,
30961 enum ix86_builtin_func_type tcode,
30962 enum ix86_builtins code)
30964 tree decl = NULL_TREE;
30966 ix86_builtins_isa[(int) code].isa2 = mask;
30968 if (mask == 0
30969 || (mask & ix86_isa_flags2) != 0
30970 || (lang_hooks.builtin_function
30971 == lang_hooks.builtin_function_ext_scope))
30974 tree type = ix86_get_builtin_func_type (tcode);
30975 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30976 NULL, NULL_TREE);
30977 ix86_builtins[(int) code] = decl;
30978 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30980 else
30982 /* Just a MASK where set_and_not_built_p == true can potentially
30983 include a builtin. */
30984 deferred_isa_values2 |= mask;
30985 ix86_builtins[(int) code] = NULL_TREE;
30986 ix86_builtins_isa[(int) code].tcode = tcode;
30987 ix86_builtins_isa[(int) code].name = name;
30988 ix86_builtins_isa[(int) code].leaf_p = false;
30989 ix86_builtins_isa[(int) code].nothrow_p = false;
30990 ix86_builtins_isa[(int) code].const_p = false;
30991 ix86_builtins_isa[(int) code].pure_p = false;
30992 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30995 return decl;
30998 /* Like def_builtin, but also marks the function decl "const". */
31000 static inline tree
31001 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
31002 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
31004 tree decl = def_builtin2 (mask, name, tcode, code);
31005 if (decl)
31006 TREE_READONLY (decl) = 1;
31007 else
31008 ix86_builtins_isa[(int) code].const_p = true;
31010 return decl;
31013 /* Like def_builtin, but also marks the function decl "pure". */
31015 static inline tree
31016 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
31017 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
31019 tree decl = def_builtin2 (mask, name, tcode, code);
31020 if (decl)
31021 DECL_PURE_P (decl) = 1;
31022 else
31023 ix86_builtins_isa[(int) code].pure_p = true;
31025 return decl;
31028 /* Add any new builtin functions for a given ISA that may not have been
31029 declared. This saves a bit of space compared to adding all of the
31030 declarations to the tree, even if we didn't use them. */
31032 static void
31033 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
31035 isa &= ~OPTION_MASK_ISA_64BIT;
31037 if ((isa & deferred_isa_values) == 0
31038 && (isa2 & deferred_isa_values2) == 0)
31039 return;
31041 /* Bits in ISA and ISA2 can be removed from the deferred isa values. */
31042 deferred_isa_values &= ~isa;
31043 deferred_isa_values2 &= ~isa2;
31045 int i;
31046 tree saved_current_target_pragma = current_target_pragma;
31047 current_target_pragma = NULL_TREE;
31049 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
31051 if (((ix86_builtins_isa[i].isa & isa) != 0
31052 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
31053 && ix86_builtins_isa[i].set_and_not_built_p)
31055 tree decl, type;
31057 /* Don't define the builtin again. */
31058 ix86_builtins_isa[i].set_and_not_built_p = false;
31060 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
31061 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
31062 type, i, BUILT_IN_MD, NULL,
31063 NULL_TREE);
31065 ix86_builtins[i] = decl;
31066 if (ix86_builtins_isa[i].const_p)
31067 TREE_READONLY (decl) = 1;
31068 if (ix86_builtins_isa[i].pure_p)
31069 DECL_PURE_P (decl) = 1;
31070 if (ix86_builtins_isa[i].leaf_p)
31071 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31072 NULL_TREE);
31073 if (ix86_builtins_isa[i].nothrow_p)
31074 TREE_NOTHROW (decl) = 1;
31078 current_target_pragma = saved_current_target_pragma;
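/* Illustrative scenario (hypothetical options, no particular caller implied):
   a file compiled with -msse2 that contains a function declared with
   __attribute__((target ("avx2"))) reaches this point with the AVX2 bit set
   in ISA; the loop above then builds the AVX2 builtins that def_builtin had
   deferred.  */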
31081 /* Bits for builtin_description.flag. */
31083 /* Set when we don't support the comparison natively, and should
31084 swap_comparison in order to support it. */
31085 #define BUILTIN_DESC_SWAP_OPERANDS 1
31087 struct builtin_description
31089 const HOST_WIDE_INT mask;
31090 const enum insn_code icode;
31091 const char *const name;
31092 const enum ix86_builtins code;
31093 const enum rtx_code comparison;
31094 const int flag;
31097 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
31098 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
31099 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
31100 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
31101 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
31102 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
31103 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
31104 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
31105 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
31106 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
31107 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
31108 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
31109 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
31110 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
31111 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
31112 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
31113 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
31114 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
31115 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
31116 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
31117 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
31118 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
31119 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
31120 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
31121 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
31122 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
31123 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
31124 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
31125 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
31126 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
31127 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
31128 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
31129 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
31130 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
31131 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
31132 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
31133 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
31134 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
31135 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
31136 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
31137 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
31138 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
31139 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
31140 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
31141 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
31142 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
31143 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
31144 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
31145 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
31146 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
31147 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
31148 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
31150 #define BDESC(mask, icode, name, code, comparison, flag) \
31151 { mask, icode, name, code, comparison, flag },
31152 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
31153 static const struct builtin_description bdesc_##kind[] = \
31155 BDESC (mask, icode, name, code, comparison, flag)
31156 #define BDESC_END(kind, next_kind) \
31159 #include "i386-builtin.def"
31161 #undef BDESC
31162 #undef BDESC_FIRST
31163 #undef BDESC_END
31165 /* TM vector builtins. */
31167 /* Reuse the existing x86-specific `struct builtin_description' because
31168 we're lazy. Add casts to make them fit. */
31169 static const struct builtin_description bdesc_tm[] =
31171 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31172 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31173 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31174 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31175 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31176 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31177 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31179 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31180 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31181 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31182 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31183 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31184 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31185 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31187 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31188 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31189 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31190 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31191 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31192 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31193 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31195 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
31196 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
31197 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
31200 /* Initialize the transactional memory vector load/store builtins. */
31202 static void
31203 ix86_init_tm_builtins (void)
31205 enum ix86_builtin_func_type ftype;
31206 const struct builtin_description *d;
31207 size_t i;
31208 tree decl;
31209 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
31210 tree attrs_log, attrs_type_log;
31212 if (!flag_tm)
31213 return;
31215 /* If there are no builtins defined, we must be compiling in a
31216 language without trans-mem support. */
31217 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
31218 return;
31220 /* Use whatever attributes a normal TM load has. */
31221 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
31222 attrs_load = DECL_ATTRIBUTES (decl);
31223 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31224 /* Use whatever attributes a normal TM store has. */
31225 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
31226 attrs_store = DECL_ATTRIBUTES (decl);
31227 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31228 /* Use whatever attributes a normal TM log has. */
31229 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
31230 attrs_log = DECL_ATTRIBUTES (decl);
31231 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31233 for (i = 0, d = bdesc_tm;
31234 i < ARRAY_SIZE (bdesc_tm);
31235 i++, d++)
31237 if ((d->mask & ix86_isa_flags) != 0
31238 || (lang_hooks.builtin_function
31239 == lang_hooks.builtin_function_ext_scope))
31241 tree type, attrs, attrs_type;
31242 enum built_in_function code = (enum built_in_function) d->code;
31244 ftype = (enum ix86_builtin_func_type) d->flag;
31245 type = ix86_get_builtin_func_type (ftype);
31247 if (BUILTIN_TM_LOAD_P (code))
31249 attrs = attrs_load;
31250 attrs_type = attrs_type_load;
31252 else if (BUILTIN_TM_STORE_P (code))
31254 attrs = attrs_store;
31255 attrs_type = attrs_type_store;
31257 else
31259 attrs = attrs_log;
31260 attrs_type = attrs_type_log;
31262 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
31263 /* The builtin without the prefix for
31264 calling it directly. */
31265 d->name + strlen ("__builtin_"),
31266 attrs);
31267 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
31268 set the TYPE_ATTRIBUTES. */
31269 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
31271 set_builtin_decl (code, decl, false);
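/* For instance, the bdesc_tm entry named "__builtin__ITM_WM128" is also
   registered under "_ITM_WM128" (its name minus the "__builtin_" prefix),
   so it can be called directly by that name as well.  */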
31276 /* Macros for verification of enum ix86_builtins order. */
31277 #define BDESC_VERIFY(x, y, z) \
31278 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
31279 #define BDESC_VERIFYS(x, y, z) \
31280 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
31282 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31283 IX86_BUILTIN__BDESC_COMI_LAST, 1);
31284 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31285 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
31286 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31287 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
31288 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
31289 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
31290 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31291 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
31292 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
31293 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
31294 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
31295 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
31296 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
31297 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
31298 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
31299 IX86_BUILTIN__BDESC_MPX_LAST, 1);
31300 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31301 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
31302 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
31303 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
31304 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
31305 IX86_BUILTIN__BDESC_CET_LAST, 1);
31306 BDESC_VERIFYS (IX86_BUILTIN_MAX,
31307 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
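/* These compile-time checks verify that the enumerator blocks emitted from
   i386-builtin.def are contiguous and in the expected order, which lets the
   init loops below recover each builtin's code as
   IX86_BUILTIN__BDESC_<KIND>_FIRST plus its index in the matching
   bdesc_<kind> array (see the BDESC_VERIFY calls in
   ix86_init_mmx_sse_builtins).  */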
31309 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
31310 in the current target ISA, to allow the user to compile particular modules
31311 with target specific options that differ from the command line
31312 options. */
31313 static void
31314 ix86_init_mmx_sse_builtins (void)
31316 const struct builtin_description * d;
31317 enum ix86_builtin_func_type ftype;
31318 size_t i;
31320 /* Add all special builtins with variable number of operands. */
31321 for (i = 0, d = bdesc_special_args;
31322 i < ARRAY_SIZE (bdesc_special_args);
31323 i++, d++)
31325 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
31326 if (d->name == 0)
31327 continue;
31329 ftype = (enum ix86_builtin_func_type) d->flag;
31330 def_builtin (d->mask, d->name, ftype, d->code);
31332 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
31333 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31334 ARRAY_SIZE (bdesc_special_args) - 1);
31336 /* Add all special builtins with variable number of operands (isa2). */
31337 for (i = 0, d = bdesc_special_args2;
31338 i < ARRAY_SIZE (bdesc_special_args2);
31339 i++, d++)
31341 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
31342 if (d->name == 0)
31343 continue;
31345 ftype = (enum ix86_builtin_func_type) d->flag;
31346 def_builtin2 (d->mask, d->name, ftype, d->code);
31348 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
31349 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
31350 ARRAY_SIZE (bdesc_special_args2) - 1);
31352 /* Add all builtins with variable number of operands. */
31353 for (i = 0, d = bdesc_args;
31354 i < ARRAY_SIZE (bdesc_args);
31355 i++, d++)
31357 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
31358 if (d->name == 0)
31359 continue;
31361 ftype = (enum ix86_builtin_func_type) d->flag;
31362 def_builtin_const (d->mask, d->name, ftype, d->code);
31364 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
31365 IX86_BUILTIN__BDESC_ARGS_FIRST,
31366 ARRAY_SIZE (bdesc_args) - 1);
31368 /* Add all builtins with variable number of operands (isa2). */
31369 for (i = 0, d = bdesc_args2;
31370 i < ARRAY_SIZE (bdesc_args2);
31371 i++, d++)
31373 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
31374 if (d->name == 0)
31375 continue;
31377 ftype = (enum ix86_builtin_func_type) d->flag;
31378 def_builtin_const2 (d->mask, d->name, ftype, d->code);
31380 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
31381 IX86_BUILTIN__BDESC_ARGS2_FIRST,
31382 ARRAY_SIZE (bdesc_args2) - 1);
31384 /* Add all builtins with rounding. */
31385 for (i = 0, d = bdesc_round_args;
31386 i < ARRAY_SIZE (bdesc_round_args);
31387 i++, d++)
31389 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
31390 if (d->name == 0)
31391 continue;
31393 ftype = (enum ix86_builtin_func_type) d->flag;
31394 def_builtin_const (d->mask, d->name, ftype, d->code);
31396 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
31397 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31398 ARRAY_SIZE (bdesc_round_args) - 1);
31400 /* pcmpestr[im] insns. */
31401 for (i = 0, d = bdesc_pcmpestr;
31402 i < ARRAY_SIZE (bdesc_pcmpestr);
31403 i++, d++)
31405 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
31406 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31407 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31408 else
31409 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31410 def_builtin_const (d->mask, d->name, ftype, d->code);
31412 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
31413 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31414 ARRAY_SIZE (bdesc_pcmpestr) - 1);
31416 /* pcmpistr[im] insns. */
31417 for (i = 0, d = bdesc_pcmpistr;
31418 i < ARRAY_SIZE (bdesc_pcmpistr);
31419 i++, d++)
31421 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
31422 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31423 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31424 else
31425 ftype = INT_FTYPE_V16QI_V16QI_INT;
31426 def_builtin_const (d->mask, d->name, ftype, d->code);
31428 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
31429 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31430 ARRAY_SIZE (bdesc_pcmpistr) - 1);
31432 /* comi/ucomi insns. */
31433 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31435 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
31436 if (d->mask == OPTION_MASK_ISA_SSE2)
31437 ftype = INT_FTYPE_V2DF_V2DF;
31438 else
31439 ftype = INT_FTYPE_V4SF_V4SF;
31440 def_builtin_const (d->mask, d->name, ftype, d->code);
31442 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
31443 IX86_BUILTIN__BDESC_COMI_FIRST,
31444 ARRAY_SIZE (bdesc_comi) - 1);
31446 /* SSE */
31447 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31448 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31449 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31450 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31452 /* SSE or 3DNow!A */
31453 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31454 /* As it uses V4HImode, we have to require -mmmx too. */
31455 | OPTION_MASK_ISA_MMX,
31456 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31457 IX86_BUILTIN_MASKMOVQ);
31459 /* SSE2 */
31460 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31461 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31463 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31464 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31465 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31466 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31468 /* SSE3. */
31469 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31470 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31471 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31472 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31474 /* AES */
31475 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31476 "__builtin_ia32_aesenc128",
31477 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31478 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31479 "__builtin_ia32_aesenclast128",
31480 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31481 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31482 "__builtin_ia32_aesdec128",
31483 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31484 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31485 "__builtin_ia32_aesdeclast128",
31486 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31487 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31488 "__builtin_ia32_aesimc128",
31489 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31490 def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31491 "__builtin_ia32_aeskeygenassist128",
31492 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31494 /* PCLMUL */
31495 def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2,
31496 "__builtin_ia32_pclmulqdq128",
31497 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31499 /* RDRND */
31500 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31501 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31502 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31503 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31504 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31505 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31506 IX86_BUILTIN_RDRAND64_STEP);
31508 /* AVX2 */
31509 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31510 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31511 IX86_BUILTIN_GATHERSIV2DF);
31513 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31514 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31515 IX86_BUILTIN_GATHERSIV4DF);
31517 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31518 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31519 IX86_BUILTIN_GATHERDIV2DF);
31521 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31522 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31523 IX86_BUILTIN_GATHERDIV4DF);
31525 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31526 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31527 IX86_BUILTIN_GATHERSIV4SF);
31529 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31530 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31531 IX86_BUILTIN_GATHERSIV8SF);
31533 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31534 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31535 IX86_BUILTIN_GATHERDIV4SF);
31537 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31538 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31539 IX86_BUILTIN_GATHERDIV8SF);
31541 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31542 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31543 IX86_BUILTIN_GATHERSIV2DI);
31545 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31546 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31547 IX86_BUILTIN_GATHERSIV4DI);
31549 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31550 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31551 IX86_BUILTIN_GATHERDIV2DI);
31553 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31554 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31555 IX86_BUILTIN_GATHERDIV4DI);
31557 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31558 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31559 IX86_BUILTIN_GATHERSIV4SI);
31561 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31562 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31563 IX86_BUILTIN_GATHERSIV8SI);
31565 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31566 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31567 IX86_BUILTIN_GATHERDIV4SI);
31569 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31570 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31571 IX86_BUILTIN_GATHERDIV8SI);
31573 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31574 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31575 IX86_BUILTIN_GATHERALTSIV4DF);
31577 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31578 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31579 IX86_BUILTIN_GATHERALTDIV8SF);
31581 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31582 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31583 IX86_BUILTIN_GATHERALTSIV4DI);
31585 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31586 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31587 IX86_BUILTIN_GATHERALTDIV8SI);
31589 /* AVX512F */
31590 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31591 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
31592 IX86_BUILTIN_GATHER3SIV16SF);
31594 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31595 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
31596 IX86_BUILTIN_GATHER3SIV8DF);
31598 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31599 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
31600 IX86_BUILTIN_GATHER3DIV16SF);
31602 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31603 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
31604 IX86_BUILTIN_GATHER3DIV8DF);
31606 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31607 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
31608 IX86_BUILTIN_GATHER3SIV16SI);
31610 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31611 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
31612 IX86_BUILTIN_GATHER3SIV8DI);
31614 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31615 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
31616 IX86_BUILTIN_GATHER3DIV16SI);
31618 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31619 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
31620 IX86_BUILTIN_GATHER3DIV8DI);
31622 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31623 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31624 IX86_BUILTIN_GATHER3ALTSIV8DF);
31626 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31627 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31628 IX86_BUILTIN_GATHER3ALTDIV16SF);
31630 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31631 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31632 IX86_BUILTIN_GATHER3ALTSIV8DI);
31634 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31635 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31636 IX86_BUILTIN_GATHER3ALTDIV16SI);
31638 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31639 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
31640 IX86_BUILTIN_SCATTERSIV16SF);
31642 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31643 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
31644 IX86_BUILTIN_SCATTERSIV8DF);
31646 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31647 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
31648 IX86_BUILTIN_SCATTERDIV16SF);
31650 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31651 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
31652 IX86_BUILTIN_SCATTERDIV8DF);
31654 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31655 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
31656 IX86_BUILTIN_SCATTERSIV16SI);
31658 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31659 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
31660 IX86_BUILTIN_SCATTERSIV8DI);
31662 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31663 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
31664 IX86_BUILTIN_SCATTERDIV16SI);
31666 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31667 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
31668 IX86_BUILTIN_SCATTERDIV8DI);
31670 /* AVX512VL */
31671 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
31672 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
31673 IX86_BUILTIN_GATHER3SIV2DF);
31675 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
31676 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
31677 IX86_BUILTIN_GATHER3SIV4DF);
31679 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
31680 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
31681 IX86_BUILTIN_GATHER3DIV2DF);
31683 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
31684 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
31685 IX86_BUILTIN_GATHER3DIV4DF);
31687 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
31688 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
31689 IX86_BUILTIN_GATHER3SIV4SF);
31691 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
31692 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
31693 IX86_BUILTIN_GATHER3SIV8SF);
31695 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
31696 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
31697 IX86_BUILTIN_GATHER3DIV4SF);
31699 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
31700 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
31701 IX86_BUILTIN_GATHER3DIV8SF);
31703 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
31704 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
31705 IX86_BUILTIN_GATHER3SIV2DI);
31707 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
31708 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
31709 IX86_BUILTIN_GATHER3SIV4DI);
31711 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
31712 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
31713 IX86_BUILTIN_GATHER3DIV2DI);
31715 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
31716 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
31717 IX86_BUILTIN_GATHER3DIV4DI);
31719 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
31720 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
31721 IX86_BUILTIN_GATHER3SIV4SI);
31723 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
31724 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
31725 IX86_BUILTIN_GATHER3SIV8SI);
31727 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
31728 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
31729 IX86_BUILTIN_GATHER3DIV4SI);
31731 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
31732 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
31733 IX86_BUILTIN_GATHER3DIV8SI);
31735 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
31736 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
31737 IX86_BUILTIN_GATHER3ALTSIV4DF);
31739 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
31740 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
31741 IX86_BUILTIN_GATHER3ALTDIV8SF);
31743 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
31744 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
31745 IX86_BUILTIN_GATHER3ALTSIV4DI);
31747 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
31748 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
31749 IX86_BUILTIN_GATHER3ALTDIV8SI);
31751 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
31752 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
31753 IX86_BUILTIN_SCATTERSIV8SF);
31755 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
31756 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
31757 IX86_BUILTIN_SCATTERSIV4SF);
31759 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
31760 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
31761 IX86_BUILTIN_SCATTERSIV4DF);
31763 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
31764 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
31765 IX86_BUILTIN_SCATTERSIV2DF);
31767 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
31768 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
31769 IX86_BUILTIN_SCATTERDIV8SF);
31771 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
31772 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
31773 IX86_BUILTIN_SCATTERDIV4SF);
31775 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
31776 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
31777 IX86_BUILTIN_SCATTERDIV4DF);
31779 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
31780 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
31781 IX86_BUILTIN_SCATTERDIV2DF);
31783 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
31784 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
31785 IX86_BUILTIN_SCATTERSIV8SI);
31787 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
31788 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
31789 IX86_BUILTIN_SCATTERSIV4SI);
31791 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
31792 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
31793 IX86_BUILTIN_SCATTERSIV4DI);
31795 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
31796 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
31797 IX86_BUILTIN_SCATTERSIV2DI);
31799 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
31800 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
31801 IX86_BUILTIN_SCATTERDIV8SI);
31803 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
31804 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
31805 IX86_BUILTIN_SCATTERDIV4SI);
31807 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
31808 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
31809 IX86_BUILTIN_SCATTERDIV4DI);
31811 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
31812 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
31813 IX86_BUILTIN_SCATTERDIV2DI);
31814 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
31815 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
31816 IX86_BUILTIN_SCATTERALTSIV8DF);
31818 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
31819 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
31820 IX86_BUILTIN_SCATTERALTDIV16SF);
31822 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
31823 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
31824 IX86_BUILTIN_SCATTERALTSIV8DI);
31826 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
31827 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
31828 IX86_BUILTIN_SCATTERALTDIV16SI);
31830 /* AVX512PF */
31831 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31832 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31833 IX86_BUILTIN_GATHERPFDPD);
31834 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31835 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31836 IX86_BUILTIN_GATHERPFDPS);
31837 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31838 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31839 IX86_BUILTIN_GATHERPFQPD);
31840 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31841 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31842 IX86_BUILTIN_GATHERPFQPS);
31843 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31844 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31845 IX86_BUILTIN_SCATTERPFDPD);
31846 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31847 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31848 IX86_BUILTIN_SCATTERPFDPS);
31849 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31850 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31851 IX86_BUILTIN_SCATTERPFQPD);
31852 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31853 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31854 IX86_BUILTIN_SCATTERPFQPS);
31856 /* SHA */
31857 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31858 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31859 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31860 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31861 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31862 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31863 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31864 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31865 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31866 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31867 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31868 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31869 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31870 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31872 /* RTM. */
31873 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31874 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31876 /* MMX access to the vec_init patterns. */
31877 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31878 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31880 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31881 V4HI_FTYPE_HI_HI_HI_HI,
31882 IX86_BUILTIN_VEC_INIT_V4HI);
31884 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31885 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31886 IX86_BUILTIN_VEC_INIT_V8QI);
31888 /* Access to the vec_extract patterns. */
31889 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31890 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31891 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31892 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31893 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31894 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31895 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31896 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31897 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31898 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31900 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31901 /* As it uses V4HImode, we have to require -mmmx too. */
31902 | OPTION_MASK_ISA_MMX,
31903 "__builtin_ia32_vec_ext_v4hi",
31904 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31906 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31907 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31909 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31910 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31912 /* Access to the vec_set patterns. */
31913 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31914 "__builtin_ia32_vec_set_v2di",
31915 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31917 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31918 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31920 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31921 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31923 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31924 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31926 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31927 /* As it uses V4HImode, we have to require -mmmx too. */
31928 | OPTION_MASK_ISA_MMX,
31929 "__builtin_ia32_vec_set_v4hi",
31930 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31932 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31933 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31935 /* RDSEED */
31936 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31937 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31938 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31939 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31940 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31941 "__builtin_ia32_rdseed_di_step",
31942 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31944 /* ADCX */
31945 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31946 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31947 def_builtin (OPTION_MASK_ISA_64BIT,
31948 "__builtin_ia32_addcarryx_u64",
31949 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31950 IX86_BUILTIN_ADDCARRYX64);
31952 /* SBB */
31953 def_builtin (0, "__builtin_ia32_sbb_u32",
31954 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31955 def_builtin (OPTION_MASK_ISA_64BIT,
31956 "__builtin_ia32_sbb_u64",
31957 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31958 IX86_BUILTIN_SBB64);
31960 /* Read/write FLAGS. */
31961 def_builtin (0, "__builtin_ia32_readeflags_u32",
31962 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31963 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31964 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31965 def_builtin (0, "__builtin_ia32_writeeflags_u32",
31966 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31967 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31968 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31970 /* CLFLUSHOPT. */
31971 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31972 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31974 /* CLWB. */
31975 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
31976 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
31978 /* MONITORX and MWAITX. */
31979 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
31980 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
31981 def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
31982 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
31984 /* CLZERO. */
31985 def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
31986 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
31988 /* Add FMA4 multi-arg instructions. */
31989 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31991 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
31992 if (d->name == 0)
31993 continue;
31995 ftype = (enum ix86_builtin_func_type) d->flag;
31996 def_builtin_const (d->mask, d->name, ftype, d->code);
31998 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
31999 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
32000 ARRAY_SIZE (bdesc_multi_arg) - 1);
32002 /* Add CET intrinsics. */
32003 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
32005 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
32006 if (d->name == 0)
32007 continue;
32009 ftype = (enum ix86_builtin_func_type) d->flag;
32010 def_builtin (d->mask, d->name, ftype, d->code);
32012 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
32013 IX86_BUILTIN__BDESC_CET_FIRST,
32014 ARRAY_SIZE (bdesc_cet) - 1);
32016 for (i = 0, d = bdesc_cet_rdssp;
32017 i < ARRAY_SIZE (bdesc_cet_rdssp);
32018 i++, d++)
32020 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
32021 if (d->name == 0)
32022 continue;
32024 ftype = (enum ix86_builtin_func_type) d->flag;
32025 def_builtin (d->mask, d->name, ftype, d->code);
32027 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
32028 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
32029 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
32032 static void
32033 ix86_init_mpx_builtins ()
32035 const struct builtin_description * d;
32036 enum ix86_builtin_func_type ftype;
32037 tree decl;
32038 size_t i;
32040 for (i = 0, d = bdesc_mpx;
32041 i < ARRAY_SIZE (bdesc_mpx);
32042 i++, d++)
32044 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
32045 if (d->name == 0)
32046 continue;
32048 ftype = (enum ix86_builtin_func_type) d->flag;
32049 decl = def_builtin2 (d->mask, d->name, ftype, d->code);
32051 /* Without the leaf and nothrow flags for MPX builtins,
32052 abnormal edges may follow their calls when setjmp
32053 is present in the function. Since we may have a lot
32054 of MPX builtin calls, this causes lots of useless
32055 edges and enormous PHI nodes. To avoid this we mark
32056 MPX builtins as leaf and nothrow. */
32057 if (decl)
32059 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32060 NULL_TREE);
32061 TREE_NOTHROW (decl) = 1;
32063 else
32065 ix86_builtins_isa[(int)d->code].leaf_p = true;
32066 ix86_builtins_isa[(int)d->code].nothrow_p = true;
32069 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
32070 IX86_BUILTIN__BDESC_MPX_FIRST,
32071 ARRAY_SIZE (bdesc_mpx) - 1);
32073 for (i = 0, d = bdesc_mpx_const;
32074 i < ARRAY_SIZE (bdesc_mpx_const);
32075 i++, d++)
32077 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
32078 if (d->name == 0)
32079 continue;
32081 ftype = (enum ix86_builtin_func_type) d->flag;
32082 decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);
32084 if (decl)
32086 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32087 NULL_TREE);
32088 TREE_NOTHROW (decl) = 1;
32090 else
32092 ix86_builtins_isa[(int)d->code].leaf_p = true;
32093 ix86_builtins_isa[(int)d->code].nothrow_p = true;
32096 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
32097 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
32098 ARRAY_SIZE (bdesc_mpx_const) - 1);
32100 #undef BDESC_VERIFY
32101 #undef BDESC_VERIFYS
32103 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
32104 to return a pointer to VERSION_DECL if the outcome of the expression
32105 formed by PREDICATE_CHAIN is true. This function will be called during
32106 version dispatch to decide which function version to execute. It returns
32107 the basic block at the end, to which more conditions can be added. */
32109 static basic_block
32110 add_condition_to_bb (tree function_decl, tree version_decl,
32111 tree predicate_chain, basic_block new_bb)
32113 gimple *return_stmt;
32114 tree convert_expr, result_var;
32115 gimple *convert_stmt;
32116 gimple *call_cond_stmt;
32117 gimple *if_else_stmt;
32119 basic_block bb1, bb2, bb3;
32120 edge e12, e23;
32122 tree cond_var, and_expr_var = NULL_TREE;
32123 gimple_seq gseq;
32125 tree predicate_decl, predicate_arg;
32127 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
32129 gcc_assert (new_bb != NULL);
32130 gseq = bb_seq (new_bb);
32133 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
32134 build_fold_addr_expr (version_decl));
32135 result_var = create_tmp_var (ptr_type_node);
32136 convert_stmt = gimple_build_assign (result_var, convert_expr);
32137 return_stmt = gimple_build_return (result_var);
32139 if (predicate_chain == NULL_TREE)
32141 gimple_seq_add_stmt (&gseq, convert_stmt);
32142 gimple_seq_add_stmt (&gseq, return_stmt);
32143 set_bb_seq (new_bb, gseq);
32144 gimple_set_bb (convert_stmt, new_bb);
32145 gimple_set_bb (return_stmt, new_bb);
32146 pop_cfun ();
32147 return new_bb;
32150 while (predicate_chain != NULL)
32152 cond_var = create_tmp_var (integer_type_node);
32153 predicate_decl = TREE_PURPOSE (predicate_chain);
32154 predicate_arg = TREE_VALUE (predicate_chain);
32155 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
32156 gimple_call_set_lhs (call_cond_stmt, cond_var);
32158 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
32159 gimple_set_bb (call_cond_stmt, new_bb);
32160 gimple_seq_add_stmt (&gseq, call_cond_stmt);
32162 predicate_chain = TREE_CHAIN (predicate_chain);
32164 if (and_expr_var == NULL)
32165 and_expr_var = cond_var;
32166 else
32168 gimple *assign_stmt;
32169 /* Use MIN_EXPR to check whether any of the integers is zero:
32170 and_expr_var = min_expr <cond_var, and_expr_var> */
32171 assign_stmt = gimple_build_assign (and_expr_var,
32172 build2 (MIN_EXPR, integer_type_node,
32173 cond_var, and_expr_var));
32175 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
32176 gimple_set_bb (assign_stmt, new_bb);
32177 gimple_seq_add_stmt (&gseq, assign_stmt);
32181 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
32182 integer_zero_node,
32183 NULL_TREE, NULL_TREE);
32184 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
32185 gimple_set_bb (if_else_stmt, new_bb);
32186 gimple_seq_add_stmt (&gseq, if_else_stmt);
32188 gimple_seq_add_stmt (&gseq, convert_stmt);
32189 gimple_seq_add_stmt (&gseq, return_stmt);
32190 set_bb_seq (new_bb, gseq);
32192 bb1 = new_bb;
32193 e12 = split_block (bb1, if_else_stmt);
32194 bb2 = e12->dest;
32195 e12->flags &= ~EDGE_FALLTHRU;
32196 e12->flags |= EDGE_TRUE_VALUE;
32198 e23 = split_block (bb2, return_stmt);
32200 gimple_set_bb (convert_stmt, bb2);
32201 gimple_set_bb (return_stmt, bb2);
32203 bb3 = e23->dest;
32204 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
32206 remove_edge (e23);
32207 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
32209 pop_cfun ();
32211 return bb3;
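/* Rough shape of the code built above for a single predicate (identifier
   names are invented for illustration only):
       cond_var = PREDICATE_DECL (PREDICATE_ARG);
       if (cond_var > 0)
         {
           result_var = (void *) &VERSION_DECL;
           return result_var;
         }
       <control falls through to bb3, where the next condition is added>
   With several predicates in the chain, the cond_vars are first folded
   together with MIN_EXPR, so the single GT_EXPR test checks that none of
   them was zero.  */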
32214 /* This parses the attribute arguments to target in DECL and determines
32215 the right builtin to use to match the platform specification.
32216 It returns the priority value for this version decl. If PREDICATE_LIST
32217 is not NULL, it stores the list of cpu features that need to be checked
32218 before dispatching this function. */
32220 static unsigned int
32221 get_builtin_code_for_version (tree decl, tree *predicate_list)
32223 tree attrs;
32224 struct cl_target_option cur_target;
32225 tree target_node;
32226 struct cl_target_option *new_target;
32227 const char *arg_str = NULL;
32228 const char *attrs_str = NULL;
32229 char *tok_str = NULL;
32230 char *token;
32232 /* Priority of i386 features, greater value is higher priority. This is
32233 used to decide the order in which function dispatch must happen. For
32234 instance, a version specialized for SSE4.2 should be checked for dispatch
32235 before a version for SSE3, as SSE4.2 implies SSE3. */
32236 enum feature_priority
32238 P_ZERO = 0,
32239 P_MMX,
32240 P_SSE,
32241 P_SSE2,
32242 P_SSE3,
32243 P_SSSE3,
32244 P_PROC_SSSE3,
32245 P_SSE4_A,
32246 P_PROC_SSE4_A,
32247 P_SSE4_1,
32248 P_SSE4_2,
32249 P_PROC_SSE4_2,
32250 P_POPCNT,
32251 P_AES,
32252 P_PCLMUL,
32253 P_AVX,
32254 P_PROC_AVX,
32255 P_BMI,
32256 P_PROC_BMI,
32257 P_FMA4,
32258 P_XOP,
32259 P_PROC_XOP,
32260 P_FMA,
32261 P_PROC_FMA,
32262 P_BMI2,
32263 P_AVX2,
32264 P_PROC_AVX2,
32265 P_AVX512F,
32266 P_PROC_AVX512F
32269 enum feature_priority priority = P_ZERO;
32271 /* These are the target attribute strings for which a dispatcher is
32272 available, from fold_builtin_cpu. */
32274 static struct _feature_list
32276 const char *const name;
32277 const enum feature_priority priority;
32279 const feature_list[] =
32281 {"mmx", P_MMX},
32282 {"sse", P_SSE},
32283 {"sse2", P_SSE2},
32284 {"sse3", P_SSE3},
32285 {"sse4a", P_SSE4_A},
32286 {"ssse3", P_SSSE3},
32287 {"sse4.1", P_SSE4_1},
32288 {"sse4.2", P_SSE4_2},
32289 {"popcnt", P_POPCNT},
32290 {"aes", P_AES},
32291 {"pclmul", P_PCLMUL},
32292 {"avx", P_AVX},
32293 {"bmi", P_BMI},
32294 {"fma4", P_FMA4},
32295 {"xop", P_XOP},
32296 {"fma", P_FMA},
32297 {"bmi2", P_BMI2},
32298 {"avx2", P_AVX2},
32299 {"avx512f", P_AVX512F}
32303 static unsigned int NUM_FEATURES
32304 = sizeof (feature_list) / sizeof (struct _feature_list);
32306 unsigned int i;
32308 tree predicate_chain = NULL_TREE;
32309 tree predicate_decl, predicate_arg;
32311 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32312 gcc_assert (attrs != NULL);
32314 attrs = TREE_VALUE (TREE_VALUE (attrs));
32316 gcc_assert (TREE_CODE (attrs) == STRING_CST);
32317 attrs_str = TREE_STRING_POINTER (attrs);
32319 /* Return priority zero for default function. */
32320 if (strcmp (attrs_str, "default") == 0)
32321 return 0;
32323 /* Handle arch= if specified. For priority, set it to be 1 more than
32324 the best instruction set the processor can handle. For instance, if
32325 there is a version for atom and a version for ssse3 (the highest ISA
32326 priority for atom), the atom version must be checked for dispatch
32327 before the ssse3 version. */
32328 if (strstr (attrs_str, "arch=") != NULL)
32330 cl_target_option_save (&cur_target, &global_options);
32331 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
32332 &global_options_set);
32334 gcc_assert (target_node);
32335 new_target = TREE_TARGET_OPTION (target_node);
32336 gcc_assert (new_target);
32338 if (new_target->arch_specified && new_target->arch > 0)
32340 switch (new_target->arch)
32342 case PROCESSOR_CORE2:
32343 arg_str = "core2";
32344 priority = P_PROC_SSSE3;
32345 break;
32346 case PROCESSOR_NEHALEM:
32347 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
32349 arg_str = "westmere";
32350 priority = P_AES;
32352 else
32354 /* We translate "arch=corei7" and "arch=nehalem" to
32355 "corei7" so that it will be mapped to M_INTEL_COREI7
32356 as cpu type to cover all M_INTEL_COREI7_XXXs. */
32357 arg_str = "corei7";
32358 priority = P_PROC_SSE4_2;
32360 break;
32361 case PROCESSOR_SANDYBRIDGE:
32362 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
32363 arg_str = "ivybridge";
32364 else
32365 arg_str = "sandybridge";
32366 priority = P_PROC_AVX;
32367 break;
32368 case PROCESSOR_HASWELL:
32369 case PROCESSOR_SKYLAKE_AVX512:
32370 if (new_target->x_ix86_isa_flags
32371 & OPTION_MASK_ISA_AVX512VBMI)
32372 arg_str = "cannonlake";
32373 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
32374 arg_str = "skylake-avx512";
32375 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
32376 arg_str = "skylake";
32377 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
32378 arg_str = "broadwell";
32379 else
32380 arg_str = "haswell";
32381 priority = P_PROC_AVX2;
32382 break;
32383 case PROCESSOR_ICELAKE_CLIENT:
32384 arg_str = "icelake-client";
32385 priority = P_PROC_AVX2;
32386 break;
32387 case PROCESSOR_ICELAKE_SERVER:
32388 arg_str = "icelake-server";
32389 priority = P_PROC_AVX2;
32390 break;
32391 case PROCESSOR_BONNELL:
32392 arg_str = "bonnell";
32393 priority = P_PROC_SSSE3;
32394 break;
32395 case PROCESSOR_KNL:
32396 arg_str = "knl";
32397 priority = P_PROC_AVX512F;
32398 break;
32399 case PROCESSOR_KNM:
32400 arg_str = "knm";
32401 priority = P_PROC_AVX512F;
32402 break;
32403 case PROCESSOR_SILVERMONT:
32404 arg_str = "silvermont";
32405 priority = P_PROC_SSE4_2;
32406 break;
32407 case PROCESSOR_AMDFAM10:
32408 arg_str = "amdfam10h";
32409 priority = P_PROC_SSE4_A;
32410 break;
32411 case PROCESSOR_BTVER1:
32412 arg_str = "btver1";
32413 priority = P_PROC_SSE4_A;
32414 break;
32415 case PROCESSOR_BTVER2:
32416 arg_str = "btver2";
32417 priority = P_PROC_BMI;
32418 break;
32419 case PROCESSOR_BDVER1:
32420 arg_str = "bdver1";
32421 priority = P_PROC_XOP;
32422 break;
32423 case PROCESSOR_BDVER2:
32424 arg_str = "bdver2";
32425 priority = P_PROC_FMA;
32426 break;
32427 case PROCESSOR_BDVER3:
32428 arg_str = "bdver3";
32429 priority = P_PROC_FMA;
32430 break;
32431 case PROCESSOR_BDVER4:
32432 arg_str = "bdver4";
32433 priority = P_PROC_AVX2;
32434 break;
32435 case PROCESSOR_ZNVER1:
32436 arg_str = "znver1";
32437 priority = P_PROC_AVX2;
32438 break;
32442 cl_target_option_restore (&global_options, &cur_target);
32444 if (predicate_list && arg_str == NULL)
32446 error_at (DECL_SOURCE_LOCATION (decl),
32447 "No dispatcher found for the versioning attributes");
32448 return 0;
32451 if (predicate_list)
32453 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
32454 /* For a C string literal the length includes the trailing NULL. */
32455 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
32456 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32457 predicate_chain);
32461 /* Process feature name. */
32462 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
32463 strcpy (tok_str, attrs_str);
32464 token = strtok (tok_str, ",");
32465 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
32467 while (token != NULL)
32469 /* Do not process "arch=" */
32470 if (strncmp (token, "arch=", 5) == 0)
32472 token = strtok (NULL, ",");
32473 continue;
32475 for (i = 0; i < NUM_FEATURES; ++i)
32477 if (strcmp (token, feature_list[i].name) == 0)
32479 if (predicate_list)
32481 predicate_arg = build_string_literal (
32482 strlen (feature_list[i].name) + 1,
32483 feature_list[i].name);
32484 predicate_chain = tree_cons (predicate_decl, predicate_arg,
32485 predicate_chain);
32487 /* Find the maximum priority feature. */
32488 if (feature_list[i].priority > priority)
32489 priority = feature_list[i].priority;
32491 break;
32494 if (predicate_list && i == NUM_FEATURES)
32496 error_at (DECL_SOURCE_LOCATION (decl),
32497 "No dispatcher found for %s", token);
32498 return 0;
32500 token = strtok (NULL, ",");
32502 free (tok_str);
32504 if (predicate_list && predicate_chain == NULL_TREE)
32506 error_at (DECL_SOURCE_LOCATION (decl),
32507 "No dispatcher found for the versioning attributes : %s",
32508 attrs_str);
32509 return 0;
32511 else if (predicate_list)
32513 predicate_chain = nreverse (predicate_chain);
32514 *predicate_list = predicate_chain;
32517 return priority;
32520 /* This compares the priority of target features in function DECL1
32521 and DECL2. It returns positive value if DECL1 is higher priority,
32522 negative value if DECL2 is higher priority and 0 if they are the
32523 same. */
32525 static int
32526 ix86_compare_version_priority (tree decl1, tree decl2)
32528 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
32529 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
32531 return (int)priority1 - (int)priority2;
32534 /* V1 and V2 point to function versions with different priorities
32535 based on the target ISA. This function compares their priorities. */
32537 static int
32538 feature_compare (const void *v1, const void *v2)
32540 typedef struct _function_version_info
32542 tree version_decl;
32543 tree predicate_chain;
32544 unsigned int dispatch_priority;
32545 } function_version_info;
32547 const function_version_info c1 = *(const function_version_info *)v1;
32548 const function_version_info c2 = *(const function_version_info *)v2;
32549 return (c2.dispatch_priority - c1.dispatch_priority);
32552 /* This function generates the dispatch function for
32553 multi-versioned functions. DISPATCH_DECL is the function which will
32554 contain the dispatch logic. FNDECLS are the function choices for
32555 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
32556 in DISPATCH_DECL in which the dispatch code is generated. */
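/* For illustration, the GIMPLE built here behaves roughly like the
   following pseudo-C, with foo a hypothetical multiversioned function
   and the ".suffix" names standing in for the mangled version decls
   (one condition per version, highest dispatch priority first, the
   default version last):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("haswell"))
       return &foo.arch_haswell;
     if (__builtin_cpu_supports ("avx2"))
       return &foo.avx2;
     return &foo;                       // the default version
*/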
32558 static int
32559 dispatch_function_versions (tree dispatch_decl,
32560 void *fndecls_p,
32561 basic_block *empty_bb)
32563 tree default_decl;
32564 gimple *ifunc_cpu_init_stmt;
32565 gimple_seq gseq;
32566 int ix;
32567 tree ele;
32568 vec<tree> *fndecls;
32569 unsigned int num_versions = 0;
32570 unsigned int actual_versions = 0;
32571 unsigned int i;
32573 struct _function_version_info
32575 tree version_decl;
32576 tree predicate_chain;
32577 unsigned int dispatch_priority;
32578 }*function_version_info;
32580 gcc_assert (dispatch_decl != NULL
32581 && fndecls_p != NULL
32582 && empty_bb != NULL);
32584 /* fndecls_p is actually a vector. */
32585 fndecls = static_cast<vec<tree> *> (fndecls_p);
32587 /* At least one more version other than the default. */
32588 num_versions = fndecls->length ();
32589 gcc_assert (num_versions >= 2);
32591 function_version_info = (struct _function_version_info *)
32592 XNEWVEC (struct _function_version_info, (num_versions - 1));
32594 /* The first version in the vector is the default decl. */
32595 default_decl = (*fndecls)[0];
32597 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
32599 gseq = bb_seq (*empty_bb);
32600 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
32601 constructors, so explicitly call __builtin_cpu_init here. */
32602 ifunc_cpu_init_stmt = gimple_build_call_vec (
32603 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
32604 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
32605 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
32606 set_bb_seq (*empty_bb, gseq);
32608 pop_cfun ();
32611 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32613 tree version_decl = ele;
32614 tree predicate_chain = NULL_TREE;
32615 unsigned int priority;
32616 /* Get attribute string, parse it and find the right predicate decl.
32617 The predicate function could be a lengthy combination of many
32618 features, like arch-type and various isa-variants. */
32619 priority = get_builtin_code_for_version (version_decl,
32620 &predicate_chain);
32622 if (predicate_chain == NULL_TREE)
32623 continue;
32625 function_version_info [actual_versions].version_decl = version_decl;
32626 function_version_info [actual_versions].predicate_chain
32627 = predicate_chain;
32628 function_version_info [actual_versions].dispatch_priority = priority;
32629 actual_versions++;
32632 /* Sort the versions according to descending order of dispatch priority. The
32633 priority is based on the ISA. This is not a perfect solution. There
32634 could still be ambiguity. If more than one function version is suitable
32635 to execute, which one should be dispatched? In the future, allow the user
32636 to specify a dispatch priority next to the version. */
32637 qsort (function_version_info, actual_versions,
32638 sizeof (struct _function_version_info), feature_compare);
32640 for (i = 0; i < actual_versions; ++i)
32641 *empty_bb = add_condition_to_bb (dispatch_decl,
32642 function_version_info[i].version_decl,
32643 function_version_info[i].predicate_chain,
32644 *empty_bb);
32646 /* Dispatch the default version at the end. */
32647 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32648 NULL, *empty_bb);
32650 free (function_version_info);
32651 return 0;
32654 /* This function changes the assembler name for functions that are
32655 versions. If DECL is a function version and has a "target"
32656 attribute, it appends the attribute string to its assembler name. */
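/* For example (foo being a hypothetical user function): the "default"
   version keeps its original assembler name, while a version declared
   with __attribute__ ((target ("avx2"))) is renamed to "foo.avx2"; when
   several features are given, the suffix is the sorted attribute string
   produced by sorted_attr_string.  */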
32658 static tree
32659 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32661 tree version_attr;
32662 const char *orig_name, *version_string;
32663 char *attr_str, *assembler_name;
32665 if (DECL_DECLARED_INLINE_P (decl)
32666 && lookup_attribute ("gnu_inline",
32667 DECL_ATTRIBUTES (decl)))
32668 error_at (DECL_SOURCE_LOCATION (decl),
32669 "Function versions cannot be marked as gnu_inline,"
32670 " bodies have to be generated");
32672 if (DECL_VIRTUAL_P (decl)
32673 || DECL_VINDEX (decl))
32674 sorry ("Virtual function multiversioning not supported");
32676 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32678 /* The target attribute string cannot be NULL. */
32679 gcc_assert (version_attr != NULL_TREE);
32681 orig_name = IDENTIFIER_POINTER (id);
32682 version_string
32683 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32685 if (strcmp (version_string, "default") == 0)
32686 return id;
32688 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32689 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32691 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32693 /* Allow assembler name to be modified if already set. */
32694 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32695 SET_DECL_RTL (decl, NULL);
32697 tree ret = get_identifier (assembler_name);
32698 XDELETEVEC (attr_str);
32699 XDELETEVEC (assembler_name);
32700 return ret;
32704 static tree
32705 ix86_mangle_decl_assembler_name (tree decl, tree id)
32707 /* For function version, add the target suffix to the assembler name. */
32708 if (TREE_CODE (decl) == FUNCTION_DECL
32709 && DECL_FUNCTION_VERSIONED (decl))
32710 id = ix86_mangle_function_version_assembler_name (decl, id);
32711 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32712 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32713 #endif
32715 return id;
32718 /* Make a dispatcher declaration for the multi-versioned function DECL.
32719 Calls to DECL function will be replaced with calls to the dispatcher
32720 by the front-end. Returns the decl of the dispatcher function. */
32722 static tree
32723 ix86_get_function_versions_dispatcher (void *decl)
32725 tree fn = (tree) decl;
32726 struct cgraph_node *node = NULL;
32727 struct cgraph_node *default_node = NULL;
32728 struct cgraph_function_version_info *node_v = NULL;
32729 struct cgraph_function_version_info *first_v = NULL;
32731 tree dispatch_decl = NULL;
32733 struct cgraph_function_version_info *default_version_info = NULL;
32735 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32737 node = cgraph_node::get (fn);
32738 gcc_assert (node != NULL);
32740 node_v = node->function_version ();
32741 gcc_assert (node_v != NULL);
32743 if (node_v->dispatcher_resolver != NULL)
32744 return node_v->dispatcher_resolver;
32746 /* Find the default version and make it the first node. */
32747 first_v = node_v;
32748 /* Go to the beginning of the chain. */
32749 while (first_v->prev != NULL)
32750 first_v = first_v->prev;
32751 default_version_info = first_v;
32752 while (default_version_info != NULL)
32754 if (is_function_default_version
32755 (default_version_info->this_node->decl))
32756 break;
32757 default_version_info = default_version_info->next;
32760 /* If there is no default node, just return NULL. */
32761 if (default_version_info == NULL)
32762 return NULL;
32764 /* Make default info the first node. */
32765 if (first_v != default_version_info)
32767 default_version_info->prev->next = default_version_info->next;
32768 if (default_version_info->next)
32769 default_version_info->next->prev = default_version_info->prev;
32770 first_v->prev = default_version_info;
32771 default_version_info->next = first_v;
32772 default_version_info->prev = NULL;
32775 default_node = default_version_info->this_node;
32777 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32778 if (targetm.has_ifunc_p ())
32780 struct cgraph_function_version_info *it_v = NULL;
32781 struct cgraph_node *dispatcher_node = NULL;
32782 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32784 /* Right now, the dispatching is done via ifunc. */
32785 dispatch_decl = make_dispatcher_decl (default_node->decl);
32787 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32788 gcc_assert (dispatcher_node != NULL);
32789 dispatcher_node->dispatcher_function = 1;
32790 dispatcher_version_info
32791 = dispatcher_node->insert_new_function_version ();
32792 dispatcher_version_info->next = default_version_info;
32793 dispatcher_node->definition = 1;
32795 /* Set the dispatcher for all the versions. */
32796 it_v = default_version_info;
32797 while (it_v != NULL)
32799 it_v->dispatcher_resolver = dispatch_decl;
32800 it_v = it_v->next;
32803 else
32804 #endif
32806 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32807 "multiversioning needs ifunc which is not supported "
32808 "on this target");
32811 return dispatch_decl;
32814 /* Make the resolver function decl to dispatch the versions of
32815 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
32816 ifunc alias that will point to the created resolver. Create an
32817 empty basic block in the resolver and store the pointer in
32818 EMPTY_BB. Return the decl of the resolver function. */
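/* The net effect is comparable to declaring (names illustrative):

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   i.e. the dispatched symbol becomes a GNU IFUNC whose resolver, whose
   body is generated by the caller of this function, picks one of the
   versions at load time.  */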
32820 static tree
32821 make_resolver_func (const tree default_decl,
32822 const tree ifunc_alias_decl,
32823 basic_block *empty_bb)
32825 char *resolver_name;
32826 tree decl, type, decl_name, t;
32828 /* IFUNCs have to be globally visible. So, if the default_decl is
32829 not, then the name of the IFUNC should be made unique. */
32830 if (TREE_PUBLIC (default_decl) == 0)
32832 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
32833 symtab->change_decl_assembler_name (ifunc_alias_decl,
32834 get_identifier (ifunc_name));
32835 XDELETEVEC (ifunc_name);
32838 resolver_name = make_unique_name (default_decl, "resolver", false);
32840 /* The resolver function should return a (void *). */
32841 type = build_function_type_list (ptr_type_node, NULL_TREE);
32843 decl = build_fn_decl (resolver_name, type);
32844 decl_name = get_identifier (resolver_name);
32845 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32847 DECL_NAME (decl) = decl_name;
32848 TREE_USED (decl) = 1;
32849 DECL_ARTIFICIAL (decl) = 1;
32850 DECL_IGNORED_P (decl) = 1;
32851 TREE_PUBLIC (decl) = 0;
32852 DECL_UNINLINABLE (decl) = 1;
32854 /* Resolver is not external, body is generated. */
32855 DECL_EXTERNAL (decl) = 0;
32856 DECL_EXTERNAL (ifunc_alias_decl) = 0;
32858 DECL_CONTEXT (decl) = NULL_TREE;
32859 DECL_INITIAL (decl) = make_node (BLOCK);
32860 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32862 if (DECL_COMDAT_GROUP (default_decl)
32863 || TREE_PUBLIC (default_decl))
32865 /* In this case, each translation unit with a call to this
32866 versioned function will put out a resolver. Ensure it
32867 is comdat to keep just one copy. */
32868 DECL_COMDAT (decl) = 1;
32869 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32871 /* Build result decl and add to function_decl. */
32872 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32873 DECL_ARTIFICIAL (t) = 1;
32874 DECL_IGNORED_P (t) = 1;
32875 DECL_RESULT (decl) = t;
32877 gimplify_function_tree (decl);
32878 push_cfun (DECL_STRUCT_FUNCTION (decl));
32879 *empty_bb = init_lowered_empty_function (decl, false,
32880 profile_count::uninitialized ());
32882 cgraph_node::add_new_function (decl, true);
32883 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32885 pop_cfun ();
32887 gcc_assert (ifunc_alias_decl != NULL);
32888 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
32889 DECL_ATTRIBUTES (ifunc_alias_decl)
32890 = make_attribute ("ifunc", resolver_name,
32891 DECL_ATTRIBUTES (ifunc_alias_decl));
32893 /* Create the alias for dispatch to resolver here. */
32894 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
32895 XDELETEVEC (resolver_name);
32896 return decl;
32899 /* Generate the dispatching code body to dispatch multi-versioned function
32900 DECL. The target hook is called to process the "target" attributes and
32901 provide the code to dispatch the right function at run-time. NODE points
32902 to the dispatcher decl whose body will be created. */
32904 static tree
32905 ix86_generate_version_dispatcher_body (void *node_p)
32907 tree resolver_decl;
32908 basic_block empty_bb;
32909 tree default_ver_decl;
32910 struct cgraph_node *versn;
32911 struct cgraph_node *node;
32913 struct cgraph_function_version_info *node_version_info = NULL;
32914 struct cgraph_function_version_info *versn_info = NULL;
32916 node = (cgraph_node *)node_p;
32918 node_version_info = node->function_version ();
32919 gcc_assert (node->dispatcher_function
32920 && node_version_info != NULL);
32922 if (node_version_info->dispatcher_resolver)
32923 return node_version_info->dispatcher_resolver;
32925 /* The first version in the chain corresponds to the default version. */
32926 default_ver_decl = node_version_info->next->this_node->decl;
32928 /* node is going to be an alias, so remove the finalized bit. */
32929 node->definition = false;
32931 resolver_decl = make_resolver_func (default_ver_decl,
32932 node->decl, &empty_bb);
32934 node_version_info->dispatcher_resolver = resolver_decl;
32936 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32938 auto_vec<tree, 2> fn_ver_vec;
32940 for (versn_info = node_version_info->next; versn_info;
32941 versn_info = versn_info->next)
32943 versn = versn_info->this_node;
32944 /* Check for virtual functions here again, as by this time it should
32945 have been determined if this function needs a vtable index or
32946 not. This happens for methods in derived classes that override
32947 virtual methods in base classes but are not explicitly marked as
32948 virtual. */
32949 if (DECL_VINDEX (versn->decl))
32950 sorry ("Virtual function multiversioning not supported");
32952 fn_ver_vec.safe_push (versn->decl);
32955 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32956 cgraph_edge::rebuild_edges ();
32957 pop_cfun ();
32958 return resolver_decl;
32960 /* This builds the processor_model struct type defined in
32961 libgcc/config/i386/cpuinfo.c */
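/* Approximately the C declaration this mirrors (a sketch of the struct
   as declared in cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/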
32963 static tree
32964 build_processor_model_struct (void)
32966 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32967 "__cpu_features"};
32968 tree field = NULL_TREE, field_chain = NULL_TREE;
32969 int i;
32970 tree type = make_node (RECORD_TYPE);
32972 /* The first 3 fields are unsigned int. */
32973 for (i = 0; i < 3; ++i)
32975 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32976 get_identifier (field_name[i]), unsigned_type_node);
32977 if (field_chain != NULL_TREE)
32978 DECL_CHAIN (field) = field_chain;
32979 field_chain = field;
32982 /* The last field is an array of unsigned integers of size one. */
32983 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32984 get_identifier (field_name[3]),
32985 build_array_type (unsigned_type_node,
32986 build_index_type (size_one_node)));
32987 if (field_chain != NULL_TREE)
32988 DECL_CHAIN (field) = field_chain;
32989 field_chain = field;
32991 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32992 return type;
32995 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32997 static tree
32998 make_var_decl (tree type, const char *name)
33000 tree new_decl;
33002 new_decl = build_decl (UNKNOWN_LOCATION,
33003 VAR_DECL,
33004 get_identifier(name),
33005 type);
33007 DECL_EXTERNAL (new_decl) = 1;
33008 TREE_STATIC (new_decl) = 1;
33009 TREE_PUBLIC (new_decl) = 1;
33010 DECL_INITIAL (new_decl) = 0;
33011 DECL_ARTIFICIAL (new_decl) = 0;
33012 DECL_PRESERVE_P (new_decl) = 1;
33014 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
33015 assemble_variable (new_decl, 0, 0, 0);
33017 return new_decl;
33020 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
33021 into an integer defined in libgcc/config/i386/cpuinfo.c */
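/* For example, __builtin_cpu_supports ("avx2") folds into a test
   roughly equivalent to

     (__cpu_model.__cpu_features[0] & (1 << F_AVX2)) != 0

   and __builtin_cpu_is ("amd") into __cpu_model.__cpu_vendor == M_AMD;
   CPU types and subtypes are checked against the second and third
   fields after subtracting their start values.  */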
33023 static tree
33024 fold_builtin_cpu (tree fndecl, tree *args)
33026 unsigned int i;
33027 enum ix86_builtins fn_code = (enum ix86_builtins)
33028 DECL_FUNCTION_CODE (fndecl);
33029 tree param_string_cst = NULL;
33031 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
33032 enum processor_features
33034 F_CMOV = 0,
33035 F_MMX,
33036 F_POPCNT,
33037 F_SSE,
33038 F_SSE2,
33039 F_SSE3,
33040 F_SSSE3,
33041 F_SSE4_1,
33042 F_SSE4_2,
33043 F_AVX,
33044 F_AVX2,
33045 F_SSE4_A,
33046 F_FMA4,
33047 F_XOP,
33048 F_FMA,
33049 F_AVX512F,
33050 F_BMI,
33051 F_BMI2,
33052 F_AES,
33053 F_PCLMUL,
33054 F_AVX512VL,
33055 F_AVX512BW,
33056 F_AVX512DQ,
33057 F_AVX512CD,
33058 F_AVX512ER,
33059 F_AVX512PF,
33060 F_AVX512VBMI,
33061 F_AVX512IFMA,
33062 F_AVX5124VNNIW,
33063 F_AVX5124FMAPS,
33064 F_AVX512VPOPCNTDQ,
33065 F_AVX512VBMI2,
33066 F_GFNI,
33067 F_VPCLMULQDQ,
33068 F_AVX512VNNI,
33069 F_AVX512BITALG,
33070 F_MAX
33073 /* These are the values for vendor types and cpu types and subtypes
33074 in cpuinfo.c. The corresponding start value must be subtracted
33075 from CPU types and subtypes before they are compared. */
33076 enum processor_model
33078 M_INTEL = 1,
33079 M_AMD,
33080 M_CPU_TYPE_START,
33081 M_INTEL_BONNELL,
33082 M_INTEL_CORE2,
33083 M_INTEL_COREI7,
33084 M_AMDFAM10H,
33085 M_AMDFAM15H,
33086 M_INTEL_SILVERMONT,
33087 M_INTEL_KNL,
33088 M_AMD_BTVER1,
33089 M_AMD_BTVER2,
33090 M_AMDFAM17H,
33091 M_INTEL_KNM,
33092 M_CPU_SUBTYPE_START,
33093 M_INTEL_COREI7_NEHALEM,
33094 M_INTEL_COREI7_WESTMERE,
33095 M_INTEL_COREI7_SANDYBRIDGE,
33096 M_AMDFAM10H_BARCELONA,
33097 M_AMDFAM10H_SHANGHAI,
33098 M_AMDFAM10H_ISTANBUL,
33099 M_AMDFAM15H_BDVER1,
33100 M_AMDFAM15H_BDVER2,
33101 M_AMDFAM15H_BDVER3,
33102 M_AMDFAM15H_BDVER4,
33103 M_AMDFAM17H_ZNVER1,
33104 M_INTEL_COREI7_IVYBRIDGE,
33105 M_INTEL_COREI7_HASWELL,
33106 M_INTEL_COREI7_BROADWELL,
33107 M_INTEL_COREI7_SKYLAKE,
33108 M_INTEL_COREI7_SKYLAKE_AVX512,
33109 M_INTEL_COREI7_CANNONLAKE,
33110 M_INTEL_COREI7_ICELAKE_CLIENT,
33111 M_INTEL_COREI7_ICELAKE_SERVER
33114 static struct _arch_names_table
33116 const char *const name;
33117 const enum processor_model model;
33119 const arch_names_table[] =
33121 {"amd", M_AMD},
33122 {"intel", M_INTEL},
33123 {"atom", M_INTEL_BONNELL},
33124 {"slm", M_INTEL_SILVERMONT},
33125 {"core2", M_INTEL_CORE2},
33126 {"corei7", M_INTEL_COREI7},
33127 {"nehalem", M_INTEL_COREI7_NEHALEM},
33128 {"westmere", M_INTEL_COREI7_WESTMERE},
33129 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
33130 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
33131 {"haswell", M_INTEL_COREI7_HASWELL},
33132 {"broadwell", M_INTEL_COREI7_BROADWELL},
33133 {"skylake", M_INTEL_COREI7_SKYLAKE},
33134 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
33135 {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
33136 {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT},
33137 {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER},
33138 {"bonnell", M_INTEL_BONNELL},
33139 {"silvermont", M_INTEL_SILVERMONT},
33140 {"knl", M_INTEL_KNL},
33141 {"knm", M_INTEL_KNM},
33142 {"amdfam10h", M_AMDFAM10H},
33143 {"barcelona", M_AMDFAM10H_BARCELONA},
33144 {"shanghai", M_AMDFAM10H_SHANGHAI},
33145 {"istanbul", M_AMDFAM10H_ISTANBUL},
33146 {"btver1", M_AMD_BTVER1},
33147 {"amdfam15h", M_AMDFAM15H},
33148 {"bdver1", M_AMDFAM15H_BDVER1},
33149 {"bdver2", M_AMDFAM15H_BDVER2},
33150 {"bdver3", M_AMDFAM15H_BDVER3},
33151 {"bdver4", M_AMDFAM15H_BDVER4},
33152 {"btver2", M_AMD_BTVER2},
33153 {"amdfam17h", M_AMDFAM17H},
33154 {"znver1", M_AMDFAM17H_ZNVER1},
33157 static struct _isa_names_table
33159 const char *const name;
33160 const enum processor_features feature;
33162 const isa_names_table[] =
33164 {"cmov", F_CMOV},
33165 {"mmx", F_MMX},
33166 {"popcnt", F_POPCNT},
33167 {"sse", F_SSE},
33168 {"sse2", F_SSE2},
33169 {"sse3", F_SSE3},
33170 {"ssse3", F_SSSE3},
33171 {"sse4a", F_SSE4_A},
33172 {"sse4.1", F_SSE4_1},
33173 {"sse4.2", F_SSE4_2},
33174 {"avx", F_AVX},
33175 {"fma4", F_FMA4},
33176 {"xop", F_XOP},
33177 {"fma", F_FMA},
33178 {"avx2", F_AVX2},
33179 {"avx512f", F_AVX512F},
33180 {"bmi", F_BMI},
33181 {"bmi2", F_BMI2},
33182 {"aes", F_AES},
33183 {"pclmul", F_PCLMUL},
33184 {"avx512vl",F_AVX512VL},
33185 {"avx512bw",F_AVX512BW},
33186 {"avx512dq",F_AVX512DQ},
33187 {"avx512cd",F_AVX512CD},
33188 {"avx512er",F_AVX512ER},
33189 {"avx512pf",F_AVX512PF},
33190 {"avx512vbmi",F_AVX512VBMI},
33191 {"avx512ifma",F_AVX512IFMA},
33192 {"avx5124vnniw",F_AVX5124VNNIW},
33193 {"avx5124fmaps",F_AVX5124FMAPS},
33194 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ},
33195 {"avx512vbmi2", F_AVX512VBMI2},
33196 {"gfni", F_GFNI},
33197 {"vpclmulqdq", F_VPCLMULQDQ},
33198 {"avx512vnni", F_AVX512VNNI},
33199 {"avx512bitalg", F_AVX512BITALG}
33202 tree __processor_model_type = build_processor_model_struct ();
33203 tree __cpu_model_var = make_var_decl (__processor_model_type,
33204 "__cpu_model");
33207 varpool_node::add (__cpu_model_var);
33209 gcc_assert ((args != NULL) && (*args != NULL));
33211 param_string_cst = *args;
33212 while (param_string_cst
33213 && TREE_CODE (param_string_cst) != STRING_CST)
33215 /* *args must be an expr that can contain other EXPRS leading to a
33216 STRING_CST. */
33217 if (!EXPR_P (param_string_cst))
33219 error ("Parameter to builtin must be a string constant or literal");
33220 return integer_zero_node;
33222 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
33225 gcc_assert (param_string_cst);
33227 if (fn_code == IX86_BUILTIN_CPU_IS)
33229 tree ref;
33230 tree field;
33231 tree final;
33233 unsigned int field_val = 0;
33234 unsigned int NUM_ARCH_NAMES
33235 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
33237 for (i = 0; i < NUM_ARCH_NAMES; i++)
33238 if (strcmp (arch_names_table[i].name,
33239 TREE_STRING_POINTER (param_string_cst)) == 0)
33240 break;
33242 if (i == NUM_ARCH_NAMES)
33244 error ("Parameter to builtin not valid: %s",
33245 TREE_STRING_POINTER (param_string_cst));
33246 return integer_zero_node;
33249 field = TYPE_FIELDS (__processor_model_type);
33250 field_val = arch_names_table[i].model;
33252 /* CPU types are stored in the next field. */
33253 if (field_val > M_CPU_TYPE_START
33254 && field_val < M_CPU_SUBTYPE_START)
33256 field = DECL_CHAIN (field);
33257 field_val -= M_CPU_TYPE_START;
33260 /* CPU subtypes are stored in the next field. */
33261 if (field_val > M_CPU_SUBTYPE_START)
33263 field = DECL_CHAIN (DECL_CHAIN (field));
33264 field_val -= M_CPU_SUBTYPE_START;
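/* For example "nehalem" takes this path: the third field
   (__cpu_subtype) is compared against
   M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START.  */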
33267 /* Get the appropriate field in __cpu_model. */
33268 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33269 field, NULL_TREE);
33271 /* Check the value. */
33272 final = build2 (EQ_EXPR, unsigned_type_node, ref,
33273 build_int_cstu (unsigned_type_node, field_val));
33274 return build1 (CONVERT_EXPR, integer_type_node, final);
33276 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
33278 tree ref;
33279 tree array_elt;
33280 tree field;
33281 tree final;
33283 unsigned int field_val = 0;
33284 unsigned int NUM_ISA_NAMES
33285 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
33287 for (i = 0; i < NUM_ISA_NAMES; i++)
33288 if (strcmp (isa_names_table[i].name,
33289 TREE_STRING_POINTER (param_string_cst)) == 0)
33290 break;
33292 if (i == NUM_ISA_NAMES)
33294 error ("Parameter to builtin not valid: %s",
33295 TREE_STRING_POINTER (param_string_cst));
33296 return integer_zero_node;
33299 field = TYPE_FIELDS (__processor_model_type);
33300 /* Get the last field, which is __cpu_features. */
33301 while (DECL_CHAIN (field))
33302 field = DECL_CHAIN (field);
33304 /* Get the appropriate field: __cpu_model.__cpu_features */
33305 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33306 field, NULL_TREE);
33308 /* Access the 0th element of __cpu_features array. */
33309 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
33310 integer_zero_node, NULL_TREE, NULL_TREE);
33312 field_val = (1 << isa_names_table[i].feature);
33313 /* Return __cpu_model.__cpu_features[0] & field_val */
33314 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
33315 build_int_cstu (unsigned_type_node, field_val));
33316 return build1 (CONVERT_EXPR, integer_type_node, final);
33318 gcc_unreachable ();
33321 static tree
33322 ix86_fold_builtin (tree fndecl, int n_args,
33323 tree *args, bool ignore ATTRIBUTE_UNUSED)
33325 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
33327 enum ix86_builtins fn_code = (enum ix86_builtins)
33328 DECL_FUNCTION_CODE (fndecl);
33329 switch (fn_code)
33331 case IX86_BUILTIN_CPU_IS:
33332 case IX86_BUILTIN_CPU_SUPPORTS:
33333 gcc_assert (n_args == 1);
33334 return fold_builtin_cpu (fndecl, args);
33336 case IX86_BUILTIN_NANQ:
33337 case IX86_BUILTIN_NANSQ:
33339 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33340 const char *str = c_getstr (*args);
33341 int quiet = fn_code == IX86_BUILTIN_NANQ;
33342 REAL_VALUE_TYPE real;
33344 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
33345 return build_real (type, real);
33346 return NULL_TREE;
33349 case IX86_BUILTIN_INFQ:
33350 case IX86_BUILTIN_HUGE_VALQ:
33352 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33353 REAL_VALUE_TYPE inf;
33354 real_inf (&inf);
33355 return build_real (type, inf);
33358 case IX86_BUILTIN_TZCNT16:
33359 case IX86_BUILTIN_CTZS:
33360 case IX86_BUILTIN_TZCNT32:
33361 case IX86_BUILTIN_TZCNT64:
33362 gcc_assert (n_args == 1);
33363 if (TREE_CODE (args[0]) == INTEGER_CST)
33365 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33366 tree arg = args[0];
33367 if (fn_code == IX86_BUILTIN_TZCNT16
33368 || fn_code == IX86_BUILTIN_CTZS)
33369 arg = fold_convert (short_unsigned_type_node, arg);
33370 if (integer_zerop (arg))
33371 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33372 else
33373 return fold_const_call (CFN_CTZ, type, arg);
33375 break;
33377 case IX86_BUILTIN_LZCNT16:
33378 case IX86_BUILTIN_CLZS:
33379 case IX86_BUILTIN_LZCNT32:
33380 case IX86_BUILTIN_LZCNT64:
33381 gcc_assert (n_args == 1);
33382 if (TREE_CODE (args[0]) == INTEGER_CST)
33384 tree type = TREE_TYPE (TREE_TYPE (fndecl));
33385 tree arg = args[0];
33386 if (fn_code == IX86_BUILTIN_LZCNT16
33387 || fn_code == IX86_BUILTIN_CLZS)
33388 arg = fold_convert (short_unsigned_type_node, arg);
33389 if (integer_zerop (arg))
33390 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33391 else
33392 return fold_const_call (CFN_CLZ, type, arg);
33394 break;
33396 case IX86_BUILTIN_BEXTR32:
33397 case IX86_BUILTIN_BEXTR64:
33398 case IX86_BUILTIN_BEXTRI32:
33399 case IX86_BUILTIN_BEXTRI64:
33400 gcc_assert (n_args == 2);
33401 if (tree_fits_uhwi_p (args[1]))
33403 unsigned HOST_WIDE_INT res = 0;
33404 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
33405 unsigned int start = tree_to_uhwi (args[1]);
33406 unsigned int len = (start & 0xff00) >> 8;
33407 start &= 0xff;
33408 if (start >= prec || len == 0)
33409 res = 0;
33410 else if (!tree_fits_uhwi_p (args[0]))
33411 break;
33412 else
33413 res = tree_to_uhwi (args[0]) >> start;
33414 if (len > prec)
33415 len = prec;
33416 if (len < HOST_BITS_PER_WIDE_INT)
33417 res &= (HOST_WIDE_INT_1U << len) - 1;
33418 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33420 break;
33422 case IX86_BUILTIN_BZHI32:
33423 case IX86_BUILTIN_BZHI64:
33424 gcc_assert (n_args == 2);
33425 if (tree_fits_uhwi_p (args[1]))
33427 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
33428 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
33429 return args[0];
33430 if (!tree_fits_uhwi_p (args[0]))
33431 break;
33432 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
33433 res &= ~(HOST_WIDE_INT_M1U << idx);
33434 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33436 break;
33438 case IX86_BUILTIN_PDEP32:
33439 case IX86_BUILTIN_PDEP64:
33440 gcc_assert (n_args == 2);
33441 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33443 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33444 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33445 unsigned HOST_WIDE_INT res = 0;
33446 unsigned HOST_WIDE_INT m, k = 1;
33447 for (m = 1; m; m <<= 1)
33448 if ((mask & m) != 0)
33450 if ((src & k) != 0)
33451 res |= m;
33452 k <<= 1;
33454 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33456 break;
33458 case IX86_BUILTIN_PEXT32:
33459 case IX86_BUILTIN_PEXT64:
33460 gcc_assert (n_args == 2);
33461 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33463 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33464 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33465 unsigned HOST_WIDE_INT res = 0;
33466 unsigned HOST_WIDE_INT m, k = 1;
33467 for (m = 1; m; m <<= 1)
33468 if ((mask & m) != 0)
33470 if ((src & m) != 0)
33471 res |= k;
33472 k <<= 1;
33474 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33476 break;
33478 default:
33479 break;
33483 #ifdef SUBTARGET_FOLD_BUILTIN
33484 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
33485 #endif
33487 return NULL_TREE;
33490 /* Fold an MD builtin (use ix86_fold_builtin for folding into
33491 constant) in GIMPLE. */
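/* For instance, a call to the 32-bit TZCNT builtin whose argument is
   provably nonzero is rewritten into __builtin_ctz, which the middle
   end optimizes better, and the PDEP/PEXT builtins collapse to their
   first argument when the mask operand is all ones.  */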
33493 bool
33494 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
33496 gimple *stmt = gsi_stmt (*gsi);
33497 tree fndecl = gimple_call_fndecl (stmt);
33498 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
33499 int n_args = gimple_call_num_args (stmt);
33500 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
33501 tree decl = NULL_TREE;
33502 tree arg0, arg1;
33504 switch (fn_code)
33506 case IX86_BUILTIN_TZCNT32:
33507 decl = builtin_decl_implicit (BUILT_IN_CTZ);
33508 goto fold_tzcnt_lzcnt;
33510 case IX86_BUILTIN_TZCNT64:
33511 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
33512 goto fold_tzcnt_lzcnt;
33514 case IX86_BUILTIN_LZCNT32:
33515 decl = builtin_decl_implicit (BUILT_IN_CLZ);
33516 goto fold_tzcnt_lzcnt;
33518 case IX86_BUILTIN_LZCNT64:
33519 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
33520 goto fold_tzcnt_lzcnt;
33522 fold_tzcnt_lzcnt:
33523 gcc_assert (n_args == 1);
33524 arg0 = gimple_call_arg (stmt, 0);
33525 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
33527 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
33528 /* If arg0 is provably non-zero, optimize into the generic
33529 __builtin_c[tl]z{,ll} functions, which the middle-end handles
33530 better. */
33531 if (!expr_not_equal_to (arg0, wi::zero (prec)))
33532 return false;
33534 location_t loc = gimple_location (stmt);
33535 gimple *g = gimple_build_call (decl, 1, arg0);
33536 gimple_set_location (g, loc);
33537 tree lhs = make_ssa_name (integer_type_node);
33538 gimple_call_set_lhs (g, lhs);
33539 gsi_insert_before (gsi, g, GSI_SAME_STMT);
33540 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
33541 gimple_set_location (g, loc);
33542 gsi_replace (gsi, g, false);
33543 return true;
33545 break;
33547 case IX86_BUILTIN_BZHI32:
33548 case IX86_BUILTIN_BZHI64:
33549 gcc_assert (n_args == 2);
33550 arg1 = gimple_call_arg (stmt, 1);
33551 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
33553 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
33554 arg0 = gimple_call_arg (stmt, 0);
33555 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
33556 break;
33557 location_t loc = gimple_location (stmt);
33558 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33559 gimple_set_location (g, loc);
33560 gsi_replace (gsi, g, false);
33561 return true;
33563 break;
33565 case IX86_BUILTIN_PDEP32:
33566 case IX86_BUILTIN_PDEP64:
33567 case IX86_BUILTIN_PEXT32:
33568 case IX86_BUILTIN_PEXT64:
33569 gcc_assert (n_args == 2);
33570 arg1 = gimple_call_arg (stmt, 1);
33571 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
33573 location_t loc = gimple_location (stmt);
33574 arg0 = gimple_call_arg (stmt, 0);
33575 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33576 gimple_set_location (g, loc);
33577 gsi_replace (gsi, g, false);
33578 return true;
33580 break;
33582 default:
33583 break;
33586 return false;
33589 /* Make builtins to detect cpu type and features supported. NAME is
33590 the builtin name, CODE is the builtin code, and FTYPE is the function
33591 type of the builtin. */
33593 static void
33594 make_cpu_type_builtin (const char* name, int code,
33595 enum ix86_builtin_func_type ftype, bool is_const)
33597 tree decl;
33598 tree type;
33600 type = ix86_get_builtin_func_type (ftype);
33601 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
33602 NULL, NULL_TREE);
33603 gcc_assert (decl != NULL_TREE);
33604 ix86_builtins[(int) code] = decl;
33605 TREE_READONLY (decl) = is_const;
33608 /* Make builtins to get CPU type and features supported. The created
33609 builtins are:
33611 __builtin_cpu_init (), to detect cpu type and features,
33612 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
33613 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
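/* A typical use from user code (a sketch; the called helpers are
   hypothetical):

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else if (__builtin_cpu_is ("bonnell"))
       use_scalar_path ();

   __builtin_cpu_init () only has to be called explicitly from code that
   may run before the libgcc constructor, e.g. other constructors or
   IFUNC resolvers.  */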
33616 static void
33617 ix86_init_platform_type_builtins (void)
33619 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
33620 INT_FTYPE_VOID, false);
33621 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
33622 INT_FTYPE_PCCHAR, true);
33623 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
33624 INT_FTYPE_PCCHAR, true);
33627 /* Internal method for ix86_init_builtins. */
33629 static void
33630 ix86_init_builtins_va_builtins_abi (void)
33632 tree ms_va_ref, sysv_va_ref;
33633 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
33634 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
33635 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
33636 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
33638 if (!TARGET_64BIT)
33639 return;
33640 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
33641 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
33642 ms_va_ref = build_reference_type (ms_va_list_type_node);
33643 sysv_va_ref =
33644 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
33646 fnvoid_va_end_ms =
33647 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33648 fnvoid_va_start_ms =
33649 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33650 fnvoid_va_end_sysv =
33651 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
33652 fnvoid_va_start_sysv =
33653 build_varargs_function_type_list (void_type_node, sysv_va_ref,
33654 NULL_TREE);
33655 fnvoid_va_copy_ms =
33656 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
33657 NULL_TREE);
33658 fnvoid_va_copy_sysv =
33659 build_function_type_list (void_type_node, sysv_va_ref,
33660 sysv_va_ref, NULL_TREE);
33662 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
33663 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
33664 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
33665 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
33666 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
33667 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
33668 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
33669 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33670 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
33671 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33672 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
33673 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
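/* The builtins registered above back the __builtin_ms_va_* and
   __builtin_sysv_va_* interface; a sketch of user-level usage (sum is a
   hypothetical function):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/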
33676 static void
33677 ix86_init_builtin_types (void)
33679 tree float80_type_node, const_string_type_node;
33681 /* The __float80 type. */
33682 float80_type_node = long_double_type_node;
33683 if (TYPE_MODE (float80_type_node) != XFmode)
33685 if (float64x_type_node != NULL_TREE
33686 && TYPE_MODE (float64x_type_node) == XFmode)
33687 float80_type_node = float64x_type_node;
33688 else
33690 /* The __float80 type. */
33691 float80_type_node = make_node (REAL_TYPE);
33693 TYPE_PRECISION (float80_type_node) = 80;
33694 layout_type (float80_type_node);
33697 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
33699 /* The __float128 type. The node has already been created as
33700 _Float128, so we only need to register the __float128 name for
33701 it. */
33702 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33704 const_string_type_node
33705 = build_pointer_type (build_qualified_type
33706 (char_type_node, TYPE_QUAL_CONST));
33708 /* This macro is built by i386-builtin-types.awk. */
33709 DEFINE_BUILTIN_PRIMITIVE_TYPES;
33712 static void
33713 ix86_init_builtins (void)
33715 tree ftype, decl;
33717 ix86_init_builtin_types ();
33719 /* Builtins to get CPU type and features. */
33720 ix86_init_platform_type_builtins ();
33722 /* TFmode support builtins. */
33723 def_builtin_const (0, "__builtin_infq",
33724 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33725 def_builtin_const (0, "__builtin_huge_valq",
33726 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33728 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
33729 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
33730 BUILT_IN_MD, "nanq", NULL_TREE);
33731 TREE_READONLY (decl) = 1;
33732 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
33734 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
33735 BUILT_IN_MD, "nansq", NULL_TREE);
33736 TREE_READONLY (decl) = 1;
33737 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
33739 /* We will expand them to a normal call if SSE isn't available since
33740 they are used by libgcc. */
33741 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33742 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
33743 BUILT_IN_MD, "__fabstf2", NULL_TREE);
33744 TREE_READONLY (decl) = 1;
33745 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
33747 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33748 decl = add_builtin_function ("__builtin_copysignq", ftype,
33749 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
33750 "__copysigntf3", NULL_TREE);
33751 TREE_READONLY (decl) = 1;
33752 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
33754 ix86_init_tm_builtins ();
33755 ix86_init_mmx_sse_builtins ();
33756 ix86_init_mpx_builtins ();
33758 if (TARGET_LP64)
33759 ix86_init_builtins_va_builtins_abi ();
33761 #ifdef SUBTARGET_INIT_BUILTINS
33762 SUBTARGET_INIT_BUILTINS;
33763 #endif
33766 /* Return the ix86 builtin for CODE. */
33768 static tree
33769 ix86_builtin_decl (unsigned code, bool)
33771 if (code >= IX86_BUILTIN_MAX)
33772 return error_mark_node;
33774 return ix86_builtins[code];
33777 /* Errors in the source file can cause expand_expr to return const0_rtx
33778 where we expect a vector. To avoid crashing, use one of the vector
33779 clear instructions. */
33780 static rtx
33781 safe_vector_operand (rtx x, machine_mode mode)
33783 if (x == const0_rtx)
33784 x = CONST0_RTX (mode);
33785 return x;
33788 /* Fix up modeless constants to fit the required mode. */
33789 static rtx
33790 fixup_modeless_constant (rtx x, machine_mode mode)
33792 if (GET_MODE (x) == VOIDmode)
33793 x = convert_to_mode (mode, x, 1);
33794 return x;
33797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
33799 static rtx
33800 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
33802 rtx pat;
33803 tree arg0 = CALL_EXPR_ARG (exp, 0);
33804 tree arg1 = CALL_EXPR_ARG (exp, 1);
33805 rtx op0 = expand_normal (arg0);
33806 rtx op1 = expand_normal (arg1);
33807 machine_mode tmode = insn_data[icode].operand[0].mode;
33808 machine_mode mode0 = insn_data[icode].operand[1].mode;
33809 machine_mode mode1 = insn_data[icode].operand[2].mode;
33811 if (VECTOR_MODE_P (mode0))
33812 op0 = safe_vector_operand (op0, mode0);
33813 if (VECTOR_MODE_P (mode1))
33814 op1 = safe_vector_operand (op1, mode1);
33816 if (optimize || !target
33817 || GET_MODE (target) != tmode
33818 || !insn_data[icode].operand[0].predicate (target, tmode))
33819 target = gen_reg_rtx (tmode);
33821 if (GET_MODE (op1) == SImode && mode1 == TImode)
33823 rtx x = gen_reg_rtx (V4SImode);
33824 emit_insn (gen_sse2_loadd (x, op1));
33825 op1 = gen_lowpart (TImode, x);
33828 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33829 op0 = copy_to_mode_reg (mode0, op0);
33830 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33831 op1 = copy_to_mode_reg (mode1, op1);
33833 pat = GEN_FCN (icode) (target, op0, op1);
33834 if (! pat)
33835 return 0;
33837 emit_insn (pat);
33839 return target;
33842 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33844 static rtx
33845 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33846 enum ix86_builtin_func_type m_type,
33847 enum rtx_code sub_code)
33849 rtx pat;
33850 int i;
33851 int nargs;
33852 bool comparison_p = false;
33853 bool tf_p = false;
33854 bool last_arg_constant = false;
33855 int num_memory = 0;
33856 struct {
33857 rtx op;
33858 machine_mode mode;
33859 } args[4];
33861 machine_mode tmode = insn_data[icode].operand[0].mode;
33863 switch (m_type)
33865 case MULTI_ARG_4_DF2_DI_I:
33866 case MULTI_ARG_4_DF2_DI_I1:
33867 case MULTI_ARG_4_SF2_SI_I:
33868 case MULTI_ARG_4_SF2_SI_I1:
33869 nargs = 4;
33870 last_arg_constant = true;
33871 break;
33873 case MULTI_ARG_3_SF:
33874 case MULTI_ARG_3_DF:
33875 case MULTI_ARG_3_SF2:
33876 case MULTI_ARG_3_DF2:
33877 case MULTI_ARG_3_DI:
33878 case MULTI_ARG_3_SI:
33879 case MULTI_ARG_3_SI_DI:
33880 case MULTI_ARG_3_HI:
33881 case MULTI_ARG_3_HI_SI:
33882 case MULTI_ARG_3_QI:
33883 case MULTI_ARG_3_DI2:
33884 case MULTI_ARG_3_SI2:
33885 case MULTI_ARG_3_HI2:
33886 case MULTI_ARG_3_QI2:
33887 nargs = 3;
33888 break;
33890 case MULTI_ARG_2_SF:
33891 case MULTI_ARG_2_DF:
33892 case MULTI_ARG_2_DI:
33893 case MULTI_ARG_2_SI:
33894 case MULTI_ARG_2_HI:
33895 case MULTI_ARG_2_QI:
33896 nargs = 2;
33897 break;
33899 case MULTI_ARG_2_DI_IMM:
33900 case MULTI_ARG_2_SI_IMM:
33901 case MULTI_ARG_2_HI_IMM:
33902 case MULTI_ARG_2_QI_IMM:
33903 nargs = 2;
33904 last_arg_constant = true;
33905 break;
33907 case MULTI_ARG_1_SF:
33908 case MULTI_ARG_1_DF:
33909 case MULTI_ARG_1_SF2:
33910 case MULTI_ARG_1_DF2:
33911 case MULTI_ARG_1_DI:
33912 case MULTI_ARG_1_SI:
33913 case MULTI_ARG_1_HI:
33914 case MULTI_ARG_1_QI:
33915 case MULTI_ARG_1_SI_DI:
33916 case MULTI_ARG_1_HI_DI:
33917 case MULTI_ARG_1_HI_SI:
33918 case MULTI_ARG_1_QI_DI:
33919 case MULTI_ARG_1_QI_SI:
33920 case MULTI_ARG_1_QI_HI:
33921 nargs = 1;
33922 break;
33924 case MULTI_ARG_2_DI_CMP:
33925 case MULTI_ARG_2_SI_CMP:
33926 case MULTI_ARG_2_HI_CMP:
33927 case MULTI_ARG_2_QI_CMP:
33928 nargs = 2;
33929 comparison_p = true;
33930 break;
33932 case MULTI_ARG_2_SF_TF:
33933 case MULTI_ARG_2_DF_TF:
33934 case MULTI_ARG_2_DI_TF:
33935 case MULTI_ARG_2_SI_TF:
33936 case MULTI_ARG_2_HI_TF:
33937 case MULTI_ARG_2_QI_TF:
33938 nargs = 2;
33939 tf_p = true;
33940 break;
33942 default:
33943 gcc_unreachable ();
33946 if (optimize || !target
33947 || GET_MODE (target) != tmode
33948 || !insn_data[icode].operand[0].predicate (target, tmode))
33949 target = gen_reg_rtx (tmode);
33950 else if (memory_operand (target, tmode))
33951 num_memory++;
33953 gcc_assert (nargs <= 4);
33955 for (i = 0; i < nargs; i++)
33957 tree arg = CALL_EXPR_ARG (exp, i);
33958 rtx op = expand_normal (arg);
33959 int adjust = (comparison_p) ? 1 : 0;
33960 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33962 if (last_arg_constant && i == nargs - 1)
33964 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33966 enum insn_code new_icode = icode;
33967 switch (icode)
33969 case CODE_FOR_xop_vpermil2v2df3:
33970 case CODE_FOR_xop_vpermil2v4sf3:
33971 case CODE_FOR_xop_vpermil2v4df3:
33972 case CODE_FOR_xop_vpermil2v8sf3:
33973 error ("the last argument must be a 2-bit immediate");
33974 return gen_reg_rtx (tmode);
33975 case CODE_FOR_xop_rotlv2di3:
33976 new_icode = CODE_FOR_rotlv2di3;
33977 goto xop_rotl;
33978 case CODE_FOR_xop_rotlv4si3:
33979 new_icode = CODE_FOR_rotlv4si3;
33980 goto xop_rotl;
33981 case CODE_FOR_xop_rotlv8hi3:
33982 new_icode = CODE_FOR_rotlv8hi3;
33983 goto xop_rotl;
33984 case CODE_FOR_xop_rotlv16qi3:
33985 new_icode = CODE_FOR_rotlv16qi3;
33986 xop_rotl:
33987 if (CONST_INT_P (op))
33989 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
33990 op = GEN_INT (INTVAL (op) & mask);
33991 gcc_checking_assert
33992 (insn_data[icode].operand[i + 1].predicate (op, mode));
33994 else
33996 gcc_checking_assert
33997 (nargs == 2
33998 && insn_data[new_icode].operand[0].mode == tmode
33999 && insn_data[new_icode].operand[1].mode == tmode
34000 && insn_data[new_icode].operand[2].mode == mode
34001 && insn_data[new_icode].operand[0].predicate
34002 == insn_data[icode].operand[0].predicate
34003 && insn_data[new_icode].operand[1].predicate
34004 == insn_data[icode].operand[1].predicate);
34005 icode = new_icode;
34006 goto non_constant;
34008 break;
34009 default:
34010 gcc_unreachable ();
34014 else
34016 non_constant:
34017 if (VECTOR_MODE_P (mode))
34018 op = safe_vector_operand (op, mode);
34020 /* If we aren't optimizing, only allow one memory operand to be
34021 generated. */
34022 if (memory_operand (op, mode))
34023 num_memory++;
34025 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
34027 if (optimize
34028 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
34029 || num_memory > 1)
34030 op = force_reg (mode, op);
34033 args[i].op = op;
34034 args[i].mode = mode;
34037 switch (nargs)
34039 case 1:
34040 pat = GEN_FCN (icode) (target, args[0].op);
34041 break;
34043 case 2:
34044 if (tf_p)
34045 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34046 GEN_INT ((int)sub_code));
34047 else if (! comparison_p)
34048 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34049 else
34051 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
34052 args[0].op,
34053 args[1].op);
34055 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
34057 break;
34059 case 3:
34060 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34061 break;
34063 case 4:
34064 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
34065 break;
34067 default:
34068 gcc_unreachable ();
34071 if (! pat)
34072 return 0;
34074 emit_insn (pat);
34075 return target;
34078 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
34079 insns with vec_merge. */
34081 static rtx
34082 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
34083 rtx target)
34085 rtx pat;
34086 tree arg0 = CALL_EXPR_ARG (exp, 0);
34087 rtx op1, op0 = expand_normal (arg0);
34088 machine_mode tmode = insn_data[icode].operand[0].mode;
34089 machine_mode mode0 = insn_data[icode].operand[1].mode;
34091 if (optimize || !target
34092 || GET_MODE (target) != tmode
34093 || !insn_data[icode].operand[0].predicate (target, tmode))
34094 target = gen_reg_rtx (tmode);
34096 if (VECTOR_MODE_P (mode0))
34097 op0 = safe_vector_operand (op0, mode0);
34099 if ((optimize && !register_operand (op0, mode0))
34100 || !insn_data[icode].operand[1].predicate (op0, mode0))
34101 op0 = copy_to_mode_reg (mode0, op0);
34103 op1 = op0;
34104 if (!insn_data[icode].operand[2].predicate (op1, mode0))
34105 op1 = copy_to_mode_reg (mode0, op1);
34107 pat = GEN_FCN (icode) (target, op0, op1);
34108 if (! pat)
34109 return 0;
34110 emit_insn (pat);
34111 return target;
34114 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
34116 static rtx
34117 ix86_expand_sse_compare (const struct builtin_description *d,
34118 tree exp, rtx target, bool swap)
34120 rtx pat;
34121 tree arg0 = CALL_EXPR_ARG (exp, 0);
34122 tree arg1 = CALL_EXPR_ARG (exp, 1);
34123 rtx op0 = expand_normal (arg0);
34124 rtx op1 = expand_normal (arg1);
34125 rtx op2;
34126 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34127 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34128 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34129 enum rtx_code comparison = d->comparison;
34131 if (VECTOR_MODE_P (mode0))
34132 op0 = safe_vector_operand (op0, mode0);
34133 if (VECTOR_MODE_P (mode1))
34134 op1 = safe_vector_operand (op1, mode1);
34136 /* Swap operands if we have a comparison that isn't available in
34137 hardware. */
34138 if (swap)
34139 std::swap (op0, op1);
34141 if (optimize || !target
34142 || GET_MODE (target) != tmode
34143 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34144 target = gen_reg_rtx (tmode);
34146 if ((optimize && !register_operand (op0, mode0))
34147 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
34148 op0 = copy_to_mode_reg (mode0, op0);
34149 if ((optimize && !register_operand (op1, mode1))
34150 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
34151 op1 = copy_to_mode_reg (mode1, op1);
34153 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
34154 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34155 if (! pat)
34156 return 0;
34157 emit_insn (pat);
34158 return target;
34161 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
34163 static rtx
34164 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
34165 rtx target)
34167 rtx pat;
34168 tree arg0 = CALL_EXPR_ARG (exp, 0);
34169 tree arg1 = CALL_EXPR_ARG (exp, 1);
34170 rtx op0 = expand_normal (arg0);
34171 rtx op1 = expand_normal (arg1);
34172 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34173 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34174 enum rtx_code comparison = d->comparison;
34176 if (VECTOR_MODE_P (mode0))
34177 op0 = safe_vector_operand (op0, mode0);
34178 if (VECTOR_MODE_P (mode1))
34179 op1 = safe_vector_operand (op1, mode1);
34181 /* Swap operands if we have a comparison that isn't available in
34182 hardware. */
34183 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
34184 std::swap (op0, op1);
34186 target = gen_reg_rtx (SImode);
34187 emit_move_insn (target, const0_rtx);
34188 target = gen_rtx_SUBREG (QImode, target, 0);
34190 if ((optimize && !register_operand (op0, mode0))
34191 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34192 op0 = copy_to_mode_reg (mode0, op0);
34193 if ((optimize && !register_operand (op1, mode1))
34194 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34195 op1 = copy_to_mode_reg (mode1, op1);
34197 pat = GEN_FCN (d->icode) (op0, op1);
34198 if (! pat)
34199 return 0;
34200 emit_insn (pat);
34201 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34202 gen_rtx_fmt_ee (comparison, QImode,
34203 SET_DEST (pat),
34204 const0_rtx)));
34206 return SUBREG_REG (target);
34209 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
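/* Note that here d->comparison does not carry an rtx comparison code;
   it is reused to hold the rounding-mode immediate that is passed as
   the last operand of the pattern (see the GEN_INT (d->comparison)
   calls below).  */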
34211 static rtx
34212 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
34213 rtx target)
34215 rtx pat;
34216 tree arg0 = CALL_EXPR_ARG (exp, 0);
34217 rtx op1, op0 = expand_normal (arg0);
34218 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34219 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34221 if (optimize || target == 0
34222 || GET_MODE (target) != tmode
34223 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34224 target = gen_reg_rtx (tmode);
34226 if (VECTOR_MODE_P (mode0))
34227 op0 = safe_vector_operand (op0, mode0);
34229 if ((optimize && !register_operand (op0, mode0))
34230 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34231 op0 = copy_to_mode_reg (mode0, op0);
34233 op1 = GEN_INT (d->comparison);
34235 pat = GEN_FCN (d->icode) (target, op0, op1);
34236 if (! pat)
34237 return 0;
34238 emit_insn (pat);
34239 return target;
34242 static rtx
34243 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
34244 tree exp, rtx target)
34246 rtx pat;
34247 tree arg0 = CALL_EXPR_ARG (exp, 0);
34248 tree arg1 = CALL_EXPR_ARG (exp, 1);
34249 rtx op0 = expand_normal (arg0);
34250 rtx op1 = expand_normal (arg1);
34251 rtx op2;
34252 machine_mode tmode = insn_data[d->icode].operand[0].mode;
34253 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34254 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34256 if (optimize || target == 0
34257 || GET_MODE (target) != tmode
34258 || !insn_data[d->icode].operand[0].predicate (target, tmode))
34259 target = gen_reg_rtx (tmode);
34261 op0 = safe_vector_operand (op0, mode0);
34262 op1 = safe_vector_operand (op1, mode1);
34264 if ((optimize && !register_operand (op0, mode0))
34265 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34266 op0 = copy_to_mode_reg (mode0, op0);
34267 if ((optimize && !register_operand (op1, mode1))
34268 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34269 op1 = copy_to_mode_reg (mode1, op1);
34271 op2 = GEN_INT (d->comparison);
34273 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34274 if (! pat)
34275 return 0;
34276 emit_insn (pat);
34277 return target;
34280 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
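/* The ptest pattern itself only sets FLAGS_REG; d->comparison supplies
   the condition tested on it, which is what distinguishes the
   ptestz/ptestc/ptestnzc style builtins routed through here.  The result
   is materialized in the low byte of a zeroed SImode register, as in
   ix86_expand_sse_comi above.  */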
34282 static rtx
34283 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
34284 rtx target)
34286 rtx pat;
34287 tree arg0 = CALL_EXPR_ARG (exp, 0);
34288 tree arg1 = CALL_EXPR_ARG (exp, 1);
34289 rtx op0 = expand_normal (arg0);
34290 rtx op1 = expand_normal (arg1);
34291 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34292 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34293 enum rtx_code comparison = d->comparison;
34295 if (VECTOR_MODE_P (mode0))
34296 op0 = safe_vector_operand (op0, mode0);
34297 if (VECTOR_MODE_P (mode1))
34298 op1 = safe_vector_operand (op1, mode1);
34300 target = gen_reg_rtx (SImode);
34301 emit_move_insn (target, const0_rtx);
34302 target = gen_rtx_SUBREG (QImode, target, 0);
34304 if ((optimize && !register_operand (op0, mode0))
34305 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34306 op0 = copy_to_mode_reg (mode0, op0);
34307 if ((optimize && !register_operand (op1, mode1))
34308 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34309 op1 = copy_to_mode_reg (mode1, op1);
34311 pat = GEN_FCN (d->icode) (op0, op1);
34312 if (! pat)
34313 return 0;
34314 emit_insn (pat);
34315 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34316 gen_rtx_fmt_ee (comparison, QImode,
34317 SET_DEST (pat),
34318 const0_rtx)));
34320 return SUBREG_REG (target);
34323 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
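/* The pcmpestr patterns produce an index result (tmode0), a mask result
   (tmode1) and FLAGS_REG.  For IX86_BUILTIN_PCMPESTRI128 the index
   becomes the target and the mask goes to a scratch; for
   IX86_BUILTIN_PCMPESTRM128 it is the other way round.  For the
   flag-returning variants d->flag holds the machine_mode in which
   FLAGS_REG is read, and the builtin returns the EQ test of that
   register against zero via the usual zeroed-SImode idiom.  */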
34325 static rtx
34326 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
34327 tree exp, rtx target)
34329 rtx pat;
34330 tree arg0 = CALL_EXPR_ARG (exp, 0);
34331 tree arg1 = CALL_EXPR_ARG (exp, 1);
34332 tree arg2 = CALL_EXPR_ARG (exp, 2);
34333 tree arg3 = CALL_EXPR_ARG (exp, 3);
34334 tree arg4 = CALL_EXPR_ARG (exp, 4);
34335 rtx scratch0, scratch1;
34336 rtx op0 = expand_normal (arg0);
34337 rtx op1 = expand_normal (arg1);
34338 rtx op2 = expand_normal (arg2);
34339 rtx op3 = expand_normal (arg3);
34340 rtx op4 = expand_normal (arg4);
34341 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
34343 tmode0 = insn_data[d->icode].operand[0].mode;
34344 tmode1 = insn_data[d->icode].operand[1].mode;
34345 modev2 = insn_data[d->icode].operand[2].mode;
34346 modei3 = insn_data[d->icode].operand[3].mode;
34347 modev4 = insn_data[d->icode].operand[4].mode;
34348 modei5 = insn_data[d->icode].operand[5].mode;
34349 modeimm = insn_data[d->icode].operand[6].mode;
34351 if (VECTOR_MODE_P (modev2))
34352 op0 = safe_vector_operand (op0, modev2);
34353 if (VECTOR_MODE_P (modev4))
34354 op2 = safe_vector_operand (op2, modev4);
34356 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34357 op0 = copy_to_mode_reg (modev2, op0);
34358 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
34359 op1 = copy_to_mode_reg (modei3, op1);
34360 if ((optimize && !register_operand (op2, modev4))
34361 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
34362 op2 = copy_to_mode_reg (modev4, op2);
34363 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
34364 op3 = copy_to_mode_reg (modei5, op3);
34366 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
34368 error ("the fifth argument must be an 8-bit immediate");
34369 return const0_rtx;
34372 if (d->code == IX86_BUILTIN_PCMPESTRI128)
34374 if (optimize || !target
34375 || GET_MODE (target) != tmode0
34376 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34377 target = gen_reg_rtx (tmode0);
34379 scratch1 = gen_reg_rtx (tmode1);
34381 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
34383 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
34385 if (optimize || !target
34386 || GET_MODE (target) != tmode1
34387 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34388 target = gen_reg_rtx (tmode1);
34390 scratch0 = gen_reg_rtx (tmode0);
34392 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
34394 else
34396 gcc_assert (d->flag);
34398 scratch0 = gen_reg_rtx (tmode0);
34399 scratch1 = gen_reg_rtx (tmode1);
34401 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
34404 if (! pat)
34405 return 0;
34407 emit_insn (pat);
34409 if (d->flag)
34411 target = gen_reg_rtx (SImode);
34412 emit_move_insn (target, const0_rtx);
34413 target = gen_rtx_SUBREG (QImode, target, 0);
34415 emit_insn
34416 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34417 gen_rtx_fmt_ee (EQ, QImode,
34418 gen_rtx_REG ((machine_mode) d->flag,
34419 FLAGS_REG),
34420 const0_rtx)));
34421 return SUBREG_REG (target);
34423 else
34424 return target;
34428 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
34430 static rtx
34431 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
34432 tree exp, rtx target)
34434 rtx pat;
34435 tree arg0 = CALL_EXPR_ARG (exp, 0);
34436 tree arg1 = CALL_EXPR_ARG (exp, 1);
34437 tree arg2 = CALL_EXPR_ARG (exp, 2);
34438 rtx scratch0, scratch1;
34439 rtx op0 = expand_normal (arg0);
34440 rtx op1 = expand_normal (arg1);
34441 rtx op2 = expand_normal (arg2);
34442 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
34444 tmode0 = insn_data[d->icode].operand[0].mode;
34445 tmode1 = insn_data[d->icode].operand[1].mode;
34446 modev2 = insn_data[d->icode].operand[2].mode;
34447 modev3 = insn_data[d->icode].operand[3].mode;
34448 modeimm = insn_data[d->icode].operand[4].mode;
34450 if (VECTOR_MODE_P (modev2))
34451 op0 = safe_vector_operand (op0, modev2);
34452 if (VECTOR_MODE_P (modev3))
34453 op1 = safe_vector_operand (op1, modev3);
34455 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34456 op0 = copy_to_mode_reg (modev2, op0);
34457 if ((optimize && !register_operand (op1, modev3))
34458 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
34459 op1 = copy_to_mode_reg (modev3, op1);
34461 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
34463 error ("the third argument must be an 8-bit immediate");
34464 return const0_rtx;
34467 if (d->code == IX86_BUILTIN_PCMPISTRI128)
34469 if (optimize || !target
34470 || GET_MODE (target) != tmode0
34471 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34472 target = gen_reg_rtx (tmode0);
34474 scratch1 = gen_reg_rtx (tmode1);
34476 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
34478 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
34480 if (optimize || !target
34481 || GET_MODE (target) != tmode1
34482 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34483 target = gen_reg_rtx (tmode1);
34485 scratch0 = gen_reg_rtx (tmode0);
34487 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
34489 else
34491 gcc_assert (d->flag);
34493 scratch0 = gen_reg_rtx (tmode0);
34494 scratch1 = gen_reg_rtx (tmode1);
34496 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
34499 if (! pat)
34500 return 0;
34502 emit_insn (pat);
34504 if (d->flag)
34506 target = gen_reg_rtx (SImode);
34507 emit_move_insn (target, const0_rtx);
34508 target = gen_rtx_SUBREG (QImode, target, 0);
34510 emit_insn
34511 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34512 gen_rtx_fmt_ee (EQ, QImode,
34513 gen_rtx_REG ((machine_mode) d->flag,
34514 FLAGS_REG),
34515 const0_rtx)));
34516 return SUBREG_REG (target);
34518 else
34519 return target;
34522 /* Subroutine of ix86_expand_builtin to take care of insns with
34523 a variable number of operands. */
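/* The switch below classifies each function type:
     nargs            - number of operands taken from the call expression;
     nargs_constant   - how many of the trailing operands must be
                        CONST_INT immediates;
     mask_pos         - nonzero when a mask operand follows the immediate,
                        shifting the position where the immediate is
                        expected;
     rmode            - used by the *_CONVERT function types, where the
                        builtin's vector mode differs from the insn's; the
                        result is then transferred through a lowpart
                        subreg;
     second_arg_count - true for the shift builtins whose second operand
                        is a count rather than a vector.  */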
34525 static rtx
34526 ix86_expand_args_builtin (const struct builtin_description *d,
34527 tree exp, rtx target)
34529 rtx pat, real_target;
34530 unsigned int i, nargs;
34531 unsigned int nargs_constant = 0;
34532 unsigned int mask_pos = 0;
34533 int num_memory = 0;
34534 struct
34536 rtx op;
34537 machine_mode mode;
34538 } args[6];
34539 bool second_arg_count = false;
34540 enum insn_code icode = d->icode;
34541 const struct insn_data_d *insn_p = &insn_data[icode];
34542 machine_mode tmode = insn_p->operand[0].mode;
34543 machine_mode rmode = VOIDmode;
34544 bool swap = false;
34545 enum rtx_code comparison = d->comparison;
34547 switch ((enum ix86_builtin_func_type) d->flag)
34549 case V2DF_FTYPE_V2DF_ROUND:
34550 case V4DF_FTYPE_V4DF_ROUND:
34551 case V8DF_FTYPE_V8DF_ROUND:
34552 case V4SF_FTYPE_V4SF_ROUND:
34553 case V8SF_FTYPE_V8SF_ROUND:
34554 case V16SF_FTYPE_V16SF_ROUND:
34555 case V4SI_FTYPE_V4SF_ROUND:
34556 case V8SI_FTYPE_V8SF_ROUND:
34557 case V16SI_FTYPE_V16SF_ROUND:
34558 return ix86_expand_sse_round (d, exp, target);
34559 case V4SI_FTYPE_V2DF_V2DF_ROUND:
34560 case V8SI_FTYPE_V4DF_V4DF_ROUND:
34561 case V16SI_FTYPE_V8DF_V8DF_ROUND:
34562 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
34563 case INT_FTYPE_V8SF_V8SF_PTEST:
34564 case INT_FTYPE_V4DI_V4DI_PTEST:
34565 case INT_FTYPE_V4DF_V4DF_PTEST:
34566 case INT_FTYPE_V4SF_V4SF_PTEST:
34567 case INT_FTYPE_V2DI_V2DI_PTEST:
34568 case INT_FTYPE_V2DF_V2DF_PTEST:
34569 return ix86_expand_sse_ptest (d, exp, target);
34570 case FLOAT128_FTYPE_FLOAT128:
34571 case FLOAT_FTYPE_FLOAT:
34572 case INT_FTYPE_INT:
34573 case UINT_FTYPE_UINT:
34574 case UINT16_FTYPE_UINT16:
34575 case UINT64_FTYPE_INT:
34576 case UINT64_FTYPE_UINT64:
34577 case INT64_FTYPE_INT64:
34578 case INT64_FTYPE_V4SF:
34579 case INT64_FTYPE_V2DF:
34580 case INT_FTYPE_V16QI:
34581 case INT_FTYPE_V8QI:
34582 case INT_FTYPE_V8SF:
34583 case INT_FTYPE_V4DF:
34584 case INT_FTYPE_V4SF:
34585 case INT_FTYPE_V2DF:
34586 case INT_FTYPE_V32QI:
34587 case V16QI_FTYPE_V16QI:
34588 case V8SI_FTYPE_V8SF:
34589 case V8SI_FTYPE_V4SI:
34590 case V8HI_FTYPE_V8HI:
34591 case V8HI_FTYPE_V16QI:
34592 case V8QI_FTYPE_V8QI:
34593 case V8SF_FTYPE_V8SF:
34594 case V8SF_FTYPE_V8SI:
34595 case V8SF_FTYPE_V4SF:
34596 case V8SF_FTYPE_V8HI:
34597 case V4SI_FTYPE_V4SI:
34598 case V4SI_FTYPE_V16QI:
34599 case V4SI_FTYPE_V4SF:
34600 case V4SI_FTYPE_V8SI:
34601 case V4SI_FTYPE_V8HI:
34602 case V4SI_FTYPE_V4DF:
34603 case V4SI_FTYPE_V2DF:
34604 case V4HI_FTYPE_V4HI:
34605 case V4DF_FTYPE_V4DF:
34606 case V4DF_FTYPE_V4SI:
34607 case V4DF_FTYPE_V4SF:
34608 case V4DF_FTYPE_V2DF:
34609 case V4SF_FTYPE_V4SF:
34610 case V4SF_FTYPE_V4SI:
34611 case V4SF_FTYPE_V8SF:
34612 case V4SF_FTYPE_V4DF:
34613 case V4SF_FTYPE_V8HI:
34614 case V4SF_FTYPE_V2DF:
34615 case V2DI_FTYPE_V2DI:
34616 case V2DI_FTYPE_V16QI:
34617 case V2DI_FTYPE_V8HI:
34618 case V2DI_FTYPE_V4SI:
34619 case V2DF_FTYPE_V2DF:
34620 case V2DF_FTYPE_V4SI:
34621 case V2DF_FTYPE_V4DF:
34622 case V2DF_FTYPE_V4SF:
34623 case V2DF_FTYPE_V2SI:
34624 case V2SI_FTYPE_V2SI:
34625 case V2SI_FTYPE_V4SF:
34626 case V2SI_FTYPE_V2SF:
34627 case V2SI_FTYPE_V2DF:
34628 case V2SF_FTYPE_V2SF:
34629 case V2SF_FTYPE_V2SI:
34630 case V32QI_FTYPE_V32QI:
34631 case V32QI_FTYPE_V16QI:
34632 case V16HI_FTYPE_V16HI:
34633 case V16HI_FTYPE_V8HI:
34634 case V8SI_FTYPE_V8SI:
34635 case V16HI_FTYPE_V16QI:
34636 case V8SI_FTYPE_V16QI:
34637 case V4DI_FTYPE_V16QI:
34638 case V8SI_FTYPE_V8HI:
34639 case V4DI_FTYPE_V8HI:
34640 case V4DI_FTYPE_V4SI:
34641 case V4DI_FTYPE_V2DI:
34642 case UQI_FTYPE_UQI:
34643 case UHI_FTYPE_UHI:
34644 case USI_FTYPE_USI:
34645 case USI_FTYPE_UQI:
34646 case USI_FTYPE_UHI:
34647 case UDI_FTYPE_UDI:
34648 case UHI_FTYPE_V16QI:
34649 case USI_FTYPE_V32QI:
34650 case UDI_FTYPE_V64QI:
34651 case V16QI_FTYPE_UHI:
34652 case V32QI_FTYPE_USI:
34653 case V64QI_FTYPE_UDI:
34654 case V8HI_FTYPE_UQI:
34655 case V16HI_FTYPE_UHI:
34656 case V32HI_FTYPE_USI:
34657 case V4SI_FTYPE_UQI:
34658 case V8SI_FTYPE_UQI:
34659 case V4SI_FTYPE_UHI:
34660 case V8SI_FTYPE_UHI:
34661 case UQI_FTYPE_V8HI:
34662 case UHI_FTYPE_V16HI:
34663 case USI_FTYPE_V32HI:
34664 case UQI_FTYPE_V4SI:
34665 case UQI_FTYPE_V8SI:
34666 case UHI_FTYPE_V16SI:
34667 case UQI_FTYPE_V2DI:
34668 case UQI_FTYPE_V4DI:
34669 case UQI_FTYPE_V8DI:
34670 case V16SI_FTYPE_UHI:
34671 case V2DI_FTYPE_UQI:
34672 case V4DI_FTYPE_UQI:
34673 case V16SI_FTYPE_INT:
34674 case V16SF_FTYPE_V8SF:
34675 case V16SI_FTYPE_V8SI:
34676 case V16SF_FTYPE_V4SF:
34677 case V16SI_FTYPE_V4SI:
34678 case V16SI_FTYPE_V16SF:
34679 case V16SI_FTYPE_V16SI:
34680 case V64QI_FTYPE_V64QI:
34681 case V32HI_FTYPE_V32HI:
34682 case V16SF_FTYPE_V16SF:
34683 case V8DI_FTYPE_UQI:
34684 case V8DI_FTYPE_V8DI:
34685 case V8DF_FTYPE_V4DF:
34686 case V8DF_FTYPE_V2DF:
34687 case V8DF_FTYPE_V8DF:
34688 case V4DI_FTYPE_V4DI:
34689 nargs = 1;
34690 break;
34691 case V4SF_FTYPE_V4SF_VEC_MERGE:
34692 case V2DF_FTYPE_V2DF_VEC_MERGE:
34693 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
34694 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
34695 case V16QI_FTYPE_V16QI_V16QI:
34696 case V16QI_FTYPE_V8HI_V8HI:
34697 case V16SF_FTYPE_V16SF_V16SF:
34698 case V8QI_FTYPE_V8QI_V8QI:
34699 case V8QI_FTYPE_V4HI_V4HI:
34700 case V8HI_FTYPE_V8HI_V8HI:
34701 case V8HI_FTYPE_V16QI_V16QI:
34702 case V8HI_FTYPE_V4SI_V4SI:
34703 case V8SF_FTYPE_V8SF_V8SF:
34704 case V8SF_FTYPE_V8SF_V8SI:
34705 case V8DF_FTYPE_V8DF_V8DF:
34706 case V4SI_FTYPE_V4SI_V4SI:
34707 case V4SI_FTYPE_V8HI_V8HI:
34708 case V4SI_FTYPE_V2DF_V2DF:
34709 case V4HI_FTYPE_V4HI_V4HI:
34710 case V4HI_FTYPE_V8QI_V8QI:
34711 case V4HI_FTYPE_V2SI_V2SI:
34712 case V4DF_FTYPE_V4DF_V4DF:
34713 case V4DF_FTYPE_V4DF_V4DI:
34714 case V4SF_FTYPE_V4SF_V4SF:
34715 case V4SF_FTYPE_V4SF_V4SI:
34716 case V4SF_FTYPE_V4SF_V2SI:
34717 case V4SF_FTYPE_V4SF_V2DF:
34718 case V4SF_FTYPE_V4SF_UINT:
34719 case V4SF_FTYPE_V4SF_DI:
34720 case V4SF_FTYPE_V4SF_SI:
34721 case V2DI_FTYPE_V2DI_V2DI:
34722 case V2DI_FTYPE_V16QI_V16QI:
34723 case V2DI_FTYPE_V4SI_V4SI:
34724 case V2DI_FTYPE_V2DI_V16QI:
34725 case V2SI_FTYPE_V2SI_V2SI:
34726 case V2SI_FTYPE_V4HI_V4HI:
34727 case V2SI_FTYPE_V2SF_V2SF:
34728 case V2DF_FTYPE_V2DF_V2DF:
34729 case V2DF_FTYPE_V2DF_V4SF:
34730 case V2DF_FTYPE_V2DF_V2DI:
34731 case V2DF_FTYPE_V2DF_DI:
34732 case V2DF_FTYPE_V2DF_SI:
34733 case V2DF_FTYPE_V2DF_UINT:
34734 case V2SF_FTYPE_V2SF_V2SF:
34735 case V1DI_FTYPE_V1DI_V1DI:
34736 case V1DI_FTYPE_V8QI_V8QI:
34737 case V1DI_FTYPE_V2SI_V2SI:
34738 case V32QI_FTYPE_V16HI_V16HI:
34739 case V16HI_FTYPE_V8SI_V8SI:
34740 case V64QI_FTYPE_V64QI_V64QI:
34741 case V32QI_FTYPE_V32QI_V32QI:
34742 case V16HI_FTYPE_V32QI_V32QI:
34743 case V16HI_FTYPE_V16HI_V16HI:
34744 case V8SI_FTYPE_V4DF_V4DF:
34745 case V8SI_FTYPE_V8SI_V8SI:
34746 case V8SI_FTYPE_V16HI_V16HI:
34747 case V4DI_FTYPE_V4DI_V4DI:
34748 case V4DI_FTYPE_V8SI_V8SI:
34749 case V8DI_FTYPE_V64QI_V64QI:
34750 if (comparison == UNKNOWN)
34751 return ix86_expand_binop_builtin (icode, exp, target);
34752 nargs = 2;
34753 break;
34754 case V4SF_FTYPE_V4SF_V4SF_SWAP:
34755 case V2DF_FTYPE_V2DF_V2DF_SWAP:
34756 gcc_assert (comparison != UNKNOWN);
34757 nargs = 2;
34758 swap = true;
34759 break;
34760 case V16HI_FTYPE_V16HI_V8HI_COUNT:
34761 case V16HI_FTYPE_V16HI_SI_COUNT:
34762 case V8SI_FTYPE_V8SI_V4SI_COUNT:
34763 case V8SI_FTYPE_V8SI_SI_COUNT:
34764 case V4DI_FTYPE_V4DI_V2DI_COUNT:
34765 case V4DI_FTYPE_V4DI_INT_COUNT:
34766 case V8HI_FTYPE_V8HI_V8HI_COUNT:
34767 case V8HI_FTYPE_V8HI_SI_COUNT:
34768 case V4SI_FTYPE_V4SI_V4SI_COUNT:
34769 case V4SI_FTYPE_V4SI_SI_COUNT:
34770 case V4HI_FTYPE_V4HI_V4HI_COUNT:
34771 case V4HI_FTYPE_V4HI_SI_COUNT:
34772 case V2DI_FTYPE_V2DI_V2DI_COUNT:
34773 case V2DI_FTYPE_V2DI_SI_COUNT:
34774 case V2SI_FTYPE_V2SI_V2SI_COUNT:
34775 case V2SI_FTYPE_V2SI_SI_COUNT:
34776 case V1DI_FTYPE_V1DI_V1DI_COUNT:
34777 case V1DI_FTYPE_V1DI_SI_COUNT:
34778 nargs = 2;
34779 second_arg_count = true;
34780 break;
34781 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
34782 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
34783 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
34784 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
34785 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
34786 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
34787 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
34788 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
34789 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
34790 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
34791 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
34792 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
34793 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
34794 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
34795 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
34796 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
34797 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
34798 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
34799 nargs = 4;
34800 second_arg_count = true;
34801 break;
34802 case UINT64_FTYPE_UINT64_UINT64:
34803 case UINT_FTYPE_UINT_UINT:
34804 case UINT_FTYPE_UINT_USHORT:
34805 case UINT_FTYPE_UINT_UCHAR:
34806 case UINT16_FTYPE_UINT16_INT:
34807 case UINT8_FTYPE_UINT8_INT:
34808 case UQI_FTYPE_UQI_UQI:
34809 case UHI_FTYPE_UHI_UHI:
34810 case USI_FTYPE_USI_USI:
34811 case UDI_FTYPE_UDI_UDI:
34812 case V16SI_FTYPE_V8DF_V8DF:
34813 nargs = 2;
34814 break;
34815 case V2DI_FTYPE_V2DI_INT_CONVERT:
34816 nargs = 2;
34817 rmode = V1TImode;
34818 nargs_constant = 1;
34819 break;
34820 case V4DI_FTYPE_V4DI_INT_CONVERT:
34821 nargs = 2;
34822 rmode = V2TImode;
34823 nargs_constant = 1;
34824 break;
34825 case V8DI_FTYPE_V8DI_INT_CONVERT:
34826 nargs = 2;
34827 rmode = V4TImode;
34828 nargs_constant = 1;
34829 break;
34830 case V8HI_FTYPE_V8HI_INT:
34831 case V8HI_FTYPE_V8SF_INT:
34832 case V16HI_FTYPE_V16SF_INT:
34833 case V8HI_FTYPE_V4SF_INT:
34834 case V8SF_FTYPE_V8SF_INT:
34835 case V4SF_FTYPE_V16SF_INT:
34836 case V16SF_FTYPE_V16SF_INT:
34837 case V4SI_FTYPE_V4SI_INT:
34838 case V4SI_FTYPE_V8SI_INT:
34839 case V4HI_FTYPE_V4HI_INT:
34840 case V4DF_FTYPE_V4DF_INT:
34841 case V4DF_FTYPE_V8DF_INT:
34842 case V4SF_FTYPE_V4SF_INT:
34843 case V4SF_FTYPE_V8SF_INT:
34844 case V2DI_FTYPE_V2DI_INT:
34845 case V2DF_FTYPE_V2DF_INT:
34846 case V2DF_FTYPE_V4DF_INT:
34847 case V16HI_FTYPE_V16HI_INT:
34848 case V8SI_FTYPE_V8SI_INT:
34849 case V16SI_FTYPE_V16SI_INT:
34850 case V4SI_FTYPE_V16SI_INT:
34851 case V4DI_FTYPE_V4DI_INT:
34852 case V2DI_FTYPE_V4DI_INT:
34853 case V4DI_FTYPE_V8DI_INT:
34854 case QI_FTYPE_V4SF_INT:
34855 case QI_FTYPE_V2DF_INT:
34856 case UQI_FTYPE_UQI_UQI_CONST:
34857 case UHI_FTYPE_UHI_UQI:
34858 case USI_FTYPE_USI_UQI:
34859 case UDI_FTYPE_UDI_UQI:
34860 nargs = 2;
34861 nargs_constant = 1;
34862 break;
34863 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34864 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34865 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34866 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34867 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34868 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34869 case UHI_FTYPE_V16SI_V16SI_UHI:
34870 case UQI_FTYPE_V8DI_V8DI_UQI:
34871 case V16HI_FTYPE_V16SI_V16HI_UHI:
34872 case V16QI_FTYPE_V16SI_V16QI_UHI:
34873 case V16QI_FTYPE_V8DI_V16QI_UQI:
34874 case V16SF_FTYPE_V16SF_V16SF_UHI:
34875 case V16SF_FTYPE_V4SF_V16SF_UHI:
34876 case V16SI_FTYPE_SI_V16SI_UHI:
34877 case V16SI_FTYPE_V16HI_V16SI_UHI:
34878 case V16SI_FTYPE_V16QI_V16SI_UHI:
34879 case V8SF_FTYPE_V4SF_V8SF_UQI:
34880 case V4DF_FTYPE_V2DF_V4DF_UQI:
34881 case V8SI_FTYPE_V4SI_V8SI_UQI:
34882 case V8SI_FTYPE_SI_V8SI_UQI:
34883 case V4SI_FTYPE_V4SI_V4SI_UQI:
34884 case V4SI_FTYPE_SI_V4SI_UQI:
34885 case V4DI_FTYPE_V2DI_V4DI_UQI:
34886 case V4DI_FTYPE_DI_V4DI_UQI:
34887 case V2DI_FTYPE_V2DI_V2DI_UQI:
34888 case V2DI_FTYPE_DI_V2DI_UQI:
34889 case V64QI_FTYPE_V64QI_V64QI_UDI:
34890 case V64QI_FTYPE_V16QI_V64QI_UDI:
34891 case V64QI_FTYPE_QI_V64QI_UDI:
34892 case V32QI_FTYPE_V32QI_V32QI_USI:
34893 case V32QI_FTYPE_V16QI_V32QI_USI:
34894 case V32QI_FTYPE_QI_V32QI_USI:
34895 case V16QI_FTYPE_V16QI_V16QI_UHI:
34896 case V16QI_FTYPE_QI_V16QI_UHI:
34897 case V32HI_FTYPE_V8HI_V32HI_USI:
34898 case V32HI_FTYPE_HI_V32HI_USI:
34899 case V16HI_FTYPE_V8HI_V16HI_UHI:
34900 case V16HI_FTYPE_HI_V16HI_UHI:
34901 case V8HI_FTYPE_V8HI_V8HI_UQI:
34902 case V8HI_FTYPE_HI_V8HI_UQI:
34903 case V8SF_FTYPE_V8HI_V8SF_UQI:
34904 case V4SF_FTYPE_V8HI_V4SF_UQI:
34905 case V8SI_FTYPE_V8SF_V8SI_UQI:
34906 case V4SI_FTYPE_V4SF_V4SI_UQI:
34907 case V4DI_FTYPE_V4SF_V4DI_UQI:
34908 case V2DI_FTYPE_V4SF_V2DI_UQI:
34909 case V4SF_FTYPE_V4DI_V4SF_UQI:
34910 case V4SF_FTYPE_V2DI_V4SF_UQI:
34911 case V4DF_FTYPE_V4DI_V4DF_UQI:
34912 case V2DF_FTYPE_V2DI_V2DF_UQI:
34913 case V16QI_FTYPE_V8HI_V16QI_UQI:
34914 case V16QI_FTYPE_V16HI_V16QI_UHI:
34915 case V16QI_FTYPE_V4SI_V16QI_UQI:
34916 case V16QI_FTYPE_V8SI_V16QI_UQI:
34917 case V8HI_FTYPE_V4SI_V8HI_UQI:
34918 case V8HI_FTYPE_V8SI_V8HI_UQI:
34919 case V16QI_FTYPE_V2DI_V16QI_UQI:
34920 case V16QI_FTYPE_V4DI_V16QI_UQI:
34921 case V8HI_FTYPE_V2DI_V8HI_UQI:
34922 case V8HI_FTYPE_V4DI_V8HI_UQI:
34923 case V4SI_FTYPE_V2DI_V4SI_UQI:
34924 case V4SI_FTYPE_V4DI_V4SI_UQI:
34925 case V32QI_FTYPE_V32HI_V32QI_USI:
34926 case UHI_FTYPE_V16QI_V16QI_UHI:
34927 case USI_FTYPE_V32QI_V32QI_USI:
34928 case UDI_FTYPE_V64QI_V64QI_UDI:
34929 case UQI_FTYPE_V8HI_V8HI_UQI:
34930 case UHI_FTYPE_V16HI_V16HI_UHI:
34931 case USI_FTYPE_V32HI_V32HI_USI:
34932 case UQI_FTYPE_V4SI_V4SI_UQI:
34933 case UQI_FTYPE_V8SI_V8SI_UQI:
34934 case UQI_FTYPE_V2DI_V2DI_UQI:
34935 case UQI_FTYPE_V4DI_V4DI_UQI:
34936 case V4SF_FTYPE_V2DF_V4SF_UQI:
34937 case V4SF_FTYPE_V4DF_V4SF_UQI:
34938 case V16SI_FTYPE_V16SI_V16SI_UHI:
34939 case V16SI_FTYPE_V4SI_V16SI_UHI:
34940 case V2DI_FTYPE_V4SI_V2DI_UQI:
34941 case V2DI_FTYPE_V8HI_V2DI_UQI:
34942 case V2DI_FTYPE_V16QI_V2DI_UQI:
34943 case V4DI_FTYPE_V4DI_V4DI_UQI:
34944 case V4DI_FTYPE_V4SI_V4DI_UQI:
34945 case V4DI_FTYPE_V8HI_V4DI_UQI:
34946 case V4DI_FTYPE_V16QI_V4DI_UQI:
34947 case V4DI_FTYPE_V4DF_V4DI_UQI:
34948 case V2DI_FTYPE_V2DF_V2DI_UQI:
34949 case V4SI_FTYPE_V4DF_V4SI_UQI:
34950 case V4SI_FTYPE_V2DF_V4SI_UQI:
34951 case V4SI_FTYPE_V8HI_V4SI_UQI:
34952 case V4SI_FTYPE_V16QI_V4SI_UQI:
34953 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34954 case V8DF_FTYPE_V2DF_V8DF_UQI:
34955 case V8DF_FTYPE_V4DF_V8DF_UQI:
34956 case V8DF_FTYPE_V8DF_V8DF_UQI:
34957 case V8SF_FTYPE_V8SF_V8SF_UQI:
34958 case V8SF_FTYPE_V8SI_V8SF_UQI:
34959 case V4DF_FTYPE_V4DF_V4DF_UQI:
34960 case V4SF_FTYPE_V4SF_V4SF_UQI:
34961 case V2DF_FTYPE_V2DF_V2DF_UQI:
34962 case V2DF_FTYPE_V4SF_V2DF_UQI:
34963 case V2DF_FTYPE_V4SI_V2DF_UQI:
34964 case V4SF_FTYPE_V4SI_V4SF_UQI:
34965 case V4DF_FTYPE_V4SF_V4DF_UQI:
34966 case V4DF_FTYPE_V4SI_V4DF_UQI:
34967 case V8SI_FTYPE_V8SI_V8SI_UQI:
34968 case V8SI_FTYPE_V8HI_V8SI_UQI:
34969 case V8SI_FTYPE_V16QI_V8SI_UQI:
34970 case V8DF_FTYPE_V8SI_V8DF_UQI:
34971 case V8DI_FTYPE_DI_V8DI_UQI:
34972 case V16SF_FTYPE_V8SF_V16SF_UHI:
34973 case V16SI_FTYPE_V8SI_V16SI_UHI:
34974 case V16HI_FTYPE_V16HI_V16HI_UHI:
34975 case V8HI_FTYPE_V16QI_V8HI_UQI:
34976 case V16HI_FTYPE_V16QI_V16HI_UHI:
34977 case V32HI_FTYPE_V32HI_V32HI_USI:
34978 case V32HI_FTYPE_V32QI_V32HI_USI:
34979 case V8DI_FTYPE_V16QI_V8DI_UQI:
34980 case V8DI_FTYPE_V2DI_V8DI_UQI:
34981 case V8DI_FTYPE_V4DI_V8DI_UQI:
34982 case V8DI_FTYPE_V8DI_V8DI_UQI:
34983 case V8DI_FTYPE_V8HI_V8DI_UQI:
34984 case V8DI_FTYPE_V8SI_V8DI_UQI:
34985 case V8HI_FTYPE_V8DI_V8HI_UQI:
34986 case V8SI_FTYPE_V8DI_V8SI_UQI:
34987 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34988 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34989 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34990 case V32HI_FTYPE_V32HI_V32HI_V32HI:
34991 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34992 case V16HI_FTYPE_V16HI_V16HI_V16HI:
34993 case V8SI_FTYPE_V8SI_V8SI_V8SI:
34994 case V8HI_FTYPE_V8HI_V8HI_V8HI:
34995 nargs = 3;
34996 break;
34997 case V32QI_FTYPE_V32QI_V32QI_INT:
34998 case V16HI_FTYPE_V16HI_V16HI_INT:
34999 case V16QI_FTYPE_V16QI_V16QI_INT:
35000 case V4DI_FTYPE_V4DI_V4DI_INT:
35001 case V8HI_FTYPE_V8HI_V8HI_INT:
35002 case V8SI_FTYPE_V8SI_V8SI_INT:
35003 case V8SI_FTYPE_V8SI_V4SI_INT:
35004 case V8SF_FTYPE_V8SF_V8SF_INT:
35005 case V8SF_FTYPE_V8SF_V4SF_INT:
35006 case V4SI_FTYPE_V4SI_V4SI_INT:
35007 case V4DF_FTYPE_V4DF_V4DF_INT:
35008 case V16SF_FTYPE_V16SF_V16SF_INT:
35009 case V16SF_FTYPE_V16SF_V4SF_INT:
35010 case V16SI_FTYPE_V16SI_V4SI_INT:
35011 case V4DF_FTYPE_V4DF_V2DF_INT:
35012 case V4SF_FTYPE_V4SF_V4SF_INT:
35013 case V2DI_FTYPE_V2DI_V2DI_INT:
35014 case V4DI_FTYPE_V4DI_V2DI_INT:
35015 case V2DF_FTYPE_V2DF_V2DF_INT:
35016 case UQI_FTYPE_V8DI_V8UDI_INT:
35017 case UQI_FTYPE_V8DF_V8DF_INT:
35018 case UQI_FTYPE_V2DF_V2DF_INT:
35019 case UQI_FTYPE_V4SF_V4SF_INT:
35020 case UHI_FTYPE_V16SI_V16SI_INT:
35021 case UHI_FTYPE_V16SF_V16SF_INT:
35022 case V64QI_FTYPE_V64QI_V64QI_INT:
35023 case V32HI_FTYPE_V32HI_V32HI_INT:
35024 case V16SI_FTYPE_V16SI_V16SI_INT:
35025 case V8DI_FTYPE_V8DI_V8DI_INT:
35026 nargs = 3;
35027 nargs_constant = 1;
35028 break;
35029 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
35030 nargs = 3;
35031 rmode = V4DImode;
35032 nargs_constant = 1;
35033 break;
35034 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
35035 nargs = 3;
35036 rmode = V2DImode;
35037 nargs_constant = 1;
35038 break;
35039 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
35040 nargs = 3;
35041 rmode = DImode;
35042 nargs_constant = 1;
35043 break;
35044 case V2DI_FTYPE_V2DI_UINT_UINT:
35045 nargs = 3;
35046 nargs_constant = 2;
35047 break;
35048 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
35049 nargs = 3;
35050 rmode = V8DImode;
35051 nargs_constant = 1;
35052 break;
35053 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
35054 nargs = 5;
35055 rmode = V8DImode;
35056 mask_pos = 2;
35057 nargs_constant = 1;
35058 break;
35059 case QI_FTYPE_V8DF_INT_UQI:
35060 case QI_FTYPE_V4DF_INT_UQI:
35061 case QI_FTYPE_V2DF_INT_UQI:
35062 case HI_FTYPE_V16SF_INT_UHI:
35063 case QI_FTYPE_V8SF_INT_UQI:
35064 case QI_FTYPE_V4SF_INT_UQI:
35065 case V4SI_FTYPE_V4SI_V4SI_UHI:
35066 case V8SI_FTYPE_V8SI_V8SI_UHI:
35067 nargs = 3;
35068 mask_pos = 1;
35069 nargs_constant = 1;
35070 break;
35071 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
35072 nargs = 5;
35073 rmode = V4DImode;
35074 mask_pos = 2;
35075 nargs_constant = 1;
35076 break;
35077 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
35078 nargs = 5;
35079 rmode = V2DImode;
35080 mask_pos = 2;
35081 nargs_constant = 1;
35082 break;
35083 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
35084 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
35085 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
35086 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
35087 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
35088 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
35089 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
35090 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
35091 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
35092 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
35093 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
35094 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
35095 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
35096 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
35097 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
35098 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
35099 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
35100 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
35101 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
35102 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
35103 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
35104 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
35105 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
35106 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
35107 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
35108 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
35109 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
35110 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
35111 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
35112 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
35113 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
35114 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
35115 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
35116 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
35117 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
35118 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
35119 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
35120 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
35121 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
35122 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
35123 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
35124 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
35125 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
35126 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
35127 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
35128 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
35129 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
35130 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
35131 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
35132 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
35133 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
35134 nargs = 4;
35135 break;
35136 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
35137 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
35138 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
35139 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
35140 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
35141 nargs = 4;
35142 nargs_constant = 1;
35143 break;
35144 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
35145 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
35146 case QI_FTYPE_V4DF_V4DF_INT_UQI:
35147 case QI_FTYPE_V8SF_V8SF_INT_UQI:
35148 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
35149 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
35150 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
35151 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
35152 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
35153 case USI_FTYPE_V32QI_V32QI_INT_USI:
35154 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
35155 case USI_FTYPE_V32HI_V32HI_INT_USI:
35156 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
35157 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
35158 case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
35159 case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
35160 case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
35161 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
35162 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
35163 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
35164 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
35165 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
35166 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
35167 nargs = 4;
35168 mask_pos = 1;
35169 nargs_constant = 1;
35170 break;
35171 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
35172 nargs = 4;
35173 nargs_constant = 2;
35174 break;
35175 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
35176 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
35177 nargs = 4;
35178 break;
35179 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
35180 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
35181 mask_pos = 1;
35182 nargs = 4;
35183 nargs_constant = 1;
35184 break;
35185 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
35186 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
35187 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
35188 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
35189 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
35190 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
35191 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
35192 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
35193 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
35194 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
35195 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
35196 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
35197 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
35198 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
35199 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
35200 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
35201 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
35202 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
35203 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
35204 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
35205 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
35206 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
35207 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
35208 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
35209 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
35210 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
35211 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
35212 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
35213 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
35214 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
35215 nargs = 4;
35216 mask_pos = 2;
35217 nargs_constant = 1;
35218 break;
35219 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
35220 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
35221 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
35222 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
35223 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
35224 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
35225 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
35226 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
35227 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
35228 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
35229 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
35230 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
35231 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
35232 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
35233 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
35234 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
35235 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
35236 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
35237 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
35238 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
35239 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
35240 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
35241 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
35242 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
35243 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
35244 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
35245 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
35246 nargs = 5;
35247 mask_pos = 2;
35248 nargs_constant = 1;
35249 break;
35250 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
35251 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
35252 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
35253 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
35254 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
35255 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
35256 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
35257 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
35258 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
35259 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
35260 nargs = 5;
35261 mask_pos = 1;
35262 nargs_constant = 1;
35263 break;
35264 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
35265 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
35266 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
35267 case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
35268 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
35269 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
35270 case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
35271 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
35272 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
35273 case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
35274 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
35275 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
35276 nargs = 5;
35277 mask_pos = 1;
35278 nargs_constant = 2;
35279 break;
35281 default:
35282 gcc_unreachable ();
35285 gcc_assert (nargs <= ARRAY_SIZE (args));
35287 if (comparison != UNKNOWN)
35289 gcc_assert (nargs == 2);
35290 return ix86_expand_sse_compare (d, exp, target, swap);
35293 if (rmode == VOIDmode || rmode == tmode)
35295 if (optimize
35296 || target == 0
35297 || GET_MODE (target) != tmode
35298 || !insn_p->operand[0].predicate (target, tmode))
35299 target = gen_reg_rtx (tmode);
35300 else if (memory_operand (target, tmode))
35301 num_memory++;
35302 real_target = target;
35304 else
35306 real_target = gen_reg_rtx (tmode);
35307 target = lowpart_subreg (rmode, real_target, tmode);
35310 for (i = 0; i < nargs; i++)
35312 tree arg = CALL_EXPR_ARG (exp, i);
35313 rtx op = expand_normal (arg);
35314 machine_mode mode = insn_p->operand[i + 1].mode;
35315 bool match = insn_p->operand[i + 1].predicate (op, mode);
35317 if (second_arg_count && i == 1)
35319 /* SIMD shift insns take either an 8-bit immediate or a
35320 register as the count, but the builtin functions take an
35321 int as the count.  If the count doesn't match, put it in
35322 a register.  The instructions use a 64-bit count; if op is
35323 only 32-bit, zero-extend it, since negative shift counts
35324 are undefined behavior and zero-extension is more
35325 efficient. */
35326 if (!match)
35328 if (SCALAR_INT_MODE_P (GET_MODE (op)))
35329 op = convert_modes (mode, GET_MODE (op), op, 1);
35330 else
35331 op = lowpart_subreg (mode, op, GET_MODE (op));
35332 if (!insn_p->operand[i + 1].predicate (op, mode))
35333 op = copy_to_reg (op);
35336 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35337 || (!mask_pos && (nargs - i) <= nargs_constant))
35339 if (!match)
35340 switch (icode)
35342 case CODE_FOR_avx_vinsertf128v4di:
35343 case CODE_FOR_avx_vextractf128v4di:
35344 error ("the last argument must be an 1-bit immediate");
35345 return const0_rtx;
35347 case CODE_FOR_avx512f_cmpv8di3_mask:
35348 case CODE_FOR_avx512f_cmpv16si3_mask:
35349 case CODE_FOR_avx512f_ucmpv8di3_mask:
35350 case CODE_FOR_avx512f_ucmpv16si3_mask:
35351 case CODE_FOR_avx512vl_cmpv4di3_mask:
35352 case CODE_FOR_avx512vl_cmpv8si3_mask:
35353 case CODE_FOR_avx512vl_ucmpv4di3_mask:
35354 case CODE_FOR_avx512vl_ucmpv8si3_mask:
35355 case CODE_FOR_avx512vl_cmpv2di3_mask:
35356 case CODE_FOR_avx512vl_cmpv4si3_mask:
35357 case CODE_FOR_avx512vl_ucmpv2di3_mask:
35358 case CODE_FOR_avx512vl_ucmpv4si3_mask:
35359 error ("the last argument must be a 3-bit immediate");
35360 return const0_rtx;
35362 case CODE_FOR_sse4_1_roundsd:
35363 case CODE_FOR_sse4_1_roundss:
35365 case CODE_FOR_sse4_1_roundpd:
35366 case CODE_FOR_sse4_1_roundps:
35367 case CODE_FOR_avx_roundpd256:
35368 case CODE_FOR_avx_roundps256:
35370 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
35371 case CODE_FOR_sse4_1_roundps_sfix:
35372 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
35373 case CODE_FOR_avx_roundps_sfix256:
35375 case CODE_FOR_sse4_1_blendps:
35376 case CODE_FOR_avx_blendpd256:
35377 case CODE_FOR_avx_vpermilv4df:
35378 case CODE_FOR_avx_vpermilv4df_mask:
35379 case CODE_FOR_avx512f_getmantv8df_mask:
35380 case CODE_FOR_avx512f_getmantv16sf_mask:
35381 case CODE_FOR_avx512vl_getmantv8sf_mask:
35382 case CODE_FOR_avx512vl_getmantv4df_mask:
35383 case CODE_FOR_avx512vl_getmantv4sf_mask:
35384 case CODE_FOR_avx512vl_getmantv2df_mask:
35385 case CODE_FOR_avx512dq_rangepv8df_mask_round:
35386 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
35387 case CODE_FOR_avx512dq_rangepv4df_mask:
35388 case CODE_FOR_avx512dq_rangepv8sf_mask:
35389 case CODE_FOR_avx512dq_rangepv2df_mask:
35390 case CODE_FOR_avx512dq_rangepv4sf_mask:
35391 case CODE_FOR_avx_shufpd256_mask:
35392 error ("the last argument must be a 4-bit immediate");
35393 return const0_rtx;
35395 case CODE_FOR_sha1rnds4:
35396 case CODE_FOR_sse4_1_blendpd:
35397 case CODE_FOR_avx_vpermilv2df:
35398 case CODE_FOR_avx_vpermilv2df_mask:
35399 case CODE_FOR_xop_vpermil2v2df3:
35400 case CODE_FOR_xop_vpermil2v4sf3:
35401 case CODE_FOR_xop_vpermil2v4df3:
35402 case CODE_FOR_xop_vpermil2v8sf3:
35403 case CODE_FOR_avx512f_vinsertf32x4_mask:
35404 case CODE_FOR_avx512f_vinserti32x4_mask:
35405 case CODE_FOR_avx512f_vextractf32x4_mask:
35406 case CODE_FOR_avx512f_vextracti32x4_mask:
35407 case CODE_FOR_sse2_shufpd:
35408 case CODE_FOR_sse2_shufpd_mask:
35409 case CODE_FOR_avx512dq_shuf_f64x2_mask:
35410 case CODE_FOR_avx512dq_shuf_i64x2_mask:
35411 case CODE_FOR_avx512vl_shuf_i32x4_mask:
35412 case CODE_FOR_avx512vl_shuf_f32x4_mask:
35413 error ("the last argument must be a 2-bit immediate");
35414 return const0_rtx;
35416 case CODE_FOR_avx_vextractf128v4df:
35417 case CODE_FOR_avx_vextractf128v8sf:
35418 case CODE_FOR_avx_vextractf128v8si:
35419 case CODE_FOR_avx_vinsertf128v4df:
35420 case CODE_FOR_avx_vinsertf128v8sf:
35421 case CODE_FOR_avx_vinsertf128v8si:
35422 case CODE_FOR_avx512f_vinsertf64x4_mask:
35423 case CODE_FOR_avx512f_vinserti64x4_mask:
35424 case CODE_FOR_avx512f_vextractf64x4_mask:
35425 case CODE_FOR_avx512f_vextracti64x4_mask:
35426 case CODE_FOR_avx512dq_vinsertf32x8_mask:
35427 case CODE_FOR_avx512dq_vinserti32x8_mask:
35428 case CODE_FOR_avx512vl_vinsertv4df:
35429 case CODE_FOR_avx512vl_vinsertv4di:
35430 case CODE_FOR_avx512vl_vinsertv8sf:
35431 case CODE_FOR_avx512vl_vinsertv8si:
35432 error ("the last argument must be a 1-bit immediate");
35433 return const0_rtx;
35435 case CODE_FOR_avx_vmcmpv2df3:
35436 case CODE_FOR_avx_vmcmpv4sf3:
35437 case CODE_FOR_avx_cmpv2df3:
35438 case CODE_FOR_avx_cmpv4sf3:
35439 case CODE_FOR_avx_cmpv4df3:
35440 case CODE_FOR_avx_cmpv8sf3:
35441 case CODE_FOR_avx512f_cmpv8df3_mask:
35442 case CODE_FOR_avx512f_cmpv16sf3_mask:
35443 case CODE_FOR_avx512f_vmcmpv2df3_mask:
35444 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
35445 error ("the last argument must be a 5-bit immediate");
35446 return const0_rtx;
35448 default:
35449 switch (nargs_constant)
35451 case 2:
35452 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35453 || (!mask_pos && (nargs - i) == nargs_constant))
35455 error ("the next to last argument must be an 8-bit immediate");
35456 break;
35458 /* FALLTHRU */
35459 case 1:
35460 error ("the last argument must be an 8-bit immediate");
35461 break;
35462 default:
35463 gcc_unreachable ();
35465 return const0_rtx;
35468 else
35470 if (VECTOR_MODE_P (mode))
35471 op = safe_vector_operand (op, mode);
35473 /* If we aren't optimizing, only allow one memory operand to
35474 be generated. */
35475 if (memory_operand (op, mode))
35476 num_memory++;
35478 op = fixup_modeless_constant (op, mode);
35480 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35482 if (optimize || !match || num_memory > 1)
35483 op = copy_to_mode_reg (mode, op);
35485 else
35487 op = copy_to_reg (op);
35488 op = lowpart_subreg (mode, op, GET_MODE (op));
35492 args[i].op = op;
35493 args[i].mode = mode;
35496 switch (nargs)
35498 case 1:
35499 pat = GEN_FCN (icode) (real_target, args[0].op);
35500 break;
35501 case 2:
35502 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
35503 break;
35504 case 3:
35505 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35506 args[2].op);
35507 break;
35508 case 4:
35509 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35510 args[2].op, args[3].op);
35511 break;
35512 case 5:
35513 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35514 args[2].op, args[3].op, args[4].op);
35515 break;
35516 case 6:
35517 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35518 args[2].op, args[3].op, args[4].op,
35519 args[5].op);
35520 break;
35521 default:
35522 gcc_unreachable ();
35525 if (! pat)
35526 return 0;
35528 emit_insn (pat);
35529 return target;
35532 /* Transform a pattern of the following layout:
35533 (set A
35534 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
35536 into:
35537 (set A B) */
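/* This is used when the rounding-control operand turns out to be
   NO_ROUND, so that the embedded-rounding form of the pattern
   degenerates to the plain operation (see ix86_expand_sse_comi_round
   and ix86_expand_round_builtin below).  */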
35539 static rtx
35540 ix86_erase_embedded_rounding (rtx pat)
35542 if (GET_CODE (pat) == INSN)
35543 pat = PATTERN (pat);
35545 gcc_assert (GET_CODE (pat) == SET);
35546 rtx src = SET_SRC (pat);
35547 gcc_assert (XVECLEN (src, 0) == 2);
35548 rtx p0 = XVECEXP (src, 0, 0);
35549 gcc_assert (GET_CODE (src) == UNSPEC
35550 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
35551 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
35552 return res;
35555 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
35556 with rounding. */
35557 static rtx
35558 ix86_expand_sse_comi_round (const struct builtin_description *d,
35559 tree exp, rtx target)
35561 rtx pat, set_dst;
35562 tree arg0 = CALL_EXPR_ARG (exp, 0);
35563 tree arg1 = CALL_EXPR_ARG (exp, 1);
35564 tree arg2 = CALL_EXPR_ARG (exp, 2);
35565 tree arg3 = CALL_EXPR_ARG (exp, 3);
35566 rtx op0 = expand_normal (arg0);
35567 rtx op1 = expand_normal (arg1);
35568 rtx op2 = expand_normal (arg2);
35569 rtx op3 = expand_normal (arg3);
35570 enum insn_code icode = d->icode;
35571 const struct insn_data_d *insn_p = &insn_data[icode];
35572 machine_mode mode0 = insn_p->operand[0].mode;
35573 machine_mode mode1 = insn_p->operand[1].mode;
35574 enum rtx_code comparison = UNEQ;
35575 bool need_ucomi = false;
35577 /* See avxintrin.h for values. */
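/* The two 32-entry tables below are indexed by that predicate immediate
   (the _CMP_* encodings, 0..31): comi_comparisons gives the rtx
   comparison code to test, and need_ucomi_values says whether the
   non-signaling ucomi form of the instruction is needed for that
   predicate.  */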
35578 enum rtx_code comi_comparisons[32] =
35580 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
35581 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
35582 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
35584 bool need_ucomi_values[32] =
35586 true, false, false, true, true, false, false, true,
35587 true, false, false, true, true, false, false, true,
35588 false, true, true, false, false, true, true, false,
35589 false, true, true, false, false, true, true, false
35592 if (!CONST_INT_P (op2))
35594 error ("the third argument must be comparison constant");
35595 return const0_rtx;
35597 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
35599 error ("incorrect comparison mode");
35600 return const0_rtx;
35603 if (!insn_p->operand[2].predicate (op3, SImode))
35605 error ("incorrect rounding operand");
35606 return const0_rtx;
35609 comparison = comi_comparisons[INTVAL (op2)];
35610 need_ucomi = need_ucomi_values[INTVAL (op2)];
35612 if (VECTOR_MODE_P (mode0))
35613 op0 = safe_vector_operand (op0, mode0);
35614 if (VECTOR_MODE_P (mode1))
35615 op1 = safe_vector_operand (op1, mode1);
35617 target = gen_reg_rtx (SImode);
35618 emit_move_insn (target, const0_rtx);
35619 target = gen_rtx_SUBREG (QImode, target, 0);
35621 if ((optimize && !register_operand (op0, mode0))
35622 || !insn_p->operand[0].predicate (op0, mode0))
35623 op0 = copy_to_mode_reg (mode0, op0);
35624 if ((optimize && !register_operand (op1, mode1))
35625 || !insn_p->operand[1].predicate (op1, mode1))
35626 op1 = copy_to_mode_reg (mode1, op1);
35628 if (need_ucomi)
35629 icode = icode == CODE_FOR_sse_comi_round
35630 ? CODE_FOR_sse_ucomi_round
35631 : CODE_FOR_sse2_ucomi_round;
35633 pat = GEN_FCN (icode) (op0, op1, op3);
35634 if (! pat)
35635 return 0;
35637 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
35638 if (INTVAL (op3) == NO_ROUND)
35640 pat = ix86_erase_embedded_rounding (pat);
35641 if (! pat)
35642 return 0;
35644 set_dst = SET_DEST (pat);
35646 else
35648 gcc_assert (GET_CODE (pat) == SET);
35649 set_dst = SET_DEST (pat);
35652 emit_insn (pat);
35653 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35654 gen_rtx_fmt_ee (comparison, QImode,
35655 set_dst,
35656 const0_rtx)));
35658 return SUBREG_REG (target);
35661 static rtx
35662 ix86_expand_round_builtin (const struct builtin_description *d,
35663 tree exp, rtx target)
35665 rtx pat;
35666 unsigned int i, nargs;
35667 struct
35669 rtx op;
35670 machine_mode mode;
35671 } args[6];
35672 enum insn_code icode = d->icode;
35673 const struct insn_data_d *insn_p = &insn_data[icode];
35674 machine_mode tmode = insn_p->operand[0].mode;
35675 unsigned int nargs_constant = 0;
35676 unsigned int redundant_embed_rnd = 0;
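/* redundant_embed_rnd is set when the trailing rounding operand is
   NO_ROUND; the pattern is still generated with the embedded-rounding
   unspec, which is then stripped by ix86_erase_embedded_rounding before
   the insn is emitted.  */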
35678 switch ((enum ix86_builtin_func_type) d->flag)
35680 case UINT64_FTYPE_V2DF_INT:
35681 case UINT64_FTYPE_V4SF_INT:
35682 case UINT_FTYPE_V2DF_INT:
35683 case UINT_FTYPE_V4SF_INT:
35684 case INT64_FTYPE_V2DF_INT:
35685 case INT64_FTYPE_V4SF_INT:
35686 case INT_FTYPE_V2DF_INT:
35687 case INT_FTYPE_V4SF_INT:
35688 nargs = 2;
35689 break;
35690 case V4SF_FTYPE_V4SF_UINT_INT:
35691 case V4SF_FTYPE_V4SF_UINT64_INT:
35692 case V2DF_FTYPE_V2DF_UINT64_INT:
35693 case V4SF_FTYPE_V4SF_INT_INT:
35694 case V4SF_FTYPE_V4SF_INT64_INT:
35695 case V2DF_FTYPE_V2DF_INT64_INT:
35696 case V4SF_FTYPE_V4SF_V4SF_INT:
35697 case V2DF_FTYPE_V2DF_V2DF_INT:
35698 case V4SF_FTYPE_V4SF_V2DF_INT:
35699 case V2DF_FTYPE_V2DF_V4SF_INT:
35700 nargs = 3;
35701 break;
35702 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
35703 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
35704 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
35705 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
35706 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
35707 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
35708 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
35709 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
35710 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
35711 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
35712 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
35713 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
35714 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
35715 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
35716 nargs = 4;
35717 break;
35718 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
35719 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
35720 nargs_constant = 2;
35721 nargs = 4;
35722 break;
35723 case INT_FTYPE_V4SF_V4SF_INT_INT:
35724 case INT_FTYPE_V2DF_V2DF_INT_INT:
35725 return ix86_expand_sse_comi_round (d, exp, target);
35726 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
35727 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
35728 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
35729 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
35730 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
35731 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
35732 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
35733 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
35734 nargs = 5;
35735 break;
35736 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
35737 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
35738 nargs_constant = 4;
35739 nargs = 5;
35740 break;
35741 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
35742 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
35743 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
35744 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
35745 nargs_constant = 3;
35746 nargs = 5;
35747 break;
35748 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
35749 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
35750 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
35751 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
35752 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
35753 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
35754 nargs = 6;
35755 nargs_constant = 4;
35756 break;
35757 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
35758 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
35759 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
35760 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
35761 nargs = 6;
35762 nargs_constant = 3;
35763 break;
35764 default:
35765 gcc_unreachable ();
35767 gcc_assert (nargs <= ARRAY_SIZE (args));
35769 if (optimize
35770 || target == 0
35771 || GET_MODE (target) != tmode
35772 || !insn_p->operand[0].predicate (target, tmode))
35773 target = gen_reg_rtx (tmode);
35775 for (i = 0; i < nargs; i++)
35777 tree arg = CALL_EXPR_ARG (exp, i);
35778 rtx op = expand_normal (arg);
35779 machine_mode mode = insn_p->operand[i + 1].mode;
35780 bool match = insn_p->operand[i + 1].predicate (op, mode);
35782 if (i == nargs - nargs_constant)
35784 if (!match)
35786 switch (icode)
35788 case CODE_FOR_avx512f_getmantv8df_mask_round:
35789 case CODE_FOR_avx512f_getmantv16sf_mask_round:
35790 case CODE_FOR_avx512f_vgetmantv2df_round:
35791 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
35792 case CODE_FOR_avx512f_vgetmantv4sf_round:
35793 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
35794 error ("the immediate argument must be a 4-bit immediate");
35795 return const0_rtx;
35796 case CODE_FOR_avx512f_cmpv8df3_mask_round:
35797 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
35798 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
35799 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
35800 error ("the immediate argument must be a 5-bit immediate");
35801 return const0_rtx;
35802 default:
35803 error ("the immediate argument must be an 8-bit immediate");
35804 return const0_rtx;
35808 else if (i == nargs - 1)
35810 if (!insn_p->operand[nargs].predicate (op, SImode))
35812 error ("incorrect rounding operand");
35813 return const0_rtx;
35816 /* If there is no rounding, use the normal version of the pattern. */
35817 if (INTVAL (op) == NO_ROUND)
35818 redundant_embed_rnd = 1;
35820 else
35822 if (VECTOR_MODE_P (mode))
35823 op = safe_vector_operand (op, mode);
35825 op = fixup_modeless_constant (op, mode);
35827 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35829 if (optimize || !match)
35830 op = copy_to_mode_reg (mode, op);
35832 else
35834 op = copy_to_reg (op);
35835 op = lowpart_subreg (mode, op, GET_MODE (op));
35839 args[i].op = op;
35840 args[i].mode = mode;
35843 switch (nargs)
35845 case 1:
35846 pat = GEN_FCN (icode) (target, args[0].op);
35847 break;
35848 case 2:
35849 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35850 break;
35851 case 3:
35852 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35853 args[2].op);
35854 break;
35855 case 4:
35856 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35857 args[2].op, args[3].op);
35858 break;
35859 case 5:
35860 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35861 args[2].op, args[3].op, args[4].op);
35862 break;
35863 case 6:
35864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35865 args[2].op, args[3].op, args[4].op,
35866 args[5].op);
35867 break;
35868 default:
35869 gcc_unreachable ();
35872 if (!pat)
35873 return 0;
35875 if (redundant_embed_rnd)
35876 pat = ix86_erase_embedded_rounding (pat);
35878 emit_insn (pat);
35879 return target;
35882 /* Subroutine of ix86_expand_builtin to take care of special insns
35883 with a variable number of operands. */
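/* Here klass says whether the builtin expands as a load (the result
   comes from the insn) or as a store (the first call argument is the
   destination), memory is the index of the argument that is treated as
   a memory reference (ARRAY_SIZE (args) when it is the target itself),
   and aligned_mem marks the cases whose memory operand is required to
   be properly aligned.  */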
35885 static rtx
35886 ix86_expand_special_args_builtin (const struct builtin_description *d,
35887 tree exp, rtx target)
35889 tree arg;
35890 rtx pat, op;
35891 unsigned int i, nargs, arg_adjust, memory;
35892 bool aligned_mem = false;
35893 struct
35895 rtx op;
35896 machine_mode mode;
35897 } args[3];
35898 enum insn_code icode = d->icode;
35899 bool last_arg_constant = false;
35900 const struct insn_data_d *insn_p = &insn_data[icode];
35901 machine_mode tmode = insn_p->operand[0].mode;
35902 enum { load, store } klass;
35904 switch ((enum ix86_builtin_func_type) d->flag)
35906 case VOID_FTYPE_VOID:
35907 emit_insn (GEN_FCN (icode) (target));
35908 return 0;
35909 case VOID_FTYPE_UINT64:
35910 case VOID_FTYPE_UNSIGNED:
35911 nargs = 0;
35912 klass = store;
35913 memory = 0;
35914 break;
35916 case INT_FTYPE_VOID:
35917 case USHORT_FTYPE_VOID:
35918 case UINT64_FTYPE_VOID:
35919 case UINT_FTYPE_VOID:
35920 case UNSIGNED_FTYPE_VOID:
35921 nargs = 0;
35922 klass = load;
35923 memory = 0;
35924 break;
35925 case UINT64_FTYPE_PUNSIGNED:
35926 case V2DI_FTYPE_PV2DI:
35927 case V4DI_FTYPE_PV4DI:
35928 case V32QI_FTYPE_PCCHAR:
35929 case V16QI_FTYPE_PCCHAR:
35930 case V8SF_FTYPE_PCV4SF:
35931 case V8SF_FTYPE_PCFLOAT:
35932 case V4SF_FTYPE_PCFLOAT:
35933 case V4DF_FTYPE_PCV2DF:
35934 case V4DF_FTYPE_PCDOUBLE:
35935 case V2DF_FTYPE_PCDOUBLE:
35936 case VOID_FTYPE_PVOID:
35937 case V8DI_FTYPE_PV8DI:
35938 nargs = 1;
35939 klass = load;
35940 memory = 0;
35941 switch (icode)
35943 case CODE_FOR_sse4_1_movntdqa:
35944 case CODE_FOR_avx2_movntdqa:
35945 case CODE_FOR_avx512f_movntdqa:
35946 aligned_mem = true;
35947 break;
35948 default:
35949 break;
35951 break;
35952 case VOID_FTYPE_PV2SF_V4SF:
35953 case VOID_FTYPE_PV8DI_V8DI:
35954 case VOID_FTYPE_PV4DI_V4DI:
35955 case VOID_FTYPE_PV2DI_V2DI:
35956 case VOID_FTYPE_PCHAR_V32QI:
35957 case VOID_FTYPE_PCHAR_V16QI:
35958 case VOID_FTYPE_PFLOAT_V16SF:
35959 case VOID_FTYPE_PFLOAT_V8SF:
35960 case VOID_FTYPE_PFLOAT_V4SF:
35961 case VOID_FTYPE_PDOUBLE_V8DF:
35962 case VOID_FTYPE_PDOUBLE_V4DF:
35963 case VOID_FTYPE_PDOUBLE_V2DF:
35964 case VOID_FTYPE_PLONGLONG_LONGLONG:
35965 case VOID_FTYPE_PULONGLONG_ULONGLONG:
35966 case VOID_FTYPE_PINT_INT:
35967 nargs = 1;
35968 klass = store;
35969 /* Reserve memory operand for target. */
35970 memory = ARRAY_SIZE (args);
35971 switch (icode)
35973 /* These builtins and instructions require the memory
35974 to be properly aligned. */
35975 case CODE_FOR_avx_movntv4di:
35976 case CODE_FOR_sse2_movntv2di:
35977 case CODE_FOR_avx_movntv8sf:
35978 case CODE_FOR_sse_movntv4sf:
35979 case CODE_FOR_sse4a_vmmovntv4sf:
35980 case CODE_FOR_avx_movntv4df:
35981 case CODE_FOR_sse2_movntv2df:
35982 case CODE_FOR_sse4a_vmmovntv2df:
35983 case CODE_FOR_sse2_movntidi:
35984 case CODE_FOR_sse_movntq:
35985 case CODE_FOR_sse2_movntisi:
35986 case CODE_FOR_avx512f_movntv16sf:
35987 case CODE_FOR_avx512f_movntv8df:
35988 case CODE_FOR_avx512f_movntv8di:
35989 aligned_mem = true;
35990 break;
35991 default:
35992 break;
35994 break;
35995 case V4SF_FTYPE_V4SF_PCV2SF:
35996 case V2DF_FTYPE_V2DF_PCDOUBLE:
35997 nargs = 2;
35998 klass = load;
35999 memory = 1;
36000 break;
36001 case V8SF_FTYPE_PCV8SF_V8SI:
36002 case V4DF_FTYPE_PCV4DF_V4DI:
36003 case V4SF_FTYPE_PCV4SF_V4SI:
36004 case V2DF_FTYPE_PCV2DF_V2DI:
36005 case V8SI_FTYPE_PCV8SI_V8SI:
36006 case V4DI_FTYPE_PCV4DI_V4DI:
36007 case V4SI_FTYPE_PCV4SI_V4SI:
36008 case V2DI_FTYPE_PCV2DI_V2DI:
36009 case VOID_FTYPE_INT_INT64:
36010 nargs = 2;
36011 klass = load;
36012 memory = 0;
36013 break;
36014 case VOID_FTYPE_PV8DF_V8DF_UQI:
36015 case VOID_FTYPE_PV4DF_V4DF_UQI:
36016 case VOID_FTYPE_PV2DF_V2DF_UQI:
36017 case VOID_FTYPE_PV16SF_V16SF_UHI:
36018 case VOID_FTYPE_PV8SF_V8SF_UQI:
36019 case VOID_FTYPE_PV4SF_V4SF_UQI:
36020 case VOID_FTYPE_PV8DI_V8DI_UQI:
36021 case VOID_FTYPE_PV4DI_V4DI_UQI:
36022 case VOID_FTYPE_PV2DI_V2DI_UQI:
36023 case VOID_FTYPE_PV16SI_V16SI_UHI:
36024 case VOID_FTYPE_PV8SI_V8SI_UQI:
36025 case VOID_FTYPE_PV4SI_V4SI_UQI:
36026 case VOID_FTYPE_PV64QI_V64QI_UDI:
36027 case VOID_FTYPE_PV32HI_V32HI_USI:
36028 case VOID_FTYPE_PV32QI_V32QI_USI:
36029 case VOID_FTYPE_PV16QI_V16QI_UHI:
36030 case VOID_FTYPE_PV16HI_V16HI_UHI:
36031 case VOID_FTYPE_PV8HI_V8HI_UQI:
36032 switch (icode)
36034 /* These builtins and instructions require the memory
36035 to be properly aligned. */
36036 case CODE_FOR_avx512f_storev16sf_mask:
36037 case CODE_FOR_avx512f_storev16si_mask:
36038 case CODE_FOR_avx512f_storev8df_mask:
36039 case CODE_FOR_avx512f_storev8di_mask:
36040 case CODE_FOR_avx512vl_storev8sf_mask:
36041 case CODE_FOR_avx512vl_storev8si_mask:
36042 case CODE_FOR_avx512vl_storev4df_mask:
36043 case CODE_FOR_avx512vl_storev4di_mask:
36044 case CODE_FOR_avx512vl_storev4sf_mask:
36045 case CODE_FOR_avx512vl_storev4si_mask:
36046 case CODE_FOR_avx512vl_storev2df_mask:
36047 case CODE_FOR_avx512vl_storev2di_mask:
36048 aligned_mem = true;
36049 break;
36050 default:
36051 break;
36053 /* FALLTHRU */
36054 case VOID_FTYPE_PV8SF_V8SI_V8SF:
36055 case VOID_FTYPE_PV4DF_V4DI_V4DF:
36056 case VOID_FTYPE_PV4SF_V4SI_V4SF:
36057 case VOID_FTYPE_PV2DF_V2DI_V2DF:
36058 case VOID_FTYPE_PV8SI_V8SI_V8SI:
36059 case VOID_FTYPE_PV4DI_V4DI_V4DI:
36060 case VOID_FTYPE_PV4SI_V4SI_V4SI:
36061 case VOID_FTYPE_PV2DI_V2DI_V2DI:
36062 case VOID_FTYPE_PV8SI_V8DI_UQI:
36063 case VOID_FTYPE_PV8HI_V8DI_UQI:
36064 case VOID_FTYPE_PV16HI_V16SI_UHI:
36065 case VOID_FTYPE_PV16QI_V8DI_UQI:
36066 case VOID_FTYPE_PV16QI_V16SI_UHI:
36067 case VOID_FTYPE_PV4SI_V4DI_UQI:
36068 case VOID_FTYPE_PV4SI_V2DI_UQI:
36069 case VOID_FTYPE_PV8HI_V4DI_UQI:
36070 case VOID_FTYPE_PV8HI_V2DI_UQI:
36071 case VOID_FTYPE_PV8HI_V8SI_UQI:
36072 case VOID_FTYPE_PV8HI_V4SI_UQI:
36073 case VOID_FTYPE_PV16QI_V4DI_UQI:
36074 case VOID_FTYPE_PV16QI_V2DI_UQI:
36075 case VOID_FTYPE_PV16QI_V8SI_UQI:
36076 case VOID_FTYPE_PV16QI_V4SI_UQI:
36077 case VOID_FTYPE_PCHAR_V64QI_UDI:
36078 case VOID_FTYPE_PCHAR_V32QI_USI:
36079 case VOID_FTYPE_PCHAR_V16QI_UHI:
36080 case VOID_FTYPE_PSHORT_V32HI_USI:
36081 case VOID_FTYPE_PSHORT_V16HI_UHI:
36082 case VOID_FTYPE_PSHORT_V8HI_UQI:
36083 case VOID_FTYPE_PINT_V16SI_UHI:
36084 case VOID_FTYPE_PINT_V8SI_UQI:
36085 case VOID_FTYPE_PINT_V4SI_UQI:
36086 case VOID_FTYPE_PINT64_V8DI_UQI:
36087 case VOID_FTYPE_PINT64_V4DI_UQI:
36088 case VOID_FTYPE_PINT64_V2DI_UQI:
36089 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
36090 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
36091 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
36092 case VOID_FTYPE_PFLOAT_V16SF_UHI:
36093 case VOID_FTYPE_PFLOAT_V8SF_UQI:
36094 case VOID_FTYPE_PFLOAT_V4SF_UQI:
36095 case VOID_FTYPE_PV32QI_V32HI_USI:
36096 case VOID_FTYPE_PV16QI_V16HI_UHI:
36097 case VOID_FTYPE_PV8QI_V8HI_UQI:
36098 nargs = 2;
36099 klass = store;
36100 /* Reserve memory operand for target. */
36101 memory = ARRAY_SIZE (args);
36102 break;
36103 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
36104 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
36105 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
36106 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
36107 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
36108 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
36109 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
36110 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
36111 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
36112 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
36113 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
36114 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
36115 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
36116 case V32HI_FTYPE_PCV32HI_V32HI_USI:
36117 case V32QI_FTYPE_PCV32QI_V32QI_USI:
36118 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
36119 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
36120 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
36121 switch (icode)
36123 /* These builtins and instructions require the memory
36124 to be properly aligned. */
36125 case CODE_FOR_avx512f_loadv16sf_mask:
36126 case CODE_FOR_avx512f_loadv16si_mask:
36127 case CODE_FOR_avx512f_loadv8df_mask:
36128 case CODE_FOR_avx512f_loadv8di_mask:
36129 case CODE_FOR_avx512vl_loadv8sf_mask:
36130 case CODE_FOR_avx512vl_loadv8si_mask:
36131 case CODE_FOR_avx512vl_loadv4df_mask:
36132 case CODE_FOR_avx512vl_loadv4di_mask:
36133 case CODE_FOR_avx512vl_loadv4sf_mask:
36134 case CODE_FOR_avx512vl_loadv4si_mask:
36135 case CODE_FOR_avx512vl_loadv2df_mask:
36136 case CODE_FOR_avx512vl_loadv2di_mask:
36137 case CODE_FOR_avx512bw_loadv64qi_mask:
36138 case CODE_FOR_avx512vl_loadv32qi_mask:
36139 case CODE_FOR_avx512vl_loadv16qi_mask:
36140 case CODE_FOR_avx512bw_loadv32hi_mask:
36141 case CODE_FOR_avx512vl_loadv16hi_mask:
36142 case CODE_FOR_avx512vl_loadv8hi_mask:
36143 aligned_mem = true;
36144 break;
36145 default:
36146 break;
36148 /* FALLTHRU */
36149 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
36150 case V32QI_FTYPE_PCCHAR_V32QI_USI:
36151 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
36152 case V32HI_FTYPE_PCSHORT_V32HI_USI:
36153 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
36154 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
36155 case V16SI_FTYPE_PCINT_V16SI_UHI:
36156 case V8SI_FTYPE_PCINT_V8SI_UQI:
36157 case V4SI_FTYPE_PCINT_V4SI_UQI:
36158 case V8DI_FTYPE_PCINT64_V8DI_UQI:
36159 case V4DI_FTYPE_PCINT64_V4DI_UQI:
36160 case V2DI_FTYPE_PCINT64_V2DI_UQI:
36161 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
36162 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
36163 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
36164 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
36165 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
36166 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
36167 nargs = 3;
36168 klass = load;
36169 memory = 0;
36170 break;
36171 case VOID_FTYPE_UINT_UINT_UINT:
36172 case VOID_FTYPE_UINT64_UINT_UINT:
36173 case UCHAR_FTYPE_UINT_UINT_UINT:
36174 case UCHAR_FTYPE_UINT64_UINT_UINT:
36175 nargs = 3;
36176 klass = load;
36177 memory = ARRAY_SIZE (args);
36178 last_arg_constant = true;
36179 break;
36180 default:
36181 gcc_unreachable ();
36184 gcc_assert (nargs <= ARRAY_SIZE (args));
36186 if (klass == store)
36188 arg = CALL_EXPR_ARG (exp, 0);
36189 op = expand_normal (arg);
36190 gcc_assert (target == 0);
36191 if (memory)
36193 op = ix86_zero_extend_to_Pmode (op);
36194 target = gen_rtx_MEM (tmode, op);
36195 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
36196 on it. Try to improve it using get_pointer_alignment,
36197 and if the special builtin is one that requires strict
36198 mode alignment, also from its GET_MODE_ALIGNMENT.
36199 Failure to do so could lead to ix86_legitimate_combined_insn
36200 rejecting all changes to such insns. */
36201 unsigned int align = get_pointer_alignment (arg);
36202 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
36203 align = GET_MODE_ALIGNMENT (tmode);
36204 if (MEM_ALIGN (target) < align)
36205 set_mem_align (target, align);
36207 else
36208 target = force_reg (tmode, op);
36209 arg_adjust = 1;
36211 else
36213 arg_adjust = 0;
36214 if (optimize
36215 || target == 0
36216 || !register_operand (target, tmode)
36217 || GET_MODE (target) != tmode)
36218 target = gen_reg_rtx (tmode);
36221 for (i = 0; i < nargs; i++)
36223 machine_mode mode = insn_p->operand[i + 1].mode;
36224 bool match;
36226 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
36227 op = expand_normal (arg);
36228 match = insn_p->operand[i + 1].predicate (op, mode);
36230 if (last_arg_constant && (i + 1) == nargs)
36232 if (!match)
36234 if (icode == CODE_FOR_lwp_lwpvalsi3
36235 || icode == CODE_FOR_lwp_lwpinssi3
36236 || icode == CODE_FOR_lwp_lwpvaldi3
36237 || icode == CODE_FOR_lwp_lwpinsdi3)
36238 error ("the last argument must be a 32-bit immediate");
36239 else
36240 error ("the last argument must be an 8-bit immediate");
36241 return const0_rtx;
36244 else
36246 if (i == memory)
36248 /* This must be the memory operand. */
36249 op = ix86_zero_extend_to_Pmode (op);
36250 op = gen_rtx_MEM (mode, op);
36251 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
36252 on it. Try to improve it using get_pointer_alignment,
36253 and if the special builtin is one that requires strict
36254 mode alignment, also from its GET_MODE_ALIGNMENT.
36255 Failure to do so could lead to ix86_legitimate_combined_insn
36256 rejecting all changes to such insns. */
36257 unsigned int align = get_pointer_alignment (arg);
36258 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
36259 align = GET_MODE_ALIGNMENT (mode);
36260 if (MEM_ALIGN (op) < align)
36261 set_mem_align (op, align);
36263 else
36265 /* This must be a register. */
36266 if (VECTOR_MODE_P (mode))
36267 op = safe_vector_operand (op, mode);
36269 op = fixup_modeless_constant (op, mode);
36271 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36272 op = copy_to_mode_reg (mode, op);
36273 else
36275 op = copy_to_reg (op);
36276 op = lowpart_subreg (mode, op, GET_MODE (op));
36281 args[i].op = op;
36282 args[i].mode = mode;
36285 switch (nargs)
36287 case 0:
36288 pat = GEN_FCN (icode) (target);
36289 break;
36290 case 1:
36291 pat = GEN_FCN (icode) (target, args[0].op);
36292 break;
36293 case 2:
36294 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36295 break;
36296 case 3:
36297 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36298 break;
36299 default:
36300 gcc_unreachable ();
36303 if (! pat)
36304 return 0;
36305 emit_insn (pat);
36306 return klass == store ? 0 : target;
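/* Illustrative sketch (editorial addition, not part of GCC; assumes the usual
   emmintrin.h intrinsic mapping): how a "store" special-args builtin flows
   through the code above.  A non-temporal store such as

       #include <emmintrin.h>
       void f (double *p, __m128d x) { _mm_stream_pd (p, x); }

   reaches this function via __builtin_ia32_movntpd (VOID_FTYPE_PDOUBLE_V2DF,
   CODE_FOR_sse2_movntv2df): klass == store, so the pointer argument becomes
   the MEM target, and because the icode is in the aligned_mem list above the
   MEM gets GET_MODE_ALIGNMENT (V2DFmode) = 128 bits rather than the default
   BITS_PER_UNIT.  */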
36309 /* Return the integer constant in ARG. Constrain it to be in the range
36310 of the subparts of VEC_TYPE; issue an error if not. */
36312 static int
36313 get_element_number (tree vec_type, tree arg)
36315 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
36317 if (!tree_fits_uhwi_p (arg)
36318 || (elt = tree_to_uhwi (arg), elt > max))
36320 error ("selector must be an integer constant in the range 0..%wi", max);
36321 return 0;
36324 return elt;
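/* Worked example (editorial addition; the type name below is illustrative):
   for a 4-element float vector type TYPE_VECTOR_SUBPARTS is 4, so MAX is 3
   and only the selectors 0..3 are accepted:

       get_element_number (v4sf_type, build_int_cst (integer_type_node, 2))
	 returns 2;
       get_element_number (v4sf_type, build_int_cst (integer_type_node, 4))
	 emits "selector must be an integer constant ..." and returns 0.  */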
36327 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36328 ix86_expand_vector_init. We DO have language-level syntax for this, in
36329 the form of (type){ init-list }. Except that since we can't place emms
36330 instructions from inside the compiler, we can't allow the use of MMX
36331 registers unless the user explicitly asks for it. So we do *not* define
36332 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
36333 we have builtins invoked by mmintrin.h that give us license to emit
36334 these sorts of instructions. */
36336 static rtx
36337 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
36339 machine_mode tmode = TYPE_MODE (type);
36340 machine_mode inner_mode = GET_MODE_INNER (tmode);
36341 int i, n_elt = GET_MODE_NUNITS (tmode);
36342 rtvec v = rtvec_alloc (n_elt);
36344 gcc_assert (VECTOR_MODE_P (tmode));
36345 gcc_assert (call_expr_nargs (exp) == n_elt);
36347 for (i = 0; i < n_elt; ++i)
36349 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
36350 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
36353 if (!target || !register_operand (target, tmode))
36354 target = gen_reg_rtx (tmode);
36356 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
36357 return target;
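/* Usage sketch (editorial addition; assumes the usual mmintrin.h mapping):
   _mm_set_pi32 (hi, lo) calls __builtin_ia32_vec_init_v2si (lo, hi), which
   lands here with a V2SI type.  Each scalar argument is expanded, narrowed
   to the inner mode with gen_lowpart, and the resulting PARALLEL is handed
   to ix86_expand_vector_init.  */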
36360 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36361 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
36362 had a language-level syntax for referencing vector elements. */
36364 static rtx
36365 ix86_expand_vec_ext_builtin (tree exp, rtx target)
36367 machine_mode tmode, mode0;
36368 tree arg0, arg1;
36369 int elt;
36370 rtx op0;
36372 arg0 = CALL_EXPR_ARG (exp, 0);
36373 arg1 = CALL_EXPR_ARG (exp, 1);
36375 op0 = expand_normal (arg0);
36376 elt = get_element_number (TREE_TYPE (arg0), arg1);
36378 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36379 mode0 = TYPE_MODE (TREE_TYPE (arg0));
36380 gcc_assert (VECTOR_MODE_P (mode0));
36382 op0 = force_reg (mode0, op0);
36384 if (optimize || !target || !register_operand (target, tmode))
36385 target = gen_reg_rtx (tmode);
36387 ix86_expand_vector_extract (true, target, op0, elt);
36389 return target;
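/* Usage sketch (editorial addition; assumes the usual xmmintrin.h mapping):
   _mm_cvtss_f32 (x) is defined as __builtin_ia32_vec_ext_v4sf (x, 0) and
   arrives here; get_element_number validates the constant selector and
   ix86_expand_vector_extract emits the actual element extraction:

       float first_elt (__m128 x) { return _mm_cvtss_f32 (x); }  */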
36392 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
36393 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
36394 a language-level syntax for referencing vector elements. */
36396 static rtx
36397 ix86_expand_vec_set_builtin (tree exp)
36399 machine_mode tmode, mode1;
36400 tree arg0, arg1, arg2;
36401 int elt;
36402 rtx op0, op1, target;
36404 arg0 = CALL_EXPR_ARG (exp, 0);
36405 arg1 = CALL_EXPR_ARG (exp, 1);
36406 arg2 = CALL_EXPR_ARG (exp, 2);
36408 tmode = TYPE_MODE (TREE_TYPE (arg0));
36409 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36410 gcc_assert (VECTOR_MODE_P (tmode));
36412 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
36413 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
36414 elt = get_element_number (TREE_TYPE (arg0), arg2);
36416 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
36417 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
36419 op0 = force_reg (tmode, op0);
36420 op1 = force_reg (mode1, op1);
36422 /* OP0 is the source of these builtin functions and shouldn't be
36423 modified. Create a copy, use it and return it as target. */
36424 target = gen_reg_rtx (tmode);
36425 emit_move_insn (target, op0);
36426 ix86_expand_vector_set (true, target, op1, elt);
36428 return target;
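/* Usage sketch (editorial addition; assumes the usual emmintrin.h mapping):
   _mm_insert_epi16 (v, 42, 3) becomes __builtin_ia32_vec_set_v8hi (v, 42, 3)
   and is expanded here.  Note the emit_move_insn above: the builtin's first
   argument is left untouched and the updated vector comes back in a fresh
   register.  */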
36431 /* Emit conditional move of SRC to DST with condition
36432 OP1 CODE OP2. */
36433 static void
36434 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
36436 rtx t;
36438 if (TARGET_CMOVE)
36440 t = ix86_expand_compare (code, op1, op2);
36441 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
36442 src, dst)));
36444 else
36446 rtx_code_label *nomove = gen_label_rtx ();
36447 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
36448 const0_rtx, GET_MODE (op1), 1, nomove);
36449 emit_move_insn (dst, src);
36450 emit_label (nomove);
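/* Editorial sketch of the !TARGET_CMOVE fallback above, in C-like
   pseudocode (names illustrative only):

       if (op1 <reversed CODE> op2)
	 goto nomove;
       dst = src;
     nomove:;

   i.e. the jump skips the move exactly when the original condition
   is false.  */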
36454 /* Choose the max of DST and SRC and put it into DST. */
36455 static void
36456 ix86_emit_move_max (rtx dst, rtx src)
36458 ix86_emit_cmove (dst, src, LTU, dst, src);
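/* Editorial note: the LTU condition makes this an unsigned max, which is
   what MPX bounds need for both halves.  Lower bounds are compared
   directly; upper bounds are kept in one's complement, and since
   ~ub1 > ~ub2 exactly when ub1 < ub2, we have

       max (~ub1, ~ub2) == ~min (ub1, ub2)

   so the same max picks the tighter (smaller) real upper bound.  */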
36461 /* Expand an expression EXP that calls a built-in function,
36462 with result going to TARGET if that's convenient
36463 (and in mode MODE if that's convenient).
36464 SUBTARGET may be used as the target for computing one of EXP's operands.
36465 IGNORE is nonzero if the value is to be ignored. */
36467 static rtx
36468 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
36469 machine_mode mode, int ignore)
36471 size_t i;
36472 enum insn_code icode, icode2;
36473 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
36474 tree arg0, arg1, arg2, arg3, arg4;
36475 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
36476 machine_mode mode0, mode1, mode2, mode3, mode4;
36477 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
36479 /* For CPU builtins that can be folded, fold first and expand the fold. */
36480 switch (fcode)
36482 case IX86_BUILTIN_CPU_INIT:
36484 /* Make it call __cpu_indicator_init in libgcc. */
36485 tree call_expr, fndecl, type;
36486 type = build_function_type_list (integer_type_node, NULL_TREE);
36487 fndecl = build_fn_decl ("__cpu_indicator_init", type);
36488 call_expr = build_call_expr (fndecl, 0);
36489 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
36491 case IX86_BUILTIN_CPU_IS:
36492 case IX86_BUILTIN_CPU_SUPPORTS:
36494 tree arg0 = CALL_EXPR_ARG (exp, 0);
36495 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
36496 gcc_assert (fold_expr != NULL_TREE);
36497 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
36501 HOST_WIDE_INT isa = ix86_isa_flags;
36502 HOST_WIDE_INT isa2 = ix86_isa_flags2;
36503 HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
36504 HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
36505 /* The general case is we require all the ISAs specified in bisa{,2}
36506 to be enabled.
36507 The exceptions are:
36508 OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
36509 OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
36510 OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
36511 where for each such pair it is sufficient if either of the ISAs is
36512 enabled, plus, if the pair is ORed with other options, those others too. */
36513 if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36514 == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36515 && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
36516 isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
36517 if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36518 == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36519 && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
36520 isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
36521 if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36522 == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36523 && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
36524 isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
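/* Worked example (editorial addition): a builtin whose bisa is
   OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 is usable when either
   -msse4.2 or -mcrc32 is enabled.  If only -mcrc32 is on, the SSE4_2/CRC32
   case above ORs the whole pair into isa, so the (bisa & isa) != bisa test
   below no longer triggers the "needs isa option" error.  */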
36525 if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
36527 char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
36528 (enum fpmath_unit) 0, false);
36529 if (!opts)
36530 error ("%qE needs unknown isa option", fndecl);
36531 else
36533 gcc_assert (opts != NULL);
36534 error ("%qE needs isa option %s", fndecl, opts);
36535 free (opts);
36537 return expand_call (exp, target, ignore);
36540 switch (fcode)
36542 case IX86_BUILTIN_BNDMK:
36543 if (!target
36544 || GET_MODE (target) != BNDmode
36545 || !register_operand (target, BNDmode))
36546 target = gen_reg_rtx (BNDmode);
36548 arg0 = CALL_EXPR_ARG (exp, 0);
36549 arg1 = CALL_EXPR_ARG (exp, 1);
36551 op0 = expand_normal (arg0);
36552 op1 = expand_normal (arg1);
36554 if (!register_operand (op0, Pmode))
36555 op0 = ix86_zero_extend_to_Pmode (op0);
36556 if (!register_operand (op1, Pmode))
36557 op1 = ix86_zero_extend_to_Pmode (op1);
36559 /* Builtin arg1 is the size of the block, but instruction op1 should
36560 be (size - 1). */
36561 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
36562 NULL_RTX, 1, OPTAB_DIRECT);
36564 emit_insn (BNDmode == BND64mode
36565 ? gen_bnd64_mk (target, op0, op1)
36566 : gen_bnd32_mk (target, op0, op1));
36567 return target;
36569 case IX86_BUILTIN_BNDSTX:
36570 arg0 = CALL_EXPR_ARG (exp, 0);
36571 arg1 = CALL_EXPR_ARG (exp, 1);
36572 arg2 = CALL_EXPR_ARG (exp, 2);
36574 op0 = expand_normal (arg0);
36575 op1 = expand_normal (arg1);
36576 op2 = expand_normal (arg2);
36578 if (!register_operand (op0, Pmode))
36579 op0 = ix86_zero_extend_to_Pmode (op0);
36580 if (!register_operand (op1, BNDmode))
36581 op1 = copy_to_mode_reg (BNDmode, op1);
36582 if (!register_operand (op2, Pmode))
36583 op2 = ix86_zero_extend_to_Pmode (op2);
36585 emit_insn (BNDmode == BND64mode
36586 ? gen_bnd64_stx (op2, op0, op1)
36587 : gen_bnd32_stx (op2, op0, op1));
36588 return 0;
36590 case IX86_BUILTIN_BNDLDX:
36591 if (!target
36592 || GET_MODE (target) != BNDmode
36593 || !register_operand (target, BNDmode))
36594 target = gen_reg_rtx (BNDmode);
36596 arg0 = CALL_EXPR_ARG (exp, 0);
36597 arg1 = CALL_EXPR_ARG (exp, 1);
36599 op0 = expand_normal (arg0);
36600 op1 = expand_normal (arg1);
36602 if (!register_operand (op0, Pmode))
36603 op0 = ix86_zero_extend_to_Pmode (op0);
36604 if (!register_operand (op1, Pmode))
36605 op1 = ix86_zero_extend_to_Pmode (op1);
36607 emit_insn (BNDmode == BND64mode
36608 ? gen_bnd64_ldx (target, op0, op1)
36609 : gen_bnd32_ldx (target, op0, op1));
36610 return target;
36612 case IX86_BUILTIN_BNDCL:
36613 arg0 = CALL_EXPR_ARG (exp, 0);
36614 arg1 = CALL_EXPR_ARG (exp, 1);
36616 op0 = expand_normal (arg0);
36617 op1 = expand_normal (arg1);
36619 if (!register_operand (op0, Pmode))
36620 op0 = ix86_zero_extend_to_Pmode (op0);
36621 if (!register_operand (op1, BNDmode))
36622 op1 = copy_to_mode_reg (BNDmode, op1);
36624 emit_insn (BNDmode == BND64mode
36625 ? gen_bnd64_cl (op1, op0)
36626 : gen_bnd32_cl (op1, op0));
36627 return 0;
36629 case IX86_BUILTIN_BNDCU:
36630 arg0 = CALL_EXPR_ARG (exp, 0);
36631 arg1 = CALL_EXPR_ARG (exp, 1);
36633 op0 = expand_normal (arg0);
36634 op1 = expand_normal (arg1);
36636 if (!register_operand (op0, Pmode))
36637 op0 = ix86_zero_extend_to_Pmode (op0);
36638 if (!register_operand (op1, BNDmode))
36639 op1 = copy_to_mode_reg (BNDmode, op1);
36641 emit_insn (BNDmode == BND64mode
36642 ? gen_bnd64_cu (op1, op0)
36643 : gen_bnd32_cu (op1, op0));
36644 return 0;
36646 case IX86_BUILTIN_BNDRET:
36647 arg0 = CALL_EXPR_ARG (exp, 0);
36648 target = chkp_get_rtl_bounds (arg0);
36650 /* If no bounds were specified for the returned value,
36651 then use INIT bounds. This usually happens when
36652 some built-in function is expanded. */
36653 if (!target)
36655 rtx t1 = gen_reg_rtx (Pmode);
36656 rtx t2 = gen_reg_rtx (Pmode);
36657 target = gen_reg_rtx (BNDmode);
36658 emit_move_insn (t1, const0_rtx);
36659 emit_move_insn (t2, constm1_rtx);
36660 emit_insn (BNDmode == BND64mode
36661 ? gen_bnd64_mk (target, t1, t2)
36662 : gen_bnd32_mk (target, t1, t2));
36665 gcc_assert (target && REG_P (target));
36666 return target;
36668 case IX86_BUILTIN_BNDNARROW:
36670 rtx m1, m1h1, m1h2, lb, ub, t1;
36672 /* Return value and lb. */
36673 arg0 = CALL_EXPR_ARG (exp, 0);
36674 /* Bounds. */
36675 arg1 = CALL_EXPR_ARG (exp, 1);
36676 /* Size. */
36677 arg2 = CALL_EXPR_ARG (exp, 2);
36679 lb = expand_normal (arg0);
36680 op1 = expand_normal (arg1);
36681 op2 = expand_normal (arg2);
36683 /* The size was passed, but we need to use (size - 1), as for bndmk. */
36684 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
36685 NULL_RTX, 1, OPTAB_DIRECT);
36687 /* Add LB to size and invert to get UB. */
36688 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
36689 op2, 1, OPTAB_DIRECT);
36690 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
36692 if (!register_operand (lb, Pmode))
36693 lb = ix86_zero_extend_to_Pmode (lb);
36694 if (!register_operand (ub, Pmode))
36695 ub = ix86_zero_extend_to_Pmode (ub);
36697 /* We need to move bounds to memory before any computations. */
36698 if (MEM_P (op1))
36699 m1 = op1;
36700 else
36702 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
36703 emit_move_insn (m1, op1);
36706 /* Generate mem expression to be used for access to LB and UB. */
36707 m1h1 = adjust_address (m1, Pmode, 0);
36708 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
36710 t1 = gen_reg_rtx (Pmode);
36712 /* Compute LB. */
36713 emit_move_insn (t1, m1h1);
36714 ix86_emit_move_max (t1, lb);
36715 emit_move_insn (m1h1, t1);
36717 /* Compute UB. UB is stored in 1's complement form. Therefore
36718 we also use max here. */
36719 emit_move_insn (t1, m1h2);
36720 ix86_emit_move_max (t1, ub);
36721 emit_move_insn (m1h2, t1);
36723 op2 = gen_reg_rtx (BNDmode);
36724 emit_move_insn (op2, m1);
36726 return chkp_join_splitted_slot (lb, op2);
36729 case IX86_BUILTIN_BNDINT:
36731 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
36733 if (!target
36734 || GET_MODE (target) != BNDmode
36735 || !register_operand (target, BNDmode))
36736 target = gen_reg_rtx (BNDmode);
36738 arg0 = CALL_EXPR_ARG (exp, 0);
36739 arg1 = CALL_EXPR_ARG (exp, 1);
36741 op0 = expand_normal (arg0);
36742 op1 = expand_normal (arg1);
36744 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
36745 rh1 = adjust_address (res, Pmode, 0);
36746 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
36748 /* Put the first bounds into temporaries. */
36749 lb1 = gen_reg_rtx (Pmode);
36750 ub1 = gen_reg_rtx (Pmode);
36751 if (MEM_P (op0))
36753 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
36754 emit_move_insn (ub1, adjust_address (op0, Pmode,
36755 GET_MODE_SIZE (Pmode)));
36757 else
36759 emit_move_insn (res, op0);
36760 emit_move_insn (lb1, rh1);
36761 emit_move_insn (ub1, rh2);
36764 /* Put the second bounds into temporaries. */
36765 lb2 = gen_reg_rtx (Pmode);
36766 ub2 = gen_reg_rtx (Pmode);
36767 if (MEM_P (op1))
36769 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
36770 emit_move_insn (ub2, adjust_address (op1, Pmode,
36771 GET_MODE_SIZE (Pmode)));
36773 else
36775 emit_move_insn (res, op1);
36776 emit_move_insn (lb2, rh1);
36777 emit_move_insn (ub2, rh2);
36780 /* Compute LB. */
36781 ix86_emit_move_max (lb1, lb2);
36782 emit_move_insn (rh1, lb1);
36784 /* Compute UB. UB is stored in 1's complement form. Therefore
36785 we also use max here. */
36786 ix86_emit_move_max (ub1, ub2);
36787 emit_move_insn (rh2, ub1);
36789 emit_move_insn (target, res);
36791 return target;
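/* Worked example (editorial addition): intersecting the bounds [0, 100]
   and [50, 200].  The lower bound is max (0, 50) = 50; the upper bounds
   are stored complemented, so max (~100, ~200) == ~100 keeps the tighter
   limit and the resulting bounds describe [50, 100].  */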
36794 case IX86_BUILTIN_SIZEOF:
36796 tree name;
36797 rtx symbol;
36799 if (!target
36800 || GET_MODE (target) != Pmode
36801 || !register_operand (target, Pmode))
36802 target = gen_reg_rtx (Pmode);
36804 arg0 = CALL_EXPR_ARG (exp, 0);
36805 gcc_assert (VAR_P (arg0));
36807 name = DECL_ASSEMBLER_NAME (arg0);
36808 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
36810 emit_insn (Pmode == SImode
36811 ? gen_move_size_reloc_si (target, symbol)
36812 : gen_move_size_reloc_di (target, symbol));
36814 return target;
36817 case IX86_BUILTIN_BNDLOWER:
36819 rtx mem, hmem;
36821 if (!target
36822 || GET_MODE (target) != Pmode
36823 || !register_operand (target, Pmode))
36824 target = gen_reg_rtx (Pmode);
36826 arg0 = CALL_EXPR_ARG (exp, 0);
36827 op0 = expand_normal (arg0);
36829 /* We need to move bounds to memory first. */
36830 if (MEM_P (op0))
36831 mem = op0;
36832 else
36834 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36835 emit_move_insn (mem, op0);
36838 /* Generate mem expression to access LB and load it. */
36839 hmem = adjust_address (mem, Pmode, 0);
36840 emit_move_insn (target, hmem);
36842 return target;
36845 case IX86_BUILTIN_BNDUPPER:
36847 rtx mem, hmem, res;
36849 if (!target
36850 || GET_MODE (target) != Pmode
36851 || !register_operand (target, Pmode))
36852 target = gen_reg_rtx (Pmode);
36854 arg0 = CALL_EXPR_ARG (exp, 0);
36855 op0 = expand_normal (arg0);
36857 /* We need to move bounds to memory first. */
36858 if (MEM_P (op0))
36859 mem = op0;
36860 else
36862 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36863 emit_move_insn (mem, op0);
36866 /* Generate mem expression to access UB. */
36867 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
36869 /* We need to invert all bits of UB. */
36870 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
36872 if (res != target)
36873 emit_move_insn (target, res);
36875 return target;
36878 case IX86_BUILTIN_MASKMOVQ:
36879 case IX86_BUILTIN_MASKMOVDQU:
36880 icode = (fcode == IX86_BUILTIN_MASKMOVQ
36881 ? CODE_FOR_mmx_maskmovq
36882 : CODE_FOR_sse2_maskmovdqu);
36883 /* Note the arg order is different from the operand order. */
36884 arg1 = CALL_EXPR_ARG (exp, 0);
36885 arg2 = CALL_EXPR_ARG (exp, 1);
36886 arg0 = CALL_EXPR_ARG (exp, 2);
36887 op0 = expand_normal (arg0);
36888 op1 = expand_normal (arg1);
36889 op2 = expand_normal (arg2);
36890 mode0 = insn_data[icode].operand[0].mode;
36891 mode1 = insn_data[icode].operand[1].mode;
36892 mode2 = insn_data[icode].operand[2].mode;
36894 op0 = ix86_zero_extend_to_Pmode (op0);
36895 op0 = gen_rtx_MEM (mode1, op0);
36897 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36898 op0 = copy_to_mode_reg (mode0, op0);
36899 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36900 op1 = copy_to_mode_reg (mode1, op1);
36901 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36902 op2 = copy_to_mode_reg (mode2, op2);
36903 pat = GEN_FCN (icode) (op0, op1, op2);
36904 if (! pat)
36905 return 0;
36906 emit_insn (pat);
36907 return 0;
36909 case IX86_BUILTIN_LDMXCSR:
36910 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
36911 target = assign_386_stack_local (SImode, SLOT_TEMP);
36912 emit_move_insn (target, op0);
36913 emit_insn (gen_sse_ldmxcsr (target));
36914 return 0;
36916 case IX86_BUILTIN_STMXCSR:
36917 target = assign_386_stack_local (SImode, SLOT_TEMP);
36918 emit_insn (gen_sse_stmxcsr (target));
36919 return copy_to_mode_reg (SImode, target);
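/* Usage sketch (editorial addition; assumes the usual xmmintrin.h mapping):
   _mm_getcsr () and _mm_setcsr () are built on these two builtins, e.g.

       unsigned int csr = _mm_getcsr ();    stmxcsr into a stack temporary
       _mm_setcsr (csr | 0x8040);           ldmxcsr from a stack temporary

   where 0x8040 (FTZ | DAZ) is shown purely as an illustration.  */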
36921 case IX86_BUILTIN_CLFLUSH:
36922 arg0 = CALL_EXPR_ARG (exp, 0);
36923 op0 = expand_normal (arg0);
36924 icode = CODE_FOR_sse2_clflush;
36925 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36926 op0 = ix86_zero_extend_to_Pmode (op0);
36928 emit_insn (gen_sse2_clflush (op0));
36929 return 0;
36931 case IX86_BUILTIN_CLWB:
36932 arg0 = CALL_EXPR_ARG (exp, 0);
36933 op0 = expand_normal (arg0);
36934 icode = CODE_FOR_clwb;
36935 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36936 op0 = ix86_zero_extend_to_Pmode (op0);
36938 emit_insn (gen_clwb (op0));
36939 return 0;
36941 case IX86_BUILTIN_CLFLUSHOPT:
36942 arg0 = CALL_EXPR_ARG (exp, 0);
36943 op0 = expand_normal (arg0);
36944 icode = CODE_FOR_clflushopt;
36945 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36946 op0 = ix86_zero_extend_to_Pmode (op0);
36948 emit_insn (gen_clflushopt (op0));
36949 return 0;
36951 case IX86_BUILTIN_MONITOR:
36952 case IX86_BUILTIN_MONITORX:
36953 arg0 = CALL_EXPR_ARG (exp, 0);
36954 arg1 = CALL_EXPR_ARG (exp, 1);
36955 arg2 = CALL_EXPR_ARG (exp, 2);
36956 op0 = expand_normal (arg0);
36957 op1 = expand_normal (arg1);
36958 op2 = expand_normal (arg2);
36959 if (!REG_P (op0))
36960 op0 = ix86_zero_extend_to_Pmode (op0);
36961 if (!REG_P (op1))
36962 op1 = copy_to_mode_reg (SImode, op1);
36963 if (!REG_P (op2))
36964 op2 = copy_to_mode_reg (SImode, op2);
36966 emit_insn (fcode == IX86_BUILTIN_MONITOR
36967 ? ix86_gen_monitor (op0, op1, op2)
36968 : ix86_gen_monitorx (op0, op1, op2));
36969 return 0;
36971 case IX86_BUILTIN_MWAIT:
36972 arg0 = CALL_EXPR_ARG (exp, 0);
36973 arg1 = CALL_EXPR_ARG (exp, 1);
36974 op0 = expand_normal (arg0);
36975 op1 = expand_normal (arg1);
36976 if (!REG_P (op0))
36977 op0 = copy_to_mode_reg (SImode, op0);
36978 if (!REG_P (op1))
36979 op1 = copy_to_mode_reg (SImode, op1);
36980 emit_insn (gen_sse3_mwait (op0, op1));
36981 return 0;
36983 case IX86_BUILTIN_MWAITX:
36984 arg0 = CALL_EXPR_ARG (exp, 0);
36985 arg1 = CALL_EXPR_ARG (exp, 1);
36986 arg2 = CALL_EXPR_ARG (exp, 2);
36987 op0 = expand_normal (arg0);
36988 op1 = expand_normal (arg1);
36989 op2 = expand_normal (arg2);
36990 if (!REG_P (op0))
36991 op0 = copy_to_mode_reg (SImode, op0);
36992 if (!REG_P (op1))
36993 op1 = copy_to_mode_reg (SImode, op1);
36994 if (!REG_P (op2))
36995 op2 = copy_to_mode_reg (SImode, op2);
36996 emit_insn (gen_mwaitx (op0, op1, op2));
36997 return 0;
36999 case IX86_BUILTIN_CLZERO:
37000 arg0 = CALL_EXPR_ARG (exp, 0);
37001 op0 = expand_normal (arg0);
37002 if (!REG_P (op0))
37003 op0 = ix86_zero_extend_to_Pmode (op0);
37004 emit_insn (ix86_gen_clzero (op0));
37005 return 0;
37007 case IX86_BUILTIN_VEC_INIT_V2SI:
37008 case IX86_BUILTIN_VEC_INIT_V4HI:
37009 case IX86_BUILTIN_VEC_INIT_V8QI:
37010 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
37012 case IX86_BUILTIN_VEC_EXT_V2DF:
37013 case IX86_BUILTIN_VEC_EXT_V2DI:
37014 case IX86_BUILTIN_VEC_EXT_V4SF:
37015 case IX86_BUILTIN_VEC_EXT_V4SI:
37016 case IX86_BUILTIN_VEC_EXT_V8HI:
37017 case IX86_BUILTIN_VEC_EXT_V2SI:
37018 case IX86_BUILTIN_VEC_EXT_V4HI:
37019 case IX86_BUILTIN_VEC_EXT_V16QI:
37020 return ix86_expand_vec_ext_builtin (exp, target);
37022 case IX86_BUILTIN_VEC_SET_V2DI:
37023 case IX86_BUILTIN_VEC_SET_V4SF:
37024 case IX86_BUILTIN_VEC_SET_V4SI:
37025 case IX86_BUILTIN_VEC_SET_V8HI:
37026 case IX86_BUILTIN_VEC_SET_V4HI:
37027 case IX86_BUILTIN_VEC_SET_V16QI:
37028 return ix86_expand_vec_set_builtin (exp);
37030 case IX86_BUILTIN_NANQ:
37031 case IX86_BUILTIN_NANSQ:
37032 return expand_call (exp, target, ignore);
37034 case IX86_BUILTIN_RDPID:
37036 op0 = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
37038 if (TARGET_64BIT)
37040 insn = gen_rdpid_rex64 (op0);
37041 op0 = convert_to_mode (SImode, op0, 1);
37043 else
37044 insn = gen_rdpid (op0);
37045 emit_insn (insn);
37047 if (target == 0)
37049 /* mode is VOIDmode if __builtin_rdpid has been called
37050 without lhs. */
37051 if (mode == VOIDmode)
37052 return target;
37053 target = gen_reg_rtx (mode);
37055 emit_move_insn (target, op0);
37056 return target;
37057 case IX86_BUILTIN_RDPMC:
37058 case IX86_BUILTIN_RDTSC:
37059 case IX86_BUILTIN_RDTSCP:
37060 case IX86_BUILTIN_XGETBV:
37062 op0 = gen_reg_rtx (DImode);
37063 op1 = gen_reg_rtx (DImode);
37065 if (fcode == IX86_BUILTIN_RDPMC)
37067 arg0 = CALL_EXPR_ARG (exp, 0);
37068 op2 = expand_normal (arg0);
37069 if (!register_operand (op2, SImode))
37070 op2 = copy_to_mode_reg (SImode, op2);
37072 insn = (TARGET_64BIT
37073 ? gen_rdpmc_rex64 (op0, op1, op2)
37074 : gen_rdpmc (op0, op2));
37075 emit_insn (insn);
37077 else if (fcode == IX86_BUILTIN_XGETBV)
37079 arg0 = CALL_EXPR_ARG (exp, 0);
37080 op2 = expand_normal (arg0);
37081 if (!register_operand (op2, SImode))
37082 op2 = copy_to_mode_reg (SImode, op2);
37084 insn = (TARGET_64BIT
37085 ? gen_xgetbv_rex64 (op0, op1, op2)
37086 : gen_xgetbv (op0, op2));
37087 emit_insn (insn);
37089 else if (fcode == IX86_BUILTIN_RDTSC)
37091 insn = (TARGET_64BIT
37092 ? gen_rdtsc_rex64 (op0, op1)
37093 : gen_rdtsc (op0));
37094 emit_insn (insn);
37096 else
37098 op2 = gen_reg_rtx (SImode);
37100 insn = (TARGET_64BIT
37101 ? gen_rdtscp_rex64 (op0, op1, op2)
37102 : gen_rdtscp (op0, op2));
37103 emit_insn (insn);
37105 arg0 = CALL_EXPR_ARG (exp, 0);
37106 op4 = expand_normal (arg0);
37107 if (!address_operand (op4, VOIDmode))
37109 op4 = convert_memory_address (Pmode, op4);
37110 op4 = copy_addr_to_reg (op4);
37112 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
37115 if (target == 0)
37117 /* mode is VOIDmode if __builtin_rd* has been called
37118 without lhs. */
37119 if (mode == VOIDmode)
37120 return target;
37121 target = gen_reg_rtx (mode);
37124 if (TARGET_64BIT)
37126 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
37127 op1, 1, OPTAB_DIRECT);
37128 op0 = expand_simple_binop (DImode, IOR, op0, op1,
37129 op0, 1, OPTAB_DIRECT);
37132 emit_move_insn (target, op0);
37133 return target;
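/* Editorial note: on 64-bit targets the low and high halves produced above
   (op0 and op1, both zero-extended to DImode) are combined into the final
   result as (op1 << 32) | op0; e.g. EDX:EAX = 0x00000001:23456789 makes
   __builtin_ia32_rdtsc return 0x0000000123456789.  */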
37135 case IX86_BUILTIN_FXSAVE:
37136 case IX86_BUILTIN_FXRSTOR:
37137 case IX86_BUILTIN_FXSAVE64:
37138 case IX86_BUILTIN_FXRSTOR64:
37139 case IX86_BUILTIN_FNSTENV:
37140 case IX86_BUILTIN_FLDENV:
37141 mode0 = BLKmode;
37142 switch (fcode)
37144 case IX86_BUILTIN_FXSAVE:
37145 icode = CODE_FOR_fxsave;
37146 break;
37147 case IX86_BUILTIN_FXRSTOR:
37148 icode = CODE_FOR_fxrstor;
37149 break;
37150 case IX86_BUILTIN_FXSAVE64:
37151 icode = CODE_FOR_fxsave64;
37152 break;
37153 case IX86_BUILTIN_FXRSTOR64:
37154 icode = CODE_FOR_fxrstor64;
37155 break;
37156 case IX86_BUILTIN_FNSTENV:
37157 icode = CODE_FOR_fnstenv;
37158 break;
37159 case IX86_BUILTIN_FLDENV:
37160 icode = CODE_FOR_fldenv;
37161 break;
37162 default:
37163 gcc_unreachable ();
37166 arg0 = CALL_EXPR_ARG (exp, 0);
37167 op0 = expand_normal (arg0);
37169 if (!address_operand (op0, VOIDmode))
37171 op0 = convert_memory_address (Pmode, op0);
37172 op0 = copy_addr_to_reg (op0);
37174 op0 = gen_rtx_MEM (mode0, op0);
37176 pat = GEN_FCN (icode) (op0);
37177 if (pat)
37178 emit_insn (pat);
37179 return 0;
37181 case IX86_BUILTIN_XSETBV:
37182 arg0 = CALL_EXPR_ARG (exp, 0);
37183 arg1 = CALL_EXPR_ARG (exp, 1);
37184 op0 = expand_normal (arg0);
37185 op1 = expand_normal (arg1);
37187 if (!REG_P (op0))
37188 op0 = copy_to_mode_reg (SImode, op0);
37190 if (TARGET_64BIT)
37192 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37193 NULL, 1, OPTAB_DIRECT);
37195 op2 = gen_lowpart (SImode, op2);
37196 op1 = gen_lowpart (SImode, op1);
37197 if (!REG_P (op1))
37198 op1 = copy_to_mode_reg (SImode, op1);
37199 if (!REG_P (op2))
37200 op2 = copy_to_mode_reg (SImode, op2);
37201 icode = CODE_FOR_xsetbv_rex64;
37202 pat = GEN_FCN (icode) (op0, op1, op2);
37204 else
37206 if (!REG_P (op1))
37207 op1 = copy_to_mode_reg (DImode, op1);
37208 icode = CODE_FOR_xsetbv;
37209 pat = GEN_FCN (icode) (op0, op1);
37211 if (pat)
37212 emit_insn (pat);
37213 return 0;
37215 case IX86_BUILTIN_XSAVE:
37216 case IX86_BUILTIN_XRSTOR:
37217 case IX86_BUILTIN_XSAVE64:
37218 case IX86_BUILTIN_XRSTOR64:
37219 case IX86_BUILTIN_XSAVEOPT:
37220 case IX86_BUILTIN_XSAVEOPT64:
37221 case IX86_BUILTIN_XSAVES:
37222 case IX86_BUILTIN_XRSTORS:
37223 case IX86_BUILTIN_XSAVES64:
37224 case IX86_BUILTIN_XRSTORS64:
37225 case IX86_BUILTIN_XSAVEC:
37226 case IX86_BUILTIN_XSAVEC64:
37227 arg0 = CALL_EXPR_ARG (exp, 0);
37228 arg1 = CALL_EXPR_ARG (exp, 1);
37229 op0 = expand_normal (arg0);
37230 op1 = expand_normal (arg1);
37232 if (!address_operand (op0, VOIDmode))
37234 op0 = convert_memory_address (Pmode, op0);
37235 op0 = copy_addr_to_reg (op0);
37237 op0 = gen_rtx_MEM (BLKmode, op0);
37239 op1 = force_reg (DImode, op1);
37241 if (TARGET_64BIT)
37243 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37244 NULL, 1, OPTAB_DIRECT);
37245 switch (fcode)
37247 case IX86_BUILTIN_XSAVE:
37248 icode = CODE_FOR_xsave_rex64;
37249 break;
37250 case IX86_BUILTIN_XRSTOR:
37251 icode = CODE_FOR_xrstor_rex64;
37252 break;
37253 case IX86_BUILTIN_XSAVE64:
37254 icode = CODE_FOR_xsave64;
37255 break;
37256 case IX86_BUILTIN_XRSTOR64:
37257 icode = CODE_FOR_xrstor64;
37258 break;
37259 case IX86_BUILTIN_XSAVEOPT:
37260 icode = CODE_FOR_xsaveopt_rex64;
37261 break;
37262 case IX86_BUILTIN_XSAVEOPT64:
37263 icode = CODE_FOR_xsaveopt64;
37264 break;
37265 case IX86_BUILTIN_XSAVES:
37266 icode = CODE_FOR_xsaves_rex64;
37267 break;
37268 case IX86_BUILTIN_XRSTORS:
37269 icode = CODE_FOR_xrstors_rex64;
37270 break;
37271 case IX86_BUILTIN_XSAVES64:
37272 icode = CODE_FOR_xsaves64;
37273 break;
37274 case IX86_BUILTIN_XRSTORS64:
37275 icode = CODE_FOR_xrstors64;
37276 break;
37277 case IX86_BUILTIN_XSAVEC:
37278 icode = CODE_FOR_xsavec_rex64;
37279 break;
37280 case IX86_BUILTIN_XSAVEC64:
37281 icode = CODE_FOR_xsavec64;
37282 break;
37283 default:
37284 gcc_unreachable ();
37287 op2 = gen_lowpart (SImode, op2);
37288 op1 = gen_lowpart (SImode, op1);
37289 pat = GEN_FCN (icode) (op0, op1, op2);
37291 else
37293 switch (fcode)
37295 case IX86_BUILTIN_XSAVE:
37296 icode = CODE_FOR_xsave;
37297 break;
37298 case IX86_BUILTIN_XRSTOR:
37299 icode = CODE_FOR_xrstor;
37300 break;
37301 case IX86_BUILTIN_XSAVEOPT:
37302 icode = CODE_FOR_xsaveopt;
37303 break;
37304 case IX86_BUILTIN_XSAVES:
37305 icode = CODE_FOR_xsaves;
37306 break;
37307 case IX86_BUILTIN_XRSTORS:
37308 icode = CODE_FOR_xrstors;
37309 break;
37310 case IX86_BUILTIN_XSAVEC:
37311 icode = CODE_FOR_xsavec;
37312 break;
37313 default:
37314 gcc_unreachable ();
37316 pat = GEN_FCN (icode) (op0, op1);
37319 if (pat)
37320 emit_insn (pat);
37321 return 0;
37323 case IX86_BUILTIN_LLWPCB:
37324 arg0 = CALL_EXPR_ARG (exp, 0);
37325 op0 = expand_normal (arg0);
37326 icode = CODE_FOR_lwp_llwpcb;
37327 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37328 op0 = ix86_zero_extend_to_Pmode (op0);
37329 emit_insn (gen_lwp_llwpcb (op0));
37330 return 0;
37332 case IX86_BUILTIN_SLWPCB:
37333 icode = CODE_FOR_lwp_slwpcb;
37334 if (!target
37335 || !insn_data[icode].operand[0].predicate (target, Pmode))
37336 target = gen_reg_rtx (Pmode);
37337 emit_insn (gen_lwp_slwpcb (target));
37338 return target;
37340 case IX86_BUILTIN_BEXTRI32:
37341 case IX86_BUILTIN_BEXTRI64:
37342 arg0 = CALL_EXPR_ARG (exp, 0);
37343 arg1 = CALL_EXPR_ARG (exp, 1);
37344 op0 = expand_normal (arg0);
37345 op1 = expand_normal (arg1);
37346 icode = (fcode == IX86_BUILTIN_BEXTRI32
37347 ? CODE_FOR_tbm_bextri_si
37348 : CODE_FOR_tbm_bextri_di);
37349 if (!CONST_INT_P (op1))
37351 error ("last argument must be an immediate");
37352 return const0_rtx;
37354 else
37356 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
37357 unsigned char lsb_index = INTVAL (op1) & 0xFF;
37358 op1 = GEN_INT (length);
37359 op2 = GEN_INT (lsb_index);
37360 pat = GEN_FCN (icode) (target, op0, op1, op2);
37361 if (pat)
37362 emit_insn (pat);
37363 return target;
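/* Worked example (editorial addition; assumes the usual tbmintrin.h builtin
   name): the single bextri immediate packs the starting bit in bits 7:0 and
   the field length in bits 15:8, exactly as split above:

       __builtin_ia32_bextri_u32 (x, 0x0804)
	 -> lsb_index = 4, length = 8: extract 8 bits of x starting
	    at bit 4.  */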
37366 case IX86_BUILTIN_RDRAND16_STEP:
37367 icode = CODE_FOR_rdrandhi_1;
37368 mode0 = HImode;
37369 goto rdrand_step;
37371 case IX86_BUILTIN_RDRAND32_STEP:
37372 icode = CODE_FOR_rdrandsi_1;
37373 mode0 = SImode;
37374 goto rdrand_step;
37376 case IX86_BUILTIN_RDRAND64_STEP:
37377 icode = CODE_FOR_rdranddi_1;
37378 mode0 = DImode;
37380 rdrand_step:
37381 arg0 = CALL_EXPR_ARG (exp, 0);
37382 op1 = expand_normal (arg0);
37383 if (!address_operand (op1, VOIDmode))
37385 op1 = convert_memory_address (Pmode, op1);
37386 op1 = copy_addr_to_reg (op1);
37389 op0 = gen_reg_rtx (mode0);
37390 emit_insn (GEN_FCN (icode) (op0));
37392 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37394 op1 = gen_reg_rtx (SImode);
37395 emit_move_insn (op1, CONST1_RTX (SImode));
37397 /* Emit SImode conditional move. */
37398 if (mode0 == HImode)
37400 if (TARGET_ZERO_EXTEND_WITH_AND
37401 && optimize_function_for_speed_p (cfun))
37403 op2 = force_reg (SImode, const0_rtx);
37405 emit_insn (gen_movstricthi
37406 (gen_lowpart (HImode, op2), op0));
37408 else
37410 op2 = gen_reg_rtx (SImode);
37412 emit_insn (gen_zero_extendhisi2 (op2, op0));
37415 else if (mode0 == SImode)
37416 op2 = op0;
37417 else
37418 op2 = gen_rtx_SUBREG (SImode, op0, 0);
37420 if (target == 0
37421 || !register_operand (target, SImode))
37422 target = gen_reg_rtx (SImode);
37424 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
37425 const0_rtx);
37426 emit_insn (gen_rtx_SET (target,
37427 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
37428 return target;
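/* Editorial note: rdrand sets CF on success and, per the ISA, zeroes its
   destination on failure.  The conditional move above therefore yields 1
   when CF is set and otherwise the (zero) destination value, giving the
   "1 on success, 0 on failure" result of the *_step intrinsics while the
   random value itself goes through the pointer argument.  */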
37430 case IX86_BUILTIN_RDSEED16_STEP:
37431 icode = CODE_FOR_rdseedhi_1;
37432 mode0 = HImode;
37433 goto rdseed_step;
37435 case IX86_BUILTIN_RDSEED32_STEP:
37436 icode = CODE_FOR_rdseedsi_1;
37437 mode0 = SImode;
37438 goto rdseed_step;
37440 case IX86_BUILTIN_RDSEED64_STEP:
37441 icode = CODE_FOR_rdseeddi_1;
37442 mode0 = DImode;
37444 rdseed_step:
37445 arg0 = CALL_EXPR_ARG (exp, 0);
37446 op1 = expand_normal (arg0);
37447 if (!address_operand (op1, VOIDmode))
37449 op1 = convert_memory_address (Pmode, op1);
37450 op1 = copy_addr_to_reg (op1);
37453 op0 = gen_reg_rtx (mode0);
37454 emit_insn (GEN_FCN (icode) (op0));
37456 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37458 op2 = gen_reg_rtx (QImode);
37460 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
37461 const0_rtx);
37462 emit_insn (gen_rtx_SET (op2, pat));
37464 if (target == 0
37465 || !register_operand (target, SImode))
37466 target = gen_reg_rtx (SImode);
37468 emit_insn (gen_zero_extendqisi2 (target, op2));
37469 return target;
37471 case IX86_BUILTIN_SBB32:
37472 icode = CODE_FOR_subborrowsi;
37473 icode2 = CODE_FOR_subborrowsi_0;
37474 mode0 = SImode;
37475 mode1 = DImode;
37476 mode2 = CCmode;
37477 goto handlecarry;
37479 case IX86_BUILTIN_SBB64:
37480 icode = CODE_FOR_subborrowdi;
37481 icode2 = CODE_FOR_subborrowdi_0;
37482 mode0 = DImode;
37483 mode1 = TImode;
37484 mode2 = CCmode;
37485 goto handlecarry;
37487 case IX86_BUILTIN_ADDCARRYX32:
37488 icode = CODE_FOR_addcarrysi;
37489 icode2 = CODE_FOR_addcarrysi_0;
37490 mode0 = SImode;
37491 mode1 = DImode;
37492 mode2 = CCCmode;
37493 goto handlecarry;
37495 case IX86_BUILTIN_ADDCARRYX64:
37496 icode = CODE_FOR_addcarrydi;
37497 icode2 = CODE_FOR_addcarrydi_0;
37498 mode0 = DImode;
37499 mode1 = TImode;
37500 mode2 = CCCmode;
37502 handlecarry:
37503 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
37504 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
37505 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
37506 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
37508 op1 = expand_normal (arg0);
37509 if (!integer_zerop (arg0))
37510 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
37512 op2 = expand_normal (arg1);
37513 if (!register_operand (op2, mode0))
37514 op2 = copy_to_mode_reg (mode0, op2);
37516 op3 = expand_normal (arg2);
37517 if (!register_operand (op3, mode0))
37518 op3 = copy_to_mode_reg (mode0, op3);
37520 op4 = expand_normal (arg3);
37521 if (!address_operand (op4, VOIDmode))
37523 op4 = convert_memory_address (Pmode, op4);
37524 op4 = copy_addr_to_reg (op4);
37527 op0 = gen_reg_rtx (mode0);
37528 if (integer_zerop (arg0))
37530 /* If arg0 is 0, optimize right away into an add or sub
37531 instruction that sets CCCmode flags. */
37532 op1 = gen_rtx_REG (mode2, FLAGS_REG);
37533 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
37535 else
37537 /* Generate CF from input operand. */
37538 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
37540 /* Generate instruction that consumes CF. */
37541 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
37542 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
37543 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
37544 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
37547 /* Return current CF value. */
37548 if (target == 0)
37549 target = gen_reg_rtx (QImode);
37551 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
37552 emit_insn (gen_rtx_SET (target, pat));
37554 /* Store the result. */
37555 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
37557 return target;
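/* Usage sketch (editorial addition; assumes the usual adxintrin.h mapping):
   chained multi-word addition passes through here twice:

       unsigned int a0, a1, b0, b1, lo, hi;
       unsigned char c;
       c = _addcarryx_u32 (0, a0, b0, &lo);   carry-in known zero: icode2
       c = _addcarryx_u32 (c, a1, b1, &hi);   CF regenerated, adc form used

   The first call takes the integer_zerop (arg0) path and emits a plain add
   that sets the carry flag; the second rebuilds CF from C and emits the
   carry-consuming pattern.  The value returned is the resulting CF.  */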
37559 case IX86_BUILTIN_READ_FLAGS:
37560 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
37562 if (optimize
37563 || target == NULL_RTX
37564 || !nonimmediate_operand (target, word_mode)
37565 || GET_MODE (target) != word_mode)
37566 target = gen_reg_rtx (word_mode);
37568 emit_insn (gen_pop (target));
37569 return target;
37571 case IX86_BUILTIN_WRITE_FLAGS:
37573 arg0 = CALL_EXPR_ARG (exp, 0);
37574 op0 = expand_normal (arg0);
37575 if (!general_no_elim_operand (op0, word_mode))
37576 op0 = copy_to_mode_reg (word_mode, op0);
37578 emit_insn (gen_push (op0));
37579 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
37580 return 0;
37582 case IX86_BUILTIN_KTESTC8:
37583 icode = CODE_FOR_ktestqi;
37584 mode3 = CCCmode;
37585 goto kortest;
37587 case IX86_BUILTIN_KTESTZ8:
37588 icode = CODE_FOR_ktestqi;
37589 mode3 = CCZmode;
37590 goto kortest;
37592 case IX86_BUILTIN_KTESTC16:
37593 icode = CODE_FOR_ktesthi;
37594 mode3 = CCCmode;
37595 goto kortest;
37597 case IX86_BUILTIN_KTESTZ16:
37598 icode = CODE_FOR_ktesthi;
37599 mode3 = CCZmode;
37600 goto kortest;
37602 case IX86_BUILTIN_KTESTC32:
37603 icode = CODE_FOR_ktestsi;
37604 mode3 = CCCmode;
37605 goto kortest;
37607 case IX86_BUILTIN_KTESTZ32:
37608 icode = CODE_FOR_ktestsi;
37609 mode3 = CCZmode;
37610 goto kortest;
37612 case IX86_BUILTIN_KTESTC64:
37613 icode = CODE_FOR_ktestdi;
37614 mode3 = CCCmode;
37615 goto kortest;
37617 case IX86_BUILTIN_KTESTZ64:
37618 icode = CODE_FOR_ktestdi;
37619 mode3 = CCZmode;
37620 goto kortest;
37622 case IX86_BUILTIN_KORTESTC8:
37623 icode = CODE_FOR_kortestqi;
37624 mode3 = CCCmode;
37625 goto kortest;
37627 case IX86_BUILTIN_KORTESTZ8:
37628 icode = CODE_FOR_kortestqi;
37629 mode3 = CCZmode;
37630 goto kortest;
37632 case IX86_BUILTIN_KORTESTC16:
37633 icode = CODE_FOR_kortesthi;
37634 mode3 = CCCmode;
37635 goto kortest;
37637 case IX86_BUILTIN_KORTESTZ16:
37638 icode = CODE_FOR_kortesthi;
37639 mode3 = CCZmode;
37640 goto kortest;
37642 case IX86_BUILTIN_KORTESTC32:
37643 icode = CODE_FOR_kortestsi;
37644 mode3 = CCCmode;
37645 goto kortest;
37647 case IX86_BUILTIN_KORTESTZ32:
37648 icode = CODE_FOR_kortestsi;
37649 mode3 = CCZmode;
37650 goto kortest;
37652 case IX86_BUILTIN_KORTESTC64:
37653 icode = CODE_FOR_kortestdi;
37654 mode3 = CCCmode;
37655 goto kortest;
37657 case IX86_BUILTIN_KORTESTZ64:
37658 icode = CODE_FOR_kortestdi;
37659 mode3 = CCZmode;
37661 kortest:
37662 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
37663 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
37664 op0 = expand_normal (arg0);
37665 op1 = expand_normal (arg1);
37667 mode0 = insn_data[icode].operand[0].mode;
37668 mode1 = insn_data[icode].operand[1].mode;
37670 if (GET_MODE (op0) != VOIDmode)
37671 op0 = force_reg (GET_MODE (op0), op0);
37673 op0 = gen_lowpart (mode0, op0);
37675 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37676 op0 = copy_to_mode_reg (mode0, op0);
37678 if (GET_MODE (op1) != VOIDmode)
37679 op1 = force_reg (GET_MODE (op1), op1);
37681 op1 = gen_lowpart (mode1, op1);
37683 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37684 op1 = copy_to_mode_reg (mode1, op1);
37686 target = gen_reg_rtx (QImode);
37688 /* Emit kortest. */
37689 emit_insn (GEN_FCN (icode) (op0, op1));
37690 /* And use setcc to return result from flags. */
37691 ix86_expand_setcc (target, EQ,
37692 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
37693 return target;
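/* Editorial note: kortest ORs the two mask operands and only sets flags;
   the CCZmode variants return 1 iff that OR is all zeros (ZF), the CCCmode
   variants return 1 iff it is all ones (CF).  The ktest variants are
   handled the same way here and differ only in how the instruction derives
   the flags.  E.g. (assuming the usual avx512fintrin.h mapping)
   _mm512_kortestz (m1, m2) returns 1 only when no bit is set in either
   16-bit mask.  */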
37695 case IX86_BUILTIN_GATHERSIV2DF:
37696 icode = CODE_FOR_avx2_gathersiv2df;
37697 goto gather_gen;
37698 case IX86_BUILTIN_GATHERSIV4DF:
37699 icode = CODE_FOR_avx2_gathersiv4df;
37700 goto gather_gen;
37701 case IX86_BUILTIN_GATHERDIV2DF:
37702 icode = CODE_FOR_avx2_gatherdiv2df;
37703 goto gather_gen;
37704 case IX86_BUILTIN_GATHERDIV4DF:
37705 icode = CODE_FOR_avx2_gatherdiv4df;
37706 goto gather_gen;
37707 case IX86_BUILTIN_GATHERSIV4SF:
37708 icode = CODE_FOR_avx2_gathersiv4sf;
37709 goto gather_gen;
37710 case IX86_BUILTIN_GATHERSIV8SF:
37711 icode = CODE_FOR_avx2_gathersiv8sf;
37712 goto gather_gen;
37713 case IX86_BUILTIN_GATHERDIV4SF:
37714 icode = CODE_FOR_avx2_gatherdiv4sf;
37715 goto gather_gen;
37716 case IX86_BUILTIN_GATHERDIV8SF:
37717 icode = CODE_FOR_avx2_gatherdiv8sf;
37718 goto gather_gen;
37719 case IX86_BUILTIN_GATHERSIV2DI:
37720 icode = CODE_FOR_avx2_gathersiv2di;
37721 goto gather_gen;
37722 case IX86_BUILTIN_GATHERSIV4DI:
37723 icode = CODE_FOR_avx2_gathersiv4di;
37724 goto gather_gen;
37725 case IX86_BUILTIN_GATHERDIV2DI:
37726 icode = CODE_FOR_avx2_gatherdiv2di;
37727 goto gather_gen;
37728 case IX86_BUILTIN_GATHERDIV4DI:
37729 icode = CODE_FOR_avx2_gatherdiv4di;
37730 goto gather_gen;
37731 case IX86_BUILTIN_GATHERSIV4SI:
37732 icode = CODE_FOR_avx2_gathersiv4si;
37733 goto gather_gen;
37734 case IX86_BUILTIN_GATHERSIV8SI:
37735 icode = CODE_FOR_avx2_gathersiv8si;
37736 goto gather_gen;
37737 case IX86_BUILTIN_GATHERDIV4SI:
37738 icode = CODE_FOR_avx2_gatherdiv4si;
37739 goto gather_gen;
37740 case IX86_BUILTIN_GATHERDIV8SI:
37741 icode = CODE_FOR_avx2_gatherdiv8si;
37742 goto gather_gen;
37743 case IX86_BUILTIN_GATHERALTSIV4DF:
37744 icode = CODE_FOR_avx2_gathersiv4df;
37745 goto gather_gen;
37746 case IX86_BUILTIN_GATHERALTDIV8SF:
37747 icode = CODE_FOR_avx2_gatherdiv8sf;
37748 goto gather_gen;
37749 case IX86_BUILTIN_GATHERALTSIV4DI:
37750 icode = CODE_FOR_avx2_gathersiv4di;
37751 goto gather_gen;
37752 case IX86_BUILTIN_GATHERALTDIV8SI:
37753 icode = CODE_FOR_avx2_gatherdiv8si;
37754 goto gather_gen;
37755 case IX86_BUILTIN_GATHER3SIV16SF:
37756 icode = CODE_FOR_avx512f_gathersiv16sf;
37757 goto gather_gen;
37758 case IX86_BUILTIN_GATHER3SIV8DF:
37759 icode = CODE_FOR_avx512f_gathersiv8df;
37760 goto gather_gen;
37761 case IX86_BUILTIN_GATHER3DIV16SF:
37762 icode = CODE_FOR_avx512f_gatherdiv16sf;
37763 goto gather_gen;
37764 case IX86_BUILTIN_GATHER3DIV8DF:
37765 icode = CODE_FOR_avx512f_gatherdiv8df;
37766 goto gather_gen;
37767 case IX86_BUILTIN_GATHER3SIV16SI:
37768 icode = CODE_FOR_avx512f_gathersiv16si;
37769 goto gather_gen;
37770 case IX86_BUILTIN_GATHER3SIV8DI:
37771 icode = CODE_FOR_avx512f_gathersiv8di;
37772 goto gather_gen;
37773 case IX86_BUILTIN_GATHER3DIV16SI:
37774 icode = CODE_FOR_avx512f_gatherdiv16si;
37775 goto gather_gen;
37776 case IX86_BUILTIN_GATHER3DIV8DI:
37777 icode = CODE_FOR_avx512f_gatherdiv8di;
37778 goto gather_gen;
37779 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37780 icode = CODE_FOR_avx512f_gathersiv8df;
37781 goto gather_gen;
37782 case IX86_BUILTIN_GATHER3ALTDIV16SF:
37783 icode = CODE_FOR_avx512f_gatherdiv16sf;
37784 goto gather_gen;
37785 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37786 icode = CODE_FOR_avx512f_gathersiv8di;
37787 goto gather_gen;
37788 case IX86_BUILTIN_GATHER3ALTDIV16SI:
37789 icode = CODE_FOR_avx512f_gatherdiv16si;
37790 goto gather_gen;
37791 case IX86_BUILTIN_GATHER3SIV2DF:
37792 icode = CODE_FOR_avx512vl_gathersiv2df;
37793 goto gather_gen;
37794 case IX86_BUILTIN_GATHER3SIV4DF:
37795 icode = CODE_FOR_avx512vl_gathersiv4df;
37796 goto gather_gen;
37797 case IX86_BUILTIN_GATHER3DIV2DF:
37798 icode = CODE_FOR_avx512vl_gatherdiv2df;
37799 goto gather_gen;
37800 case IX86_BUILTIN_GATHER3DIV4DF:
37801 icode = CODE_FOR_avx512vl_gatherdiv4df;
37802 goto gather_gen;
37803 case IX86_BUILTIN_GATHER3SIV4SF:
37804 icode = CODE_FOR_avx512vl_gathersiv4sf;
37805 goto gather_gen;
37806 case IX86_BUILTIN_GATHER3SIV8SF:
37807 icode = CODE_FOR_avx512vl_gathersiv8sf;
37808 goto gather_gen;
37809 case IX86_BUILTIN_GATHER3DIV4SF:
37810 icode = CODE_FOR_avx512vl_gatherdiv4sf;
37811 goto gather_gen;
37812 case IX86_BUILTIN_GATHER3DIV8SF:
37813 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37814 goto gather_gen;
37815 case IX86_BUILTIN_GATHER3SIV2DI:
37816 icode = CODE_FOR_avx512vl_gathersiv2di;
37817 goto gather_gen;
37818 case IX86_BUILTIN_GATHER3SIV4DI:
37819 icode = CODE_FOR_avx512vl_gathersiv4di;
37820 goto gather_gen;
37821 case IX86_BUILTIN_GATHER3DIV2DI:
37822 icode = CODE_FOR_avx512vl_gatherdiv2di;
37823 goto gather_gen;
37824 case IX86_BUILTIN_GATHER3DIV4DI:
37825 icode = CODE_FOR_avx512vl_gatherdiv4di;
37826 goto gather_gen;
37827 case IX86_BUILTIN_GATHER3SIV4SI:
37828 icode = CODE_FOR_avx512vl_gathersiv4si;
37829 goto gather_gen;
37830 case IX86_BUILTIN_GATHER3SIV8SI:
37831 icode = CODE_FOR_avx512vl_gathersiv8si;
37832 goto gather_gen;
37833 case IX86_BUILTIN_GATHER3DIV4SI:
37834 icode = CODE_FOR_avx512vl_gatherdiv4si;
37835 goto gather_gen;
37836 case IX86_BUILTIN_GATHER3DIV8SI:
37837 icode = CODE_FOR_avx512vl_gatherdiv8si;
37838 goto gather_gen;
37839 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37840 icode = CODE_FOR_avx512vl_gathersiv4df;
37841 goto gather_gen;
37842 case IX86_BUILTIN_GATHER3ALTDIV8SF:
37843 icode = CODE_FOR_avx512vl_gatherdiv8sf;
37844 goto gather_gen;
37845 case IX86_BUILTIN_GATHER3ALTSIV4DI:
37846 icode = CODE_FOR_avx512vl_gathersiv4di;
37847 goto gather_gen;
37848 case IX86_BUILTIN_GATHER3ALTDIV8SI:
37849 icode = CODE_FOR_avx512vl_gatherdiv8si;
37850 goto gather_gen;
37851 case IX86_BUILTIN_SCATTERSIV16SF:
37852 icode = CODE_FOR_avx512f_scattersiv16sf;
37853 goto scatter_gen;
37854 case IX86_BUILTIN_SCATTERSIV8DF:
37855 icode = CODE_FOR_avx512f_scattersiv8df;
37856 goto scatter_gen;
37857 case IX86_BUILTIN_SCATTERDIV16SF:
37858 icode = CODE_FOR_avx512f_scatterdiv16sf;
37859 goto scatter_gen;
37860 case IX86_BUILTIN_SCATTERDIV8DF:
37861 icode = CODE_FOR_avx512f_scatterdiv8df;
37862 goto scatter_gen;
37863 case IX86_BUILTIN_SCATTERSIV16SI:
37864 icode = CODE_FOR_avx512f_scattersiv16si;
37865 goto scatter_gen;
37866 case IX86_BUILTIN_SCATTERSIV8DI:
37867 icode = CODE_FOR_avx512f_scattersiv8di;
37868 goto scatter_gen;
37869 case IX86_BUILTIN_SCATTERDIV16SI:
37870 icode = CODE_FOR_avx512f_scatterdiv16si;
37871 goto scatter_gen;
37872 case IX86_BUILTIN_SCATTERDIV8DI:
37873 icode = CODE_FOR_avx512f_scatterdiv8di;
37874 goto scatter_gen;
37875 case IX86_BUILTIN_SCATTERSIV8SF:
37876 icode = CODE_FOR_avx512vl_scattersiv8sf;
37877 goto scatter_gen;
37878 case IX86_BUILTIN_SCATTERSIV4SF:
37879 icode = CODE_FOR_avx512vl_scattersiv4sf;
37880 goto scatter_gen;
37881 case IX86_BUILTIN_SCATTERSIV4DF:
37882 icode = CODE_FOR_avx512vl_scattersiv4df;
37883 goto scatter_gen;
37884 case IX86_BUILTIN_SCATTERSIV2DF:
37885 icode = CODE_FOR_avx512vl_scattersiv2df;
37886 goto scatter_gen;
37887 case IX86_BUILTIN_SCATTERDIV8SF:
37888 icode = CODE_FOR_avx512vl_scatterdiv8sf;
37889 goto scatter_gen;
37890 case IX86_BUILTIN_SCATTERDIV4SF:
37891 icode = CODE_FOR_avx512vl_scatterdiv4sf;
37892 goto scatter_gen;
37893 case IX86_BUILTIN_SCATTERDIV4DF:
37894 icode = CODE_FOR_avx512vl_scatterdiv4df;
37895 goto scatter_gen;
37896 case IX86_BUILTIN_SCATTERDIV2DF:
37897 icode = CODE_FOR_avx512vl_scatterdiv2df;
37898 goto scatter_gen;
37899 case IX86_BUILTIN_SCATTERSIV8SI:
37900 icode = CODE_FOR_avx512vl_scattersiv8si;
37901 goto scatter_gen;
37902 case IX86_BUILTIN_SCATTERSIV4SI:
37903 icode = CODE_FOR_avx512vl_scattersiv4si;
37904 goto scatter_gen;
37905 case IX86_BUILTIN_SCATTERSIV4DI:
37906 icode = CODE_FOR_avx512vl_scattersiv4di;
37907 goto scatter_gen;
37908 case IX86_BUILTIN_SCATTERSIV2DI:
37909 icode = CODE_FOR_avx512vl_scattersiv2di;
37910 goto scatter_gen;
37911 case IX86_BUILTIN_SCATTERDIV8SI:
37912 icode = CODE_FOR_avx512vl_scatterdiv8si;
37913 goto scatter_gen;
37914 case IX86_BUILTIN_SCATTERDIV4SI:
37915 icode = CODE_FOR_avx512vl_scatterdiv4si;
37916 goto scatter_gen;
37917 case IX86_BUILTIN_SCATTERDIV4DI:
37918 icode = CODE_FOR_avx512vl_scatterdiv4di;
37919 goto scatter_gen;
37920 case IX86_BUILTIN_SCATTERDIV2DI:
37921 icode = CODE_FOR_avx512vl_scatterdiv2di;
37922 goto scatter_gen;
37923 case IX86_BUILTIN_GATHERPFDPD:
37924 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
37925 goto vec_prefetch_gen;
37926 case IX86_BUILTIN_SCATTERALTSIV8DF:
37927 icode = CODE_FOR_avx512f_scattersiv8df;
37928 goto scatter_gen;
37929 case IX86_BUILTIN_SCATTERALTDIV16SF:
37930 icode = CODE_FOR_avx512f_scatterdiv16sf;
37931 goto scatter_gen;
37932 case IX86_BUILTIN_SCATTERALTSIV8DI:
37933 icode = CODE_FOR_avx512f_scattersiv8di;
37934 goto scatter_gen;
37935 case IX86_BUILTIN_SCATTERALTDIV16SI:
37936 icode = CODE_FOR_avx512f_scatterdiv16si;
37937 goto scatter_gen;
37938 case IX86_BUILTIN_GATHERPFDPS:
37939 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
37940 goto vec_prefetch_gen;
37941 case IX86_BUILTIN_GATHERPFQPD:
37942 icode = CODE_FOR_avx512pf_gatherpfv8didf;
37943 goto vec_prefetch_gen;
37944 case IX86_BUILTIN_GATHERPFQPS:
37945 icode = CODE_FOR_avx512pf_gatherpfv8disf;
37946 goto vec_prefetch_gen;
37947 case IX86_BUILTIN_SCATTERPFDPD:
37948 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
37949 goto vec_prefetch_gen;
37950 case IX86_BUILTIN_SCATTERPFDPS:
37951 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
37952 goto vec_prefetch_gen;
37953 case IX86_BUILTIN_SCATTERPFQPD:
37954 icode = CODE_FOR_avx512pf_scatterpfv8didf;
37955 goto vec_prefetch_gen;
37956 case IX86_BUILTIN_SCATTERPFQPS:
37957 icode = CODE_FOR_avx512pf_scatterpfv8disf;
37958 goto vec_prefetch_gen;
37960 gather_gen:
37961 rtx half;
37962 rtx (*gen) (rtx, rtx);
37964 arg0 = CALL_EXPR_ARG (exp, 0);
37965 arg1 = CALL_EXPR_ARG (exp, 1);
37966 arg2 = CALL_EXPR_ARG (exp, 2);
37967 arg3 = CALL_EXPR_ARG (exp, 3);
37968 arg4 = CALL_EXPR_ARG (exp, 4);
37969 op0 = expand_normal (arg0);
37970 op1 = expand_normal (arg1);
37971 op2 = expand_normal (arg2);
37972 op3 = expand_normal (arg3);
37973 op4 = expand_normal (arg4);
37974 /* Note the arg order is different from the operand order. */
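/* Builtin arguments are (src, base pointer, index vector, mask, scale);
   they correspond to insn operands 1 through 5, operand 0 being the
   gather destination.  */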
37975 mode0 = insn_data[icode].operand[1].mode;
37976 mode2 = insn_data[icode].operand[3].mode;
37977 mode3 = insn_data[icode].operand[4].mode;
37978 mode4 = insn_data[icode].operand[5].mode;
37980 if (target == NULL_RTX
37981 || GET_MODE (target) != insn_data[icode].operand[0].mode
37982 || !insn_data[icode].operand[0].predicate (target,
37983 GET_MODE (target)))
37984 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
37985 else
37986 subtarget = target;
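/* For the *ALT* variants the index vector and the data vector have
   different element counts: whichever operand (the index, or the source
   and mask for the DIV forms) has twice as many elements as needed,
   only its low half is used, so extract it first.  */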
37988 switch (fcode)
37990 case IX86_BUILTIN_GATHER3ALTSIV8DF:
37991 case IX86_BUILTIN_GATHER3ALTSIV8DI:
37992 half = gen_reg_rtx (V8SImode);
37993 if (!nonimmediate_operand (op2, V16SImode))
37994 op2 = copy_to_mode_reg (V16SImode, op2);
37995 emit_insn (gen_vec_extract_lo_v16si (half, op2));
37996 op2 = half;
37997 break;
37998 case IX86_BUILTIN_GATHER3ALTSIV4DF:
37999 case IX86_BUILTIN_GATHER3ALTSIV4DI:
38000 case IX86_BUILTIN_GATHERALTSIV4DF:
38001 case IX86_BUILTIN_GATHERALTSIV4DI:
38002 half = gen_reg_rtx (V4SImode);
38003 if (!nonimmediate_operand (op2, V8SImode))
38004 op2 = copy_to_mode_reg (V8SImode, op2);
38005 emit_insn (gen_vec_extract_lo_v8si (half, op2));
38006 op2 = half;
38007 break;
38008 case IX86_BUILTIN_GATHER3ALTDIV16SF:
38009 case IX86_BUILTIN_GATHER3ALTDIV16SI:
38010 half = gen_reg_rtx (mode0);
38011 if (mode0 == V8SFmode)
38012 gen = gen_vec_extract_lo_v16sf;
38013 else
38014 gen = gen_vec_extract_lo_v16si;
38015 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38016 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38017 emit_insn (gen (half, op0));
38018 op0 = half;
38019 if (GET_MODE (op3) != VOIDmode)
38021 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38022 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38023 emit_insn (gen (half, op3));
38024 op3 = half;
38026 break;
38027 case IX86_BUILTIN_GATHER3ALTDIV8SF:
38028 case IX86_BUILTIN_GATHER3ALTDIV8SI:
38029 case IX86_BUILTIN_GATHERALTDIV8SF:
38030 case IX86_BUILTIN_GATHERALTDIV8SI:
38031 half = gen_reg_rtx (mode0);
38032 if (mode0 == V4SFmode)
38033 gen = gen_vec_extract_lo_v8sf;
38034 else
38035 gen = gen_vec_extract_lo_v8si;
38036 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38037 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38038 emit_insn (gen (half, op0));
38039 op0 = half;
38040 if (GET_MODE (op3) != VOIDmode)
38042 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38043 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38044 emit_insn (gen (half, op3));
38045 op3 = half;
38047 break;
38048 default:
38049 break;
38052 /* Force the memory operand to use only a base register here. We
38053 don't want to do this for the memory operands of other builtin
38054 functions. */
38055 op1 = ix86_zero_extend_to_Pmode (op1);
38057 if (!insn_data[icode].operand[1].predicate (op0, mode0))
38058 op0 = copy_to_mode_reg (mode0, op0);
38059 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
38060 op1 = copy_to_mode_reg (Pmode, op1);
38061 if (!insn_data[icode].operand[3].predicate (op2, mode2))
38062 op2 = copy_to_mode_reg (mode2, op2);
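/* The mask argument may arrive as a modeless constant or in a wider
   integer mode than the insn's mask mode; in the latter case copy it
   to a register and use its low part in the expected mode.  */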
38064 op3 = fixup_modeless_constant (op3, mode3);
38066 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
38068 if (!insn_data[icode].operand[4].predicate (op3, mode3))
38069 op3 = copy_to_mode_reg (mode3, op3);
38071 else
38073 op3 = copy_to_reg (op3);
38074 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
38076 if (!insn_data[icode].operand[5].predicate (op4, mode4))
38078 error ("the last argument must be scale 1, 2, 4, 8");
38079 return const0_rtx;
38082 /* Optimize. If mask is known to have all high bits set,
38083 replace op0 with pc_rtx to signal that the instruction
38084 overwrites the whole destination and doesn't use its
38085 previous contents. */
38086 if (optimize)
38088 if (TREE_CODE (arg3) == INTEGER_CST)
38090 if (integer_all_onesp (arg3))
38091 op0 = pc_rtx;
38093 else if (TREE_CODE (arg3) == VECTOR_CST)
38095 unsigned int negative = 0;
38096 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
38098 tree cst = VECTOR_CST_ELT (arg3, i);
38099 if (TREE_CODE (cst) == INTEGER_CST
38100 && tree_int_cst_sign_bit (cst))
38101 negative++;
38102 else if (TREE_CODE (cst) == REAL_CST
38103 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
38104 negative++;
38106 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
38107 op0 = pc_rtx;
38109 else if (TREE_CODE (arg3) == SSA_NAME
38110 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38112 /* Recognize also when mask is like:
38113 __v2df src = _mm_setzero_pd ();
38114 __v2df mask = _mm_cmpeq_pd (src, src);
38116 __v8sf src = _mm256_setzero_ps ();
38117 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38118 as that is a cheaper way to load all ones into
38119 a register than having to load a constant from
38120 memory. */
38121 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
38122 if (is_gimple_call (def_stmt))
38124 tree fndecl = gimple_call_fndecl (def_stmt);
38125 if (fndecl
38126 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38127 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38129 case IX86_BUILTIN_CMPPD:
38130 case IX86_BUILTIN_CMPPS:
38131 case IX86_BUILTIN_CMPPD256:
38132 case IX86_BUILTIN_CMPPS256:
38133 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38134 break;
38135 /* FALLTHRU */
38136 case IX86_BUILTIN_CMPEQPD:
38137 case IX86_BUILTIN_CMPEQPS:
38138 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38139 && initializer_zerop (gimple_call_arg (def_stmt,
38140 1)))
38141 op0 = pc_rtx;
38142 break;
38143 default:
38144 break;
38150 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38151 if (! pat)
38152 return const0_rtx;
38153 emit_insn (pat);
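/* For gathers whose return vector is narrower than the instruction's
   destination (the DIV forms where the index vector has more elements
   than the data), only the low half of SUBTARGET is meaningful, so
   extract it into the narrower TARGET.  */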
38155 switch (fcode)
38157 case IX86_BUILTIN_GATHER3DIV16SF:
38158 if (target == NULL_RTX)
38159 target = gen_reg_rtx (V8SFmode);
38160 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38161 break;
38162 case IX86_BUILTIN_GATHER3DIV16SI:
38163 if (target == NULL_RTX)
38164 target = gen_reg_rtx (V8SImode);
38165 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38166 break;
38167 case IX86_BUILTIN_GATHER3DIV8SF:
38168 case IX86_BUILTIN_GATHERDIV8SF:
38169 if (target == NULL_RTX)
38170 target = gen_reg_rtx (V4SFmode);
38171 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38172 break;
38173 case IX86_BUILTIN_GATHER3DIV8SI:
38174 case IX86_BUILTIN_GATHERDIV8SI:
38175 if (target == NULL_RTX)
38176 target = gen_reg_rtx (V4SImode);
38177 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38178 break;
38179 default:
38180 target = subtarget;
38181 break;
38183 return target;
38185 scatter_gen:
38186 arg0 = CALL_EXPR_ARG (exp, 0);
38187 arg1 = CALL_EXPR_ARG (exp, 1);
38188 arg2 = CALL_EXPR_ARG (exp, 2);
38189 arg3 = CALL_EXPR_ARG (exp, 3);
38190 arg4 = CALL_EXPR_ARG (exp, 4);
38191 op0 = expand_normal (arg0);
38192 op1 = expand_normal (arg1);
38193 op2 = expand_normal (arg2);
38194 op3 = expand_normal (arg3);
38195 op4 = expand_normal (arg4);
38196 mode1 = insn_data[icode].operand[1].mode;
38197 mode2 = insn_data[icode].operand[2].mode;
38198 mode3 = insn_data[icode].operand[3].mode;
38199 mode4 = insn_data[icode].operand[4].mode;
38201 /* Scatter instruction stores operand op3 to memory with
38202 indices from op2 and scale from op4 under writemask op1.
38203 If index operand op2 has more elements than source operand
38204 op3, only its low half needs to be used. And vice versa. */
38205 switch (fcode)
38207 case IX86_BUILTIN_SCATTERALTSIV8DF:
38208 case IX86_BUILTIN_SCATTERALTSIV8DI:
38209 half = gen_reg_rtx (V8SImode);
38210 if (!nonimmediate_operand (op2, V16SImode))
38211 op2 = copy_to_mode_reg (V16SImode, op2);
38212 emit_insn (gen_vec_extract_lo_v16si (half, op2));
38213 op2 = half;
38214 break;
38215 case IX86_BUILTIN_SCATTERALTDIV16SF:
38216 case IX86_BUILTIN_SCATTERALTDIV16SI:
38217 half = gen_reg_rtx (mode3);
38218 if (mode3 == V8SFmode)
38219 gen = gen_vec_extract_lo_v16sf;
38220 else
38221 gen = gen_vec_extract_lo_v16si;
38222 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38223 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38224 emit_insn (gen (half, op3));
38225 op3 = half;
38226 break;
38227 default:
38228 break;
38231 /* Force the memory operand to use only a base register here. We
38232 don't want to do this for the memory operands of other builtin
38233 functions. */
38234 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38236 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38237 op0 = copy_to_mode_reg (Pmode, op0);
38239 op1 = fixup_modeless_constant (op1, mode1);
38241 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38243 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38244 op1 = copy_to_mode_reg (mode1, op1);
38246 else
38248 op1 = copy_to_reg (op1);
38249 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
38252 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38253 op2 = copy_to_mode_reg (mode2, op2);
38255 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38256 op3 = copy_to_mode_reg (mode3, op3);
38258 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38260 error ("the last argument must be scale 1, 2, 4, 8");
38261 return const0_rtx;
38264 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38265 if (! pat)
38266 return const0_rtx;
38268 emit_insn (pat);
38269 return 0;
38271 vec_prefetch_gen:
38272 arg0 = CALL_EXPR_ARG (exp, 0);
38273 arg1 = CALL_EXPR_ARG (exp, 1);
38274 arg2 = CALL_EXPR_ARG (exp, 2);
38275 arg3 = CALL_EXPR_ARG (exp, 3);
38276 arg4 = CALL_EXPR_ARG (exp, 4);
38277 op0 = expand_normal (arg0);
38278 op1 = expand_normal (arg1);
38279 op2 = expand_normal (arg2);
38280 op3 = expand_normal (arg3);
38281 op4 = expand_normal (arg4);
38282 mode0 = insn_data[icode].operand[0].mode;
38283 mode1 = insn_data[icode].operand[1].mode;
38284 mode3 = insn_data[icode].operand[3].mode;
38285 mode4 = insn_data[icode].operand[4].mode;
38287 op0 = fixup_modeless_constant (op0, mode0);
38289 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
38291 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38292 op0 = copy_to_mode_reg (mode0, op0);
38294 else
38296 op0 = copy_to_reg (op0);
38297 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
38300 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38301 op1 = copy_to_mode_reg (mode1, op1);
38303 /* Force the memory operand to use only a base register here. We
38304 don't want to do this for the memory operands of other builtin
38305 functions. */
38306 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38308 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38309 op2 = copy_to_mode_reg (Pmode, op2);
38311 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38313 error ("the forth argument must be scale 1, 2, 4, 8");
38314 return const0_rtx;
38317 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38319 error ("incorrect hint operand");
38320 return const0_rtx;
38323 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38324 if (! pat)
38325 return const0_rtx;
38327 emit_insn (pat);
38329 return 0;
38331 case IX86_BUILTIN_XABORT:
38332 icode = CODE_FOR_xabort;
38333 arg0 = CALL_EXPR_ARG (exp, 0);
38334 op0 = expand_normal (arg0);
38335 mode0 = insn_data[icode].operand[0].mode;
38336 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38338 error ("the xabort's argument must be an 8-bit immediate");
38339 return const0_rtx;
38341 emit_insn (gen_xabort (op0));
38342 return 0;
38344 case IX86_BUILTIN_RSTORSSP:
38345 case IX86_BUILTIN_CLRSSBSY:
38346 arg0 = CALL_EXPR_ARG (exp, 0);
38347 op0 = expand_normal (arg0);
38348 icode = (fcode == IX86_BUILTIN_RSTORSSP
38349 ? CODE_FOR_rstorssp
38350 : CODE_FOR_clrssbsy);
38351 if (!address_operand (op0, VOIDmode))
38353 op1 = convert_memory_address (Pmode, op0);
38354 op0 = copy_addr_to_reg (op1);
38356 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
38357 return 0;
38359 case IX86_BUILTIN_WRSSD:
38360 case IX86_BUILTIN_WRSSQ:
38361 case IX86_BUILTIN_WRUSSD:
38362 case IX86_BUILTIN_WRUSSQ:
38363 arg0 = CALL_EXPR_ARG (exp, 0);
38364 op0 = expand_normal (arg0);
38365 arg1 = CALL_EXPR_ARG (exp, 1);
38366 op1 = expand_normal (arg1);
38367 switch (fcode)
38369 case IX86_BUILTIN_WRSSD:
38370 icode = CODE_FOR_wrsssi;
38371 mode = SImode;
38372 break;
38373 case IX86_BUILTIN_WRSSQ:
38374 icode = CODE_FOR_wrssdi;
38375 mode = DImode;
38376 break;
38377 case IX86_BUILTIN_WRUSSD:
38378 icode = CODE_FOR_wrusssi;
38379 mode = SImode;
38380 break;
38381 case IX86_BUILTIN_WRUSSQ:
38382 icode = CODE_FOR_wrussdi;
38383 mode = DImode;
38384 break;
38386 op0 = force_reg (mode, op0);
38387 if (!address_operand (op1, VOIDmode))
38389 op2 = convert_memory_address (Pmode, op1);
38390 op1 = copy_addr_to_reg (op2);
38392 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
38393 return 0;
38395 default:
38396 break;
38399 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
38400 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
38402 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
38403 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
38404 target);
38407 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
38408 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
38410 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
38411 return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
38412 target);
38415 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
38416 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
38418 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
38419 switch (fcode)
38421 case IX86_BUILTIN_FABSQ:
38422 case IX86_BUILTIN_COPYSIGNQ:
38423 if (!TARGET_SSE)
38424 /* Emit a normal call if SSE isn't available. */
38425 return expand_call (exp, target, ignore);
38426 /* FALLTHRU */
38427 default:
38428 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
38432 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
38433 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
38435 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
38436 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
38437 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
38438 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
38439 int masked = 1;
38440 machine_mode mode, wide_mode, nar_mode;
38442 nar_mode = V4SFmode;
38443 mode = V16SFmode;
38444 wide_mode = V64SFmode;
38445 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
38446 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
38448 switch (fcode)
38450 case IX86_BUILTIN_4FMAPS:
38451 fcn = gen_avx5124fmaddps_4fmaddps;
38452 masked = 0;
38453 goto v4fma_expand;
38455 case IX86_BUILTIN_4DPWSSD:
38456 nar_mode = V4SImode;
38457 mode = V16SImode;
38458 wide_mode = V64SImode;
38459 fcn = gen_avx5124vnniw_vp4dpwssd;
38460 masked = 0;
38461 goto v4fma_expand;
38463 case IX86_BUILTIN_4DPWSSDS:
38464 nar_mode = V4SImode;
38465 mode = V16SImode;
38466 wide_mode = V64SImode;
38467 fcn = gen_avx5124vnniw_vp4dpwssds;
38468 masked = 0;
38469 goto v4fma_expand;
38471 case IX86_BUILTIN_4FNMAPS:
38472 fcn = gen_avx5124fmaddps_4fnmaddps;
38473 masked = 0;
38474 goto v4fma_expand;
38476 case IX86_BUILTIN_4FNMAPS_MASK:
38477 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
38478 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
38479 goto v4fma_expand;
38481 case IX86_BUILTIN_4DPWSSD_MASK:
38482 nar_mode = V4SImode;
38483 mode = V16SImode;
38484 wide_mode = V64SImode;
38485 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
38486 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
38487 goto v4fma_expand;
38489 case IX86_BUILTIN_4DPWSSDS_MASK:
38490 nar_mode = V4SImode;
38491 mode = V16SImode;
38492 wide_mode = V64SImode;
38493 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
38494 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
38495 goto v4fma_expand;
38497 case IX86_BUILTIN_4FMAPS_MASK:
38499 tree args[4];
38500 rtx ops[4];
38501 rtx wide_reg;
38502 rtx accum;
38503 rtx addr;
38504 rtx mem;
38506 v4fma_expand:
38507 wide_reg = gen_reg_rtx (wide_mode);
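/* The 4FMADDPS/4VNNIW patterns take their four 512-bit sources as a
   single wide-mode operand modelling a group of consecutive registers;
   place each source at byte offset i * 64 within it.  */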
38508 for (i = 0; i < 4; i++)
38510 args[i] = CALL_EXPR_ARG (exp, i);
38511 ops[i] = expand_normal (args[i]);
38513 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
38514 ops[i]);
38517 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38518 accum = force_reg (mode, accum);
38520 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38521 addr = force_reg (Pmode, addr);
38523 mem = gen_rtx_MEM (nar_mode, addr);
38525 target = gen_reg_rtx (mode);
38527 emit_move_insn (target, accum);
38529 if (! masked)
38530 emit_insn (fcn (target, accum, wide_reg, mem));
38531 else
38533 rtx merge, mask;
38534 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38536 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38538 if (CONST_INT_P (mask))
38539 mask = fixup_modeless_constant (mask, HImode);
38541 mask = force_reg (HImode, mask);
38543 if (GET_MODE (mask) != HImode)
38544 mask = gen_rtx_SUBREG (HImode, mask, 0);
38546 /* If merge is 0 then we're about to emit z-masked variant. */
38547 if (const0_operand (merge, mode))
38548 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38549 /* If merge is the same as accum then emit merge-masked variant. */
38550 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38552 merge = force_reg (mode, merge);
38553 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38555 /* Merging with something unknown can happen if we z-mask with -O0. */
38556 else
38558 target = gen_reg_rtx (mode);
38559 emit_move_insn (target, merge);
38560 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38563 return target;
38566 case IX86_BUILTIN_4FNMASS:
38567 fcn = gen_avx5124fmaddps_4fnmaddss;
38568 masked = 0;
38569 goto s4fma_expand;
38571 case IX86_BUILTIN_4FMASS:
38572 fcn = gen_avx5124fmaddps_4fmaddss;
38573 masked = 0;
38574 goto s4fma_expand;
38576 case IX86_BUILTIN_4FNMASS_MASK:
38577 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
38578 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
38579 goto s4fma_expand;
38581 case IX86_BUILTIN_4FMASS_MASK:
38583 tree args[4];
38584 rtx ops[4];
38585 rtx wide_reg;
38586 rtx accum;
38587 rtx addr;
38588 rtx mem;
38590 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
38591 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
38593 s4fma_expand:
38594 mode = V4SFmode;
38595 wide_reg = gen_reg_rtx (V64SFmode);
38596 for (i = 0; i < 4; i++)
38598 rtx tmp;
38599 args[i] = CALL_EXPR_ARG (exp, i);
38600 ops[i] = expand_normal (args[i]);
38602 tmp = gen_reg_rtx (SFmode);
38603 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
38605 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
38606 gen_rtx_SUBREG (V16SFmode, tmp, 0));
38609 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38610 accum = force_reg (V4SFmode, accum);
38612 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38613 addr = force_reg (Pmode, addr);
38615 mem = gen_rtx_MEM (V4SFmode, addr);
38617 target = gen_reg_rtx (V4SFmode);
38619 emit_move_insn (target, accum);
38621 if (! masked)
38622 emit_insn (fcn (target, accum, wide_reg, mem));
38623 else
38625 rtx merge, mask;
38626 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38628 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38630 if (CONST_INT_P (mask))
38631 mask = fixup_modeless_constant (mask, QImode);
38633 mask = force_reg (QImode, mask);
38635 if (GET_MODE (mask) != QImode)
38636 mask = gen_rtx_SUBREG (QImode, mask, 0);
38638 /* If merge is 0 then we're about to emit z-masked variant. */
38639 if (const0_operand (merge, mode))
38640 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38641 /* If merge is the same as accum then emit merge-masked
38642 variant. */
38643 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38645 merge = force_reg (mode, merge);
38646 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38648 /* Merging with something unknown can happen if we z-mask
38649 with -O0. */
38650 else
38652 target = gen_reg_rtx (mode);
38653 emit_move_insn (target, merge);
38654 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38657 return target;
38659 case IX86_BUILTIN_RDPID:
38660 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
38661 target);
38662 default:
38663 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
38667 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
38668 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
38670 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
38671 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
38674 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
38675 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
38677 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
38678 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
38681 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
38682 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
38684 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
38685 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
38688 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
38689 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
38691 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
38692 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
38695 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
38696 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
38698 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
38699 const struct builtin_description *d = bdesc_multi_arg + i;
38700 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38701 (enum ix86_builtin_func_type)
38702 d->flag, d->comparison);
38705 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
38706 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
38708 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
38709 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
38710 target);
38713 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
38714 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
38716 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
38717 return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
38718 target);
38721 gcc_unreachable ();
38724 /* This returns the target-specific builtin with code CODE if
38725 current_function_decl has visibility on this builtin, which is checked
38726 using isa flags. Returns NULL_TREE otherwise. */
38728 static tree ix86_get_builtin (enum ix86_builtins code)
38730 struct cl_target_option *opts;
38731 tree target_tree = NULL_TREE;
38733 /* Determine the isa flags of current_function_decl. */
38735 if (current_function_decl)
38736 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
38738 if (target_tree == NULL)
38739 target_tree = target_option_default_node;
38741 opts = TREE_TARGET_OPTION (target_tree);
38743 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
38744 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
38745 return ix86_builtin_decl (code, true);
38746 else
38747 return NULL_TREE;
38750 /* Return the function decl of the target-specific builtin
38751 corresponding to the MPX builtin passed in FCODE. */
38752 static tree
38753 ix86_builtin_mpx_function (unsigned fcode)
38755 switch (fcode)
38757 case BUILT_IN_CHKP_BNDMK:
38758 return ix86_builtins[IX86_BUILTIN_BNDMK];
38760 case BUILT_IN_CHKP_BNDSTX:
38761 return ix86_builtins[IX86_BUILTIN_BNDSTX];
38763 case BUILT_IN_CHKP_BNDLDX:
38764 return ix86_builtins[IX86_BUILTIN_BNDLDX];
38766 case BUILT_IN_CHKP_BNDCL:
38767 return ix86_builtins[IX86_BUILTIN_BNDCL];
38769 case BUILT_IN_CHKP_BNDCU:
38770 return ix86_builtins[IX86_BUILTIN_BNDCU];
38772 case BUILT_IN_CHKP_BNDRET:
38773 return ix86_builtins[IX86_BUILTIN_BNDRET];
38775 case BUILT_IN_CHKP_INTERSECT:
38776 return ix86_builtins[IX86_BUILTIN_BNDINT];
38778 case BUILT_IN_CHKP_NARROW:
38779 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
38781 case BUILT_IN_CHKP_SIZEOF:
38782 return ix86_builtins[IX86_BUILTIN_SIZEOF];
38784 case BUILT_IN_CHKP_EXTRACT_LOWER:
38785 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
38787 case BUILT_IN_CHKP_EXTRACT_UPPER:
38788 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
38790 default:
38791 return NULL_TREE;
38794 gcc_unreachable ();
38797 /* Helper function for ix86_load_bounds and ix86_store_bounds.
38799 Return an address to be used to load/store bounds for the pointer
38800 passed in SLOT.
38802 SLOT_NO is an integer constant holding the number of a target-
38803 dependent special slot to be used in case SLOT is not a memory.
38805 SPECIAL_BASE is a pointer to be used as the base of fake addresses
38806 used to access special slots in the Bounds Table. SPECIAL_BASE[-1],
38807 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
38809 static rtx
38810 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
38812 rtx addr = NULL;
38814 /* A NULL slot means we pass bounds for a pointer not passed to the
38815 function at all. A register slot means we pass the pointer in a
38816 register. In both these cases bounds are passed via the Bounds
38817 Table. Since we do not have the actual pointer stored in memory,
38818 we have to use fake addresses to access the Bounds Table. We
38819 start with (special_base - sizeof (void *)) and decrease this
38820 address by the pointer size to get addresses for the other slots. */
38821 if (!slot || REG_P (slot))
38823 gcc_assert (CONST_INT_P (slot_no));
38824 addr = plus_constant (Pmode, special_base,
38825 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
38827 /* If pointer is passed in a memory then its address is used to
38828 access Bounds Table. */
38829 else if (MEM_P (slot))
38831 addr = XEXP (slot, 0);
38832 if (!register_operand (addr, Pmode))
38833 addr = copy_addr_to_reg (addr);
38835 else
38836 gcc_unreachable ();
38838 return addr;
38841 /* The expand pass uses this hook to load bounds for function parameter
38842 PTR passed in SLOT in case its bounds are not passed in a register.
38844 If SLOT is a memory, then bounds are loaded as for a regular pointer
38845 loaded from memory. PTR may be NULL in case SLOT is a memory;
38846 in that case the value of PTR (if required) may be loaded from SLOT.
38848 If SLOT is NULL or a register, then SLOT_NO is an integer constant
38849 holding the number of the target-dependent special slot which should
38850 be used to obtain the bounds.
38852 Return the loaded bounds. */
38854 static rtx
38855 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
38857 rtx reg = gen_reg_rtx (BNDmode);
38858 rtx addr;
38860 /* Get address to be used to access Bounds Table. Special slots start
38861 at the location of return address of the current function. */
38862 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
38864 /* Load pointer value from a memory if we don't have it. */
38865 if (!ptr)
38867 gcc_assert (MEM_P (slot));
38868 ptr = copy_addr_to_reg (slot);
38871 if (!register_operand (ptr, Pmode))
38872 ptr = ix86_zero_extend_to_Pmode (ptr);
38874 emit_insn (BNDmode == BND64mode
38875 ? gen_bnd64_ldx (reg, addr, ptr)
38876 : gen_bnd32_ldx (reg, addr, ptr));
38878 return reg;
38881 /* The expand pass uses this hook to store BOUNDS for call argument PTR
38882 passed in SLOT in case BOUNDS are not passed in a register.
38884 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
38885 stored in memory. PTR may be NULL in case SLOT is a memory;
38886 in that case the value of PTR (if required) may be loaded from SLOT.
38888 If SLOT is NULL or a register, then SLOT_NO is an integer constant
38889 holding the number of the target-dependent special slot which should
38890 be used to store BOUNDS. */
38892 static void
38893 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
38895 rtx addr;
38897 /* Get address to be used to access Bounds Table. Special slots start
38898 at the location of return address of a called function. */
38899 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
38901 /* Load pointer value from a memory if we don't have it. */
38902 if (!ptr)
38904 gcc_assert (MEM_P (slot));
38905 ptr = copy_addr_to_reg (slot);
38908 if (!register_operand (ptr, Pmode))
38909 ptr = ix86_zero_extend_to_Pmode (ptr);
38911 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
38912 if (!register_operand (bounds, BNDmode))
38913 bounds = copy_to_mode_reg (BNDmode, bounds);
38915 emit_insn (BNDmode == BND64mode
38916 ? gen_bnd64_stx (addr, ptr, bounds)
38917 : gen_bnd32_stx (addr, ptr, bounds));
38920 /* Load and return bounds returned by function in SLOT. */
38922 static rtx
38923 ix86_load_returned_bounds (rtx slot)
38925 rtx res;
38927 gcc_assert (REG_P (slot));
38928 res = gen_reg_rtx (BNDmode);
38929 emit_move_insn (res, slot);
38931 return res;
38934 /* Store BOUNDS returned by function into SLOT. */
38936 static void
38937 ix86_store_returned_bounds (rtx slot, rtx bounds)
38939 gcc_assert (REG_P (slot));
38940 emit_move_insn (slot, bounds);
38943 /* Returns a function decl for a vectorized version of the combined function
38944 with combined_fn code FN, result vector type TYPE_OUT and argument
38945 vector type TYPE_IN, or NULL_TREE if it is not available. */
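/* For example, a vectorized floorf over V8SFmode (with -fno-trapping-math
   and SSE4.1 enabled) maps CFN_FLOOR below to IX86_BUILTIN_FLOORPS256.  */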
38947 static tree
38948 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
38949 tree type_in)
38951 machine_mode in_mode, out_mode;
38952 int in_n, out_n;
38954 if (TREE_CODE (type_out) != VECTOR_TYPE
38955 || TREE_CODE (type_in) != VECTOR_TYPE)
38956 return NULL_TREE;
38958 out_mode = TYPE_MODE (TREE_TYPE (type_out));
38959 out_n = TYPE_VECTOR_SUBPARTS (type_out);
38960 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38961 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38963 switch (fn)
38965 CASE_CFN_EXP2:
38966 if (out_mode == SFmode && in_mode == SFmode)
38968 if (out_n == 16 && in_n == 16)
38969 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
38971 break;
38973 CASE_CFN_IFLOOR:
38974 CASE_CFN_LFLOOR:
38975 CASE_CFN_LLFLOOR:
38976 /* The round insn does not trap on denormals. */
38977 if (flag_trapping_math || !TARGET_SSE4_1)
38978 break;
38980 if (out_mode == SImode && in_mode == DFmode)
38982 if (out_n == 4 && in_n == 2)
38983 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
38984 else if (out_n == 8 && in_n == 4)
38985 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
38986 else if (out_n == 16 && in_n == 8)
38987 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
38989 if (out_mode == SImode && in_mode == SFmode)
38991 if (out_n == 4 && in_n == 4)
38992 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
38993 else if (out_n == 8 && in_n == 8)
38994 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
38995 else if (out_n == 16 && in_n == 16)
38996 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
38998 break;
39000 CASE_CFN_ICEIL:
39001 CASE_CFN_LCEIL:
39002 CASE_CFN_LLCEIL:
39003 /* The round insn does not trap on denormals. */
39004 if (flag_trapping_math || !TARGET_SSE4_1)
39005 break;
39007 if (out_mode == SImode && in_mode == DFmode)
39009 if (out_n == 4 && in_n == 2)
39010 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
39011 else if (out_n == 8 && in_n == 4)
39012 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
39013 else if (out_n == 16 && in_n == 8)
39014 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
39016 if (out_mode == SImode && in_mode == SFmode)
39018 if (out_n == 4 && in_n == 4)
39019 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
39020 else if (out_n == 8 && in_n == 8)
39021 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
39022 else if (out_n == 16 && in_n == 16)
39023 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
39025 break;
39027 CASE_CFN_IRINT:
39028 CASE_CFN_LRINT:
39029 CASE_CFN_LLRINT:
39030 if (out_mode == SImode && in_mode == DFmode)
39032 if (out_n == 4 && in_n == 2)
39033 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
39034 else if (out_n == 8 && in_n == 4)
39035 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
39036 else if (out_n == 16 && in_n == 8)
39037 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
39039 if (out_mode == SImode && in_mode == SFmode)
39041 if (out_n == 4 && in_n == 4)
39042 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
39043 else if (out_n == 8 && in_n == 8)
39044 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
39045 else if (out_n == 16 && in_n == 16)
39046 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
39048 break;
39050 CASE_CFN_IROUND:
39051 CASE_CFN_LROUND:
39052 CASE_CFN_LLROUND:
39053 /* The round insn does not trap on denormals. */
39054 if (flag_trapping_math || !TARGET_SSE4_1)
39055 break;
39057 if (out_mode == SImode && in_mode == DFmode)
39059 if (out_n == 4 && in_n == 2)
39060 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
39061 else if (out_n == 8 && in_n == 4)
39062 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
39063 else if (out_n == 16 && in_n == 8)
39064 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
39066 if (out_mode == SImode && in_mode == SFmode)
39068 if (out_n == 4 && in_n == 4)
39069 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
39070 else if (out_n == 8 && in_n == 8)
39071 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
39072 else if (out_n == 16 && in_n == 16)
39073 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
39075 break;
39077 CASE_CFN_FLOOR:
39078 /* The round insn does not trap on denormals. */
39079 if (flag_trapping_math || !TARGET_SSE4_1)
39080 break;
39082 if (out_mode == DFmode && in_mode == DFmode)
39084 if (out_n == 2 && in_n == 2)
39085 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
39086 else if (out_n == 4 && in_n == 4)
39087 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
39088 else if (out_n == 8 && in_n == 8)
39089 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
39091 if (out_mode == SFmode && in_mode == SFmode)
39093 if (out_n == 4 && in_n == 4)
39094 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
39095 else if (out_n == 8 && in_n == 8)
39096 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
39097 else if (out_n == 16 && in_n == 16)
39098 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
39100 break;
39102 CASE_CFN_CEIL:
39103 /* The round insn does not trap on denormals. */
39104 if (flag_trapping_math || !TARGET_SSE4_1)
39105 break;
39107 if (out_mode == DFmode && in_mode == DFmode)
39109 if (out_n == 2 && in_n == 2)
39110 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
39111 else if (out_n == 4 && in_n == 4)
39112 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
39113 else if (out_n == 8 && in_n == 8)
39114 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
39116 if (out_mode == SFmode && in_mode == SFmode)
39118 if (out_n == 4 && in_n == 4)
39119 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
39120 else if (out_n == 8 && in_n == 8)
39121 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
39122 else if (out_n == 16 && in_n == 16)
39123 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
39125 break;
39127 CASE_CFN_TRUNC:
39128 /* The round insn does not trap on denormals. */
39129 if (flag_trapping_math || !TARGET_SSE4_1)
39130 break;
39132 if (out_mode == DFmode && in_mode == DFmode)
39134 if (out_n == 2 && in_n == 2)
39135 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
39136 else if (out_n == 4 && in_n == 4)
39137 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
39138 else if (out_n == 8 && in_n == 8)
39139 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
39141 if (out_mode == SFmode && in_mode == SFmode)
39143 if (out_n == 4 && in_n == 4)
39144 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
39145 else if (out_n == 8 && in_n == 8)
39146 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
39147 else if (out_n == 16 && in_n == 16)
39148 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
39150 break;
39152 CASE_CFN_RINT:
39153 /* The round insn does not trap on denormals. */
39154 if (flag_trapping_math || !TARGET_SSE4_1)
39155 break;
39157 if (out_mode == DFmode && in_mode == DFmode)
39159 if (out_n == 2 && in_n == 2)
39160 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
39161 else if (out_n == 4 && in_n == 4)
39162 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
39164 if (out_mode == SFmode && in_mode == SFmode)
39166 if (out_n == 4 && in_n == 4)
39167 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
39168 else if (out_n == 8 && in_n == 8)
39169 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
39171 break;
39173 CASE_CFN_FMA:
39174 if (out_mode == DFmode && in_mode == DFmode)
39176 if (out_n == 2 && in_n == 2)
39177 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
39178 if (out_n == 4 && in_n == 4)
39179 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
39181 if (out_mode == SFmode && in_mode == SFmode)
39183 if (out_n == 4 && in_n == 4)
39184 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
39185 if (out_n == 8 && in_n == 8)
39186 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
39188 break;
39190 default:
39191 break;
39194 /* Dispatch to a handler for a vectorization library. */
39195 if (ix86_veclib_handler)
39196 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
39198 return NULL_TREE;
39201 /* Handler for an SVML-style interface to
39202 a library with vectorized intrinsics. */
39204 static tree
39205 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
39207 char name[20];
39208 tree fntype, new_fndecl, args;
39209 unsigned arity;
39210 const char *bname;
39211 machine_mode el_mode, in_mode;
39212 int n, in_n;
39214 /* SVML is suitable for unsafe math only. */
39215 if (!flag_unsafe_math_optimizations)
39216 return NULL_TREE;
39218 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39219 n = TYPE_VECTOR_SUBPARTS (type_out);
39220 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39221 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39222 if (el_mode != in_mode
39223 || n != in_n)
39224 return NULL_TREE;
39226 switch (fn)
39228 CASE_CFN_EXP:
39229 CASE_CFN_LOG:
39230 CASE_CFN_LOG10:
39231 CASE_CFN_POW:
39232 CASE_CFN_TANH:
39233 CASE_CFN_TAN:
39234 CASE_CFN_ATAN:
39235 CASE_CFN_ATAN2:
39236 CASE_CFN_ATANH:
39237 CASE_CFN_CBRT:
39238 CASE_CFN_SINH:
39239 CASE_CFN_SIN:
39240 CASE_CFN_ASINH:
39241 CASE_CFN_ASIN:
39242 CASE_CFN_COSH:
39243 CASE_CFN_COS:
39244 CASE_CFN_ACOSH:
39245 CASE_CFN_ACOS:
39246 if ((el_mode != DFmode || n != 2)
39247 && (el_mode != SFmode || n != 4))
39248 return NULL_TREE;
39249 break;
39251 default:
39252 return NULL_TREE;
39255 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39256 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39258 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
39259 strcpy (name, "vmlsLn4");
39260 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
39261 strcpy (name, "vmldLn2");
39262 else if (n == 4)
39264 sprintf (name, "vmls%s", bname+10);
39265 name[strlen (name)-1] = '4';
39267 else
39268 sprintf (name, "vmld%s2", bname+10);
39270 /* Convert to uppercase. */
39271 name[4] &= ~0x20;
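/* E.g. sinf becomes "vmlsSin4" and sin becomes "vmldSin2".  */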
39273 arity = 0;
39274 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39275 arity++;
39277 if (arity == 1)
39278 fntype = build_function_type_list (type_out, type_in, NULL);
39279 else
39280 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39282 /* Build a function declaration for the vectorized function. */
39283 new_fndecl = build_decl (BUILTINS_LOCATION,
39284 FUNCTION_DECL, get_identifier (name), fntype);
39285 TREE_PUBLIC (new_fndecl) = 1;
39286 DECL_EXTERNAL (new_fndecl) = 1;
39287 DECL_IS_NOVOPS (new_fndecl) = 1;
39288 TREE_READONLY (new_fndecl) = 1;
39290 return new_fndecl;
39293 /* Handler for an ACML-style interface to
39294 a library with vectorized intrinsics. */
39296 static tree
39297 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
39299 char name[20] = "__vr.._";
39300 tree fntype, new_fndecl, args;
39301 unsigned arity;
39302 const char *bname;
39303 machine_mode el_mode, in_mode;
39304 int n, in_n;
39306 /* ACML is 64-bit only and suitable for unsafe math only, as
39307 it does not correctly support parts of IEEE with the required
39308 precision, such as denormals. */
39309 if (!TARGET_64BIT
39310 || !flag_unsafe_math_optimizations)
39311 return NULL_TREE;
39313 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39314 n = TYPE_VECTOR_SUBPARTS (type_out);
39315 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39316 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39317 if (el_mode != in_mode
39318 || n != in_n)
39319 return NULL_TREE;
39321 switch (fn)
39323 CASE_CFN_SIN:
39324 CASE_CFN_COS:
39325 CASE_CFN_EXP:
39326 CASE_CFN_LOG:
39327 CASE_CFN_LOG2:
39328 CASE_CFN_LOG10:
39329 if (el_mode == DFmode && n == 2)
39331 name[4] = 'd';
39332 name[5] = '2';
39334 else if (el_mode == SFmode && n == 4)
39336 name[4] = 's';
39337 name[5] = '4';
39339 else
39340 return NULL_TREE;
39341 break;
39343 default:
39344 return NULL_TREE;
39347 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39348 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39349 sprintf (name + 7, "%s", bname+10);
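/* E.g. sin over V2DF becomes "__vrd2_sin" and sinf over V4SF becomes
   "__vrs4_sinf".  */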
39351 arity = 0;
39352 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39353 arity++;
39355 if (arity == 1)
39356 fntype = build_function_type_list (type_out, type_in, NULL);
39357 else
39358 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39360 /* Build a function declaration for the vectorized function. */
39361 new_fndecl = build_decl (BUILTINS_LOCATION,
39362 FUNCTION_DECL, get_identifier (name), fntype);
39363 TREE_PUBLIC (new_fndecl) = 1;
39364 DECL_EXTERNAL (new_fndecl) = 1;
39365 DECL_IS_NOVOPS (new_fndecl) = 1;
39366 TREE_READONLY (new_fndecl) = 1;
39368 return new_fndecl;
39371 /* Returns a decl of a function that implements gather load with
39372 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
39373 Return NULL_TREE if it is not available. */
39375 static tree
39376 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39377 const_tree index_type, int scale)
39379 bool si;
39380 enum ix86_builtins code;
39382 if (! TARGET_AVX2 || !TARGET_USE_GATHER)
39383 return NULL_TREE;
39385 if ((TREE_CODE (index_type) != INTEGER_TYPE
39386 && !POINTER_TYPE_P (index_type))
39387 || (TYPE_MODE (index_type) != SImode
39388 && TYPE_MODE (index_type) != DImode))
39389 return NULL_TREE;
39391 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39392 return NULL_TREE;
39394 /* v*gather* insn sign extends index to pointer mode. */
39395 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39396 && TYPE_UNSIGNED (index_type))
39397 return NULL_TREE;
39399 if (scale <= 0
39400 || scale > 8
39401 || (scale & (scale - 1)) != 0)
39402 return NULL_TREE;
39404 si = TYPE_MODE (index_type) == SImode;
39405 switch (TYPE_MODE (mem_vectype))
39407 case E_V2DFmode:
39408 if (TARGET_AVX512VL)
39409 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39410 else
39411 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39412 break;
39413 case E_V4DFmode:
39414 if (TARGET_AVX512VL)
39415 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39416 else
39417 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39418 break;
39419 case E_V2DImode:
39420 if (TARGET_AVX512VL)
39421 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39422 else
39423 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39424 break;
39425 case E_V4DImode:
39426 if (TARGET_AVX512VL)
39427 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39428 else
39429 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39430 break;
39431 case E_V4SFmode:
39432 if (TARGET_AVX512VL)
39433 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39434 else
39435 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39436 break;
39437 case E_V8SFmode:
39438 if (TARGET_AVX512VL)
39439 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39440 else
39441 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39442 break;
39443 case E_V4SImode:
39444 if (TARGET_AVX512VL)
39445 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39446 else
39447 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39448 break;
39449 case E_V8SImode:
39450 if (TARGET_AVX512VL)
39451 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39452 else
39453 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39454 break;
39455 case E_V8DFmode:
39456 if (TARGET_AVX512F)
39457 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39458 else
39459 return NULL_TREE;
39460 break;
39461 case E_V8DImode:
39462 if (TARGET_AVX512F)
39463 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39464 else
39465 return NULL_TREE;
39466 break;
39467 case E_V16SFmode:
39468 if (TARGET_AVX512F)
39469 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39470 else
39471 return NULL_TREE;
39472 break;
39473 case E_V16SImode:
39474 if (TARGET_AVX512F)
39475 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
39476 else
39477 return NULL_TREE;
39478 break;
39479 default:
39480 return NULL_TREE;
39483 return ix86_get_builtin (code);
39486 /* Returns a decl of a function that implements scatter store with
39487 register type VECTYPE and index type INDEX_TYPE and SCALE.
39488 Return NULL_TREE if it is not available. */
39490 static tree
39491 ix86_vectorize_builtin_scatter (const_tree vectype,
39492 const_tree index_type, int scale)
39494 bool si;
39495 enum ix86_builtins code;
39497 if (!TARGET_AVX512F)
39498 return NULL_TREE;
39500 if ((TREE_CODE (index_type) != INTEGER_TYPE
39501 && !POINTER_TYPE_P (index_type))
39502 || (TYPE_MODE (index_type) != SImode
39503 && TYPE_MODE (index_type) != DImode))
39504 return NULL_TREE;
39506 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39507 return NULL_TREE;
39509 /* v*scatter* insn sign extends index to pointer mode. */
39510 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39511 && TYPE_UNSIGNED (index_type))
39512 return NULL_TREE;
39514 /* Scale can be 1, 2, 4 or 8. */
39515 if (scale <= 0
39516 || scale > 8
39517 || (scale & (scale - 1)) != 0)
39518 return NULL_TREE;
39520 si = TYPE_MODE (index_type) == SImode;
39521 switch (TYPE_MODE (vectype))
39523 case E_V8DFmode:
39524 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
39525 break;
39526 case E_V8DImode:
39527 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
39528 break;
39529 case E_V16SFmode:
39530 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
39531 break;
39532 case E_V16SImode:
39533 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
39534 break;
39535 default:
39536 return NULL_TREE;
39539 return ix86_builtins[code];
39542 /* Return true if it is safe to use the rsqrt optabs to optimize
39543 1.0/sqrt. */
39545 static bool
39546 use_rsqrt_p ()
39548 return (TARGET_SSE_MATH
39549 && flag_finite_math_only
39550 && !flag_trapping_math
39551 && flag_unsafe_math_optimizations);
39554 /* Returns a code for a target-specific builtin that implements
39555 reciprocal of the function, or NULL_TREE if not available. */
39557 static tree
39558 ix86_builtin_reciprocal (tree fndecl)
39560 switch (DECL_FUNCTION_CODE (fndecl))
39562 /* Vectorized version of sqrt to rsqrt conversion. */
39563 case IX86_BUILTIN_SQRTPS_NR:
39564 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39566 case IX86_BUILTIN_SQRTPS_NR256:
39567 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
39569 default:
39570 return NULL_TREE;
39574 /* Helper for avx_vpermilps256_operand et al. This is also used by
39575 the expansion functions to turn the parallel back into a mask.
39576 The return value is 0 for no match and the imm8+1 for a match. */
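/* For example, in V4SFmode the parallel (2 3 0 1) encodes as imm8 0x4e,
   so the return value is 0x4f.  */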
39579 avx_vpermilp_parallel (rtx par, machine_mode mode)
39581 unsigned i, nelt = GET_MODE_NUNITS (mode);
39582 unsigned mask = 0;
39583 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
39585 if (XVECLEN (par, 0) != (int) nelt)
39586 return 0;
39588 /* Validate that all of the elements are constants, and not totally
39589 out of range. Copy the data into an integral array to make the
39590 subsequent checks easier. */
39591 for (i = 0; i < nelt; ++i)
39593 rtx er = XVECEXP (par, 0, i);
39594 unsigned HOST_WIDE_INT ei;
39596 if (!CONST_INT_P (er))
39597 return 0;
39598 ei = INTVAL (er);
39599 if (ei >= nelt)
39600 return 0;
39601 ipar[i] = ei;
39604 switch (mode)
39606 case E_V8DFmode:
39607 /* In the 512-bit DFmode case, we can only move elements within
39608 a 128-bit lane. First fill the second part of the mask,
39609 then fallthru. */
39610 for (i = 4; i < 6; ++i)
39612 if (ipar[i] < 4 || ipar[i] >= 6)
39613 return 0;
39614 mask |= (ipar[i] - 4) << i;
39616 for (i = 6; i < 8; ++i)
39618 if (ipar[i] < 6)
39619 return 0;
39620 mask |= (ipar[i] - 6) << i;
39622 /* FALLTHRU */
39624 case E_V4DFmode:
39625 /* In the 256-bit DFmode case, we can only move elements within
39626 a 128-bit lane. */
39627 for (i = 0; i < 2; ++i)
39629 if (ipar[i] >= 2)
39630 return 0;
39631 mask |= ipar[i] << i;
39633 for (i = 2; i < 4; ++i)
39635 if (ipar[i] < 2)
39636 return 0;
39637 mask |= (ipar[i] - 2) << i;
39639 break;
39641 case E_V16SFmode:
39642 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
39643 must mirror the permutation in the lower 256 bits. */
39644 for (i = 0; i < 8; ++i)
39645 if (ipar[i] + 8 != ipar[i + 8])
39646 return 0;
39647 /* FALLTHRU */
39649 case E_V8SFmode:
39650 /* In the 256-bit SFmode case, we have full freedom of
39651 movement within the low 128-bit lane, but the high 128-bit
39652 lane must mirror the exact same pattern. */
39653 for (i = 0; i < 4; ++i)
39654 if (ipar[i] + 4 != ipar[i + 4])
39655 return 0;
39656 nelt = 4;
39657 /* FALLTHRU */
39659 case E_V2DFmode:
39660 case E_V4SFmode:
39661 /* In the 128-bit case, we have full freedom in the placement of
39662 the elements from the source operand. */
39663 for (i = 0; i < nelt; ++i)
39664 mask |= ipar[i] << (i * (nelt / 2));
39665 break;
39667 default:
39668 gcc_unreachable ();
39671 /* Make sure success has a non-zero value by adding one. */
39672 return mask + 1;
39675 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
39676 the expansion functions to turn the parallel back into a mask.
39677 The return value is 0 for no match and the imm8+1 for a match. */
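/* For example, in V4DFmode the parallel (0 1 4 5) selects the low lane
   of each source and encodes as imm8 0x20, so the return value is 0x21.  */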
39680 avx_vperm2f128_parallel (rtx par, machine_mode mode)
39682 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39683 unsigned mask = 0;
39684 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
39686 if (XVECLEN (par, 0) != (int) nelt)
39687 return 0;
39689 /* Validate that all of the elements are constants, and not totally
39690 out of range. Copy the data into an integral array to make the
39691 subsequent checks easier. */
39692 for (i = 0; i < nelt; ++i)
39694 rtx er = XVECEXP (par, 0, i);
39695 unsigned HOST_WIDE_INT ei;
39697 if (!CONST_INT_P (er))
39698 return 0;
39699 ei = INTVAL (er);
39700 if (ei >= 2 * nelt)
39701 return 0;
39702 ipar[i] = ei;
39705 /* Validate that each half of the permute selects consecutive elements. */
39706 for (i = 0; i < nelt2 - 1; ++i)
39707 if (ipar[i] + 1 != ipar[i + 1])
39708 return 0;
39709 for (i = nelt2; i < nelt - 1; ++i)
39710 if (ipar[i] + 1 != ipar[i + 1])
39711 return 0;
39713 /* Reconstruct the mask. */
39714 for (i = 0; i < 2; ++i)
39716 unsigned e = ipar[i * nelt2];
39717 if (e % nelt2)
39718 return 0;
39719 e /= nelt2;
39720 mask |= e << (i * 4);
39723 /* Make sure success has a non-zero value by adding one. */
39724 return mask + 1;
39727 /* Return a register priority for hard reg REGNO. */
39728 static int
39729 ix86_register_priority (int hard_regno)
39731 /* ebp and r13 as the base always want a displacement, and r12 as the
39732 base always wants an index. So discourage their use in an
39733 address. */
39734 if (hard_regno == R12_REG || hard_regno == R13_REG)
39735 return 0;
39736 if (hard_regno == BP_REG)
39737 return 1;
39738 /* New x86-64 int registers result in bigger code size. Discourage
39739 them. */
39740 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
39741 return 2;
39742 /* New x86-64 SSE registers result in bigger code size. Discourage
39743 them. */
39744 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
39745 return 2;
39746 /* Usage of AX register results in smaller code. Prefer it. */
39747 if (hard_regno == AX_REG)
39748 return 4;
39749 return 3;
39752 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39754 Put float CONST_DOUBLE in the constant pool instead of fp regs.
39755 QImode must go into class Q_REGS.
39756 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
39757 movdf to do mem-to-mem moves through integer regs. */
39759 static reg_class_t
39760 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39762 machine_mode mode = GET_MODE (x);
39764 /* We're only allowed to return a subclass of CLASS. Many of the
39765 following checks fail for NO_REGS, so eliminate that early. */
39766 if (regclass == NO_REGS)
39767 return NO_REGS;
39769 /* All classes can load zeros. */
39770 if (x == CONST0_RTX (mode))
39771 return regclass;
39773 /* Force constants into memory if we are loading a (nonzero) constant into
39774 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
39775 instructions to load from a constant. */
39776 if (CONSTANT_P (x)
39777 && (MAYBE_MMX_CLASS_P (regclass)
39778 || MAYBE_SSE_CLASS_P (regclass)
39779 || MAYBE_MASK_CLASS_P (regclass)))
39780 return NO_REGS;
39782 /* Floating-point constants need more complex checks. */
39783 if (CONST_DOUBLE_P (x))
39785 /* General regs can load everything. */
39786 if (INTEGER_CLASS_P (regclass))
39787 return regclass;
39789 /* Floats can load 0 and 1 plus some others. Note that we eliminated
39790 zero above. We only want to wind up preferring 80387 registers if
39791 we plan on doing computation with them. */
39792 if (IS_STACK_MODE (mode)
39793 && standard_80387_constant_p (x) > 0)
39795 /* Limit class to FP regs. */
39796 if (FLOAT_CLASS_P (regclass))
39797 return FLOAT_REGS;
39798 else if (regclass == FP_TOP_SSE_REGS)
39799 return FP_TOP_REG;
39800 else if (regclass == FP_SECOND_SSE_REGS)
39801 return FP_SECOND_REG;
39804 return NO_REGS;
39807 /* Prefer SSE regs only, if we can use them for math. */
39808 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39809 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
39811 /* Generally when we see PLUS here, it's the function invariant
39812 (plus soft-fp const_int), which can only be computed into general
39813 regs. */
39814 if (GET_CODE (x) == PLUS)
39815 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
39817 /* QImode constants are easy to load, but non-constant QImode data
39818 must go into Q_REGS. */
39819 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
39821 if (Q_CLASS_P (regclass))
39822 return regclass;
39823 else if (reg_class_subset_p (Q_REGS, regclass))
39824 return Q_REGS;
39825 else
39826 return NO_REGS;
39829 return regclass;
39832 /* Discourage putting floating-point values in SSE registers unless
39833 SSE math is being used, and likewise for the 387 registers. */
39834 static reg_class_t
39835 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
39837 machine_mode mode = GET_MODE (x);
39839 /* Restrict the output reload class to the register bank that we are doing
39840 math on. If we would like not to return a subset of CLASS, reject this
39841 alternative: if reload cannot do this, it will still use its choice. */
39843 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39844 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
39846 if (IS_STACK_MODE (mode))
39848 if (regclass == FP_TOP_SSE_REGS)
39849 return FP_TOP_REG;
39850 else if (regclass == FP_SECOND_SSE_REGS)
39851 return FP_SECOND_REG;
39852 else
39853 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
39856 return regclass;
39859 static reg_class_t
39860 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
39861 machine_mode mode, secondary_reload_info *sri)
39863 /* Double-word spills from general registers to non-offsettable memory
39864 references (zero-extended addresses) require special handling. */
39865 if (TARGET_64BIT
39866 && MEM_P (x)
39867 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
39868 && INTEGER_CLASS_P (rclass)
39869 && !offsettable_memref_p (x))
39871 sri->icode = (in_p
39872 ? CODE_FOR_reload_noff_load
39873 : CODE_FOR_reload_noff_store);
39874 /* Add the cost of moving address to a temporary. */
39875 sri->extra_cost = 1;
39877 return NO_REGS;
39880 /* QImode spills from non-QI registers require
39881 intermediate register on 32bit targets. */
39882 if (mode == QImode
39883 && ((!TARGET_64BIT && !in_p
39884 && INTEGER_CLASS_P (rclass)
39885 && MAYBE_NON_Q_CLASS_P (rclass))
39886 || (!TARGET_AVX512DQ
39887 && MAYBE_MASK_CLASS_P (rclass))))
39889 int regno = true_regnum (x);
39891 /* Return Q_REGS if the operand is in memory. */
39892 if (regno == -1)
39893 return Q_REGS;
39895 return NO_REGS;
39898 /* This condition handles corner case where an expression involving
39899 pointers gets vectorized. We're trying to use the address of a
39900 stack slot as a vector initializer.
39902 (set (reg:V2DI 74 [ vect_cst_.2 ])
39903 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
39905 Eventually frame gets turned into sp+offset like this:
39907 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39908 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39909 (const_int 392 [0x188]))))
39911 That later gets turned into:
39913 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39914 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39915 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
39917 We'll have the following reload recorded:
39919 Reload 0: reload_in (DI) =
39920 (plus:DI (reg/f:DI 7 sp)
39921 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
39922 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39923 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
39924 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
39925 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39926 reload_reg_rtx: (reg:V2DI 22 xmm1)
39928 Which isn't going to work since SSE instructions can't handle scalar
39929 additions. Returning GENERAL_REGS forces the addition into integer
39930 register and reload can handle subsequent reloads without problems. */
39932 if (in_p && GET_CODE (x) == PLUS
39933 && SSE_CLASS_P (rclass)
39934 && SCALAR_INT_MODE_P (mode))
39935 return GENERAL_REGS;
39937 return NO_REGS;
39940 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
39942 static bool
39943 ix86_class_likely_spilled_p (reg_class_t rclass)
39945 switch (rclass)
39947 case AREG:
39948 case DREG:
39949 case CREG:
39950 case BREG:
39951 case AD_REGS:
39952 case SIREG:
39953 case DIREG:
39954 case SSE_FIRST_REG:
39955 case FP_TOP_REG:
39956 case FP_SECOND_REG:
39957 case BND_REGS:
39958 return true;
39960 default:
39961 break;
39964 return false;
39967 /* If we are copying between registers from different register sets
39968 (e.g. FP and integer), we may need a memory location.
39970 The function can't work reliably when one of the CLASSES is a class
39971 containing registers from multiple sets. We avoid this by never combining
39972 different sets in a single alternative in the machine description.
39973 Ensure that this constraint holds to avoid unexpected surprises.
39975 When STRICT is false, we are being called from REGISTER_MOVE_COST,
39976 so do not enforce these sanity checks.
39978 To optimize register_move_cost performance, define inline variant. */
39980 static inline bool
39981 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
39982 reg_class_t class2, int strict)
39984 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
39985 return false;
39987 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
39988 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
39989 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
39990 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
39991 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
39992 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
39993 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
39994 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
39996 gcc_assert (!strict || lra_in_progress);
39997 return true;
40000 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
40001 return true;
40003 /* Between mask and general, we have moves no larger than word size. */
40004 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
40005 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
40006 return true;
40008 /* ??? This is a lie. We do have moves between mmx/general, and between
40009 mmx/sse2. But by saying we need secondary memory we discourage the
40010 register allocator from using the mmx registers unless needed. */
40011 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
40012 return true;
40014 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40016 /* SSE1 doesn't have any direct moves from other classes. */
40017 if (!TARGET_SSE2)
40018 return true;
40020 /* If the target says that inter-unit moves are more expensive
40021 than moving through memory, then don't generate them. */
40022 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
40023 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
40024 return true;
40026 /* Between SSE and general, we have moves no larger than word size. */
40027 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40028 return true;
40031 return false;
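/* Illustrative: on a 32-bit target a DImode value moved between MASK_REGS and
   GENERAL_REGS, or between SSE and integer registers, exceeds the word-size
   limit checked above and therefore has to go through memory. */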
40034 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
40036 static bool
40037 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
40038 reg_class_t class2)
40040 return inline_secondary_memory_needed (mode, class1, class2, true);
40043 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
40045 get_secondary_mem widens integral modes to BITS_PER_WORD.
40046 There is no need to emit full 64 bit move on 64 bit targets
40047 for integral modes that can be moved using 32 bit move. */
40049 static machine_mode
40050 ix86_secondary_memory_needed_mode (machine_mode mode)
40052 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
40053 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
40054 return mode;
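/* For example, a QImode or HImode value spilled through memory is widened to
   SImode here; a 32-bit move is sufficient on both 32-bit and 64-bit targets. */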
40057 /* Implement the TARGET_CLASS_MAX_NREGS hook.
40059 On the 80386, this is the size of MODE in words,
40060 except in the FP regs, where a single reg is always enough. */
40062 static unsigned char
40063 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
40065 if (MAYBE_INTEGER_CLASS_P (rclass))
40067 if (mode == XFmode)
40068 return (TARGET_64BIT ? 2 : 3);
40069 else if (mode == XCmode)
40070 return (TARGET_64BIT ? 4 : 6);
40071 else
40072 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40074 else
40076 if (COMPLEX_MODE_P (mode))
40077 return 2;
40078 else
40079 return 1;
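/* Illustrative examples: XFmode needs 3 registers in the integer classes on a
   32-bit target (2 on a 64-bit target), but only a single register in the x87
   or SSE classes; complex modes outside the integer classes take 2 registers. */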
40083 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
40085 static bool
40086 ix86_can_change_mode_class (machine_mode from, machine_mode to,
40087 reg_class_t regclass)
40089 if (from == to)
40090 return true;
40092 /* x87 registers can't do subreg at all, as all values are reformatted
40093 to extended precision. */
40094 if (MAYBE_FLOAT_CLASS_P (regclass))
40095 return false;
40097 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
40099 /* Vector registers do not support QI or HImode loads. If we don't
40100 disallow a change to these modes, reload will assume it's ok to
40101 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
40102 the vec_dupv4hi pattern. */
40103 if (GET_MODE_SIZE (from) < 4)
40104 return false;
40107 return true;
40110 /* Return index of MODE in the sse load/store tables. */
40112 static inline int
40113 sse_store_index (machine_mode mode)
40115 switch (GET_MODE_SIZE (mode))
40117 case 4:
40118 return 0;
40119 case 8:
40120 return 1;
40121 case 16:
40122 return 2;
40123 case 32:
40124 return 3;
40125 case 64:
40126 return 4;
40127 default:
40128 return -1;
40132 /* Return the cost of moving data of mode MODE between a
40133 register and memory. A value of 2 is the default; this cost is
40134 relative to those in `REGISTER_MOVE_COST'.
40136 This function is used extensively by register_move_cost, which is used to
40137 build tables at startup. Make it inline in this case.
40138 When IN is 2, return the maximum of the in and out move costs.
40140 If moving between registers and memory is more expensive than
40141 between two registers, you should define this macro to express the
40142 relative cost.
40144 Also model the increased cost of moving QImode registers that are not
40145 in Q_REGS classes. */
40147 static inline int
40148 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
40149 int in)
40151 int cost;
40152 if (FLOAT_CLASS_P (regclass))
40154 int index;
40155 switch (mode)
40157 case E_SFmode:
40158 index = 0;
40159 break;
40160 case E_DFmode:
40161 index = 1;
40162 break;
40163 case E_XFmode:
40164 index = 2;
40165 break;
40166 default:
40167 return 100;
40169 if (in == 2)
40170 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
40171 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
40173 if (SSE_CLASS_P (regclass))
40175 int index = sse_store_index (mode);
40176 if (index == -1)
40177 return 100;
40178 if (in == 2)
40179 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
40180 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
40182 if (MMX_CLASS_P (regclass))
40184 int index;
40185 switch (GET_MODE_SIZE (mode))
40187 case 4:
40188 index = 0;
40189 break;
40190 case 8:
40191 index = 1;
40192 break;
40193 default:
40194 return 100;
40196 if (in == 2)
40197 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
40198 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
40200 switch (GET_MODE_SIZE (mode))
40202 case 1:
40203 if (Q_CLASS_P (regclass) || TARGET_64BIT)
40205 if (!in)
40206 return ix86_cost->int_store[0];
40207 if (TARGET_PARTIAL_REG_DEPENDENCY
40208 && optimize_function_for_speed_p (cfun))
40209 cost = ix86_cost->movzbl_load;
40210 else
40211 cost = ix86_cost->int_load[0];
40212 if (in == 2)
40213 return MAX (cost, ix86_cost->int_store[0]);
40214 return cost;
40216 else
40218 if (in == 2)
40219 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
40220 if (in)
40221 return ix86_cost->movzbl_load;
40222 else
40223 return ix86_cost->int_store[0] + 4;
40225 break;
40226 case 2:
40227 if (in == 2)
40228 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
40229 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
40230 default:
40231 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
40232 if (mode == TFmode)
40233 mode = XFmode;
40234 if (in == 2)
40235 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
40236 else if (in)
40237 cost = ix86_cost->int_load[2];
40238 else
40239 cost = ix86_cost->int_store[2];
40240 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
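/* Worked example (illustrative): moving a DImode value between GENERAL_REGS
   and memory on a 32-bit target reaches the default case above, so the cost
   is int_load[2] (or int_store[2]) scaled by CEIL (8, 4) == 2 word-sized
   moves. */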
40244 static int
40245 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
40246 bool in)
40248 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
40252 /* Return the cost of moving data from a register in class CLASS1 to
40253 one in class CLASS2.
40255 It is not required that the cost always equal 2 when CLASS1 is the same as CLASS2;
40256 on some machines it is expensive to move between registers if they are not
40257 general registers. */
40259 static int
40260 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
40261 reg_class_t class2_i)
40263 enum reg_class class1 = (enum reg_class) class1_i;
40264 enum reg_class class2 = (enum reg_class) class2_i;
40266 /* In case we require secondary memory, compute cost of the store followed
40267 by load. In order to avoid bad register allocation choices, we need
40268 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
40270 if (inline_secondary_memory_needed (mode, class1, class2, false))
40272 int cost = 1;
40274 cost += inline_memory_move_cost (mode, class1, 2);
40275 cost += inline_memory_move_cost (mode, class2, 2);
40277 /* In case of copying from a general purpose register we may emit multiple
40278 stores followed by a single load, causing a memory size mismatch stall.
40279 Count this as an arbitrarily high cost of 20. */
40280 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
40281 && TARGET_MEMORY_MISMATCH_STALL
40282 && targetm.class_max_nregs (class1, mode)
40283 > targetm.class_max_nregs (class2, mode))
40284 cost += 20;
40286 /* In the case of FP/MMX moves, the registers actually overlap, and we
40287 have to switch modes in order to treat them differently. */
40288 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40289 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40290 cost += 20;
40292 return cost;
40295 /* Moves between SSE/MMX and integer unit are expensive. */
40296 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40297 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40299 /* ??? By keeping returned value relatively high, we limit the number
40300 of moves between integer and MMX/SSE registers for all targets.
40301 Additionally, high value prevents problem with x86_modes_tieable_p(),
40302 where integer modes in MMX/SSE registers are not tieable
40303 because of missing QImode and HImode moves to, from or between
40304 MMX/SSE registers. */
40305 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
40306 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
40308 if (MAYBE_FLOAT_CLASS_P (class1))
40309 return ix86_cost->fp_move;
40310 if (MAYBE_SSE_CLASS_P (class1))
40312 if (GET_MODE_BITSIZE (mode) <= 128)
40313 return ix86_cost->xmm_move;
40314 if (GET_MODE_BITSIZE (mode) <= 256)
40315 return ix86_cost->ymm_move;
40316 return ix86_cost->zmm_move;
40318 if (MAYBE_MMX_CLASS_P (class1))
40319 return ix86_cost->mmx_move;
40320 return 2;
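/* Illustrative: a DImode copy between SSE_REGS and GENERAL_REGS on a 32-bit
   target needs secondary memory, so its cost is 1 plus, for each class, the
   larger of its memory load and store costs (plus the mismatch-stall penalty
   when it applies); a plain GPR-to-GPR copy falls through to the final
   return of 2. */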
40323 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
40324 words of a value of mode MODE but can be less for certain modes in
40325 special long registers.
40327 Actually there are no two-word move instructions for consecutive
40328 registers, and only registers 0-3 may have mov byte instructions
40329 applied to them. */
40331 static unsigned int
40332 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
40334 if (GENERAL_REGNO_P (regno))
40336 if (mode == XFmode)
40337 return TARGET_64BIT ? 2 : 3;
40338 if (mode == XCmode)
40339 return TARGET_64BIT ? 4 : 6;
40340 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40342 if (COMPLEX_MODE_P (mode))
40343 return 2;
40344 if (mode == V64SFmode || mode == V64SImode)
40345 return 4;
40346 return 1;
40349 /* Implement TARGET_HARD_REGNO_MODE_OK. */
40351 static bool
40352 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
40354 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
40355 if (CC_REGNO_P (regno))
40356 return GET_MODE_CLASS (mode) == MODE_CC;
40357 if (GET_MODE_CLASS (mode) == MODE_CC
40358 || GET_MODE_CLASS (mode) == MODE_RANDOM
40359 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40360 return false;
40361 if (STACK_REGNO_P (regno))
40362 return VALID_FP_MODE_P (mode);
40363 if (MASK_REGNO_P (regno))
40364 return (VALID_MASK_REG_MODE (mode)
40365 || (TARGET_AVX512BW
40366 && VALID_MASK_AVX512BW_MODE (mode)));
40367 if (BND_REGNO_P (regno))
40368 return VALID_BND_REG_MODE (mode);
40369 if (SSE_REGNO_P (regno))
40371 /* We implement the move patterns for all vector modes into and
40372 out of SSE registers, even when no operation instructions
40373 are available. */
40375 /* For AVX-512 we allow, regardless of regno:
40376 - XI mode
40377 - any of 512-bit wide vector mode
40378 - any scalar mode. */
40379 if (TARGET_AVX512F
40380 && (mode == XImode
40381 || VALID_AVX512F_REG_MODE (mode)
40382 || VALID_AVX512F_SCALAR_MODE (mode)))
40383 return true;
40385 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
40386 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40387 && MOD4_SSE_REGNO_P (regno)
40388 && mode == V64SFmode)
40389 return true;
40391 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
40392 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40393 && MOD4_SSE_REGNO_P (regno)
40394 && mode == V64SImode)
40395 return true;
40397 /* TODO check for QI/HI scalars. */
40398 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
40399 if (TARGET_AVX512VL
40400 && (mode == OImode
40401 || mode == TImode
40402 || VALID_AVX256_REG_MODE (mode)
40403 || VALID_AVX512VL_128_REG_MODE (mode)))
40404 return true;
40406 /* xmm16-xmm31 are only available for AVX-512. */
40407 if (EXT_REX_SSE_REGNO_P (regno))
40408 return false;
40410 /* OImode and AVX modes are available only when AVX is enabled. */
40411 return ((TARGET_AVX
40412 && VALID_AVX256_REG_OR_OI_MODE (mode))
40413 || VALID_SSE_REG_MODE (mode)
40414 || VALID_SSE2_REG_MODE (mode)
40415 || VALID_MMX_REG_MODE (mode)
40416 || VALID_MMX_REG_MODE_3DNOW (mode));
40418 if (MMX_REGNO_P (regno))
40420 /* We implement the move patterns for 3DNOW modes even in MMX mode,
40421 so if the register is available at all, then we can move data of
40422 the given mode into or out of it. */
40423 return (VALID_MMX_REG_MODE (mode)
40424 || VALID_MMX_REG_MODE_3DNOW (mode));
40427 if (mode == QImode)
40429 /* Take care for QImode values - they can be in non-QI regs,
40430 but then they do cause partial register stalls. */
40431 if (ANY_QI_REGNO_P (regno))
40432 return true;
40433 if (!TARGET_PARTIAL_REG_STALL)
40434 return true;
40435 /* LRA checks if the hard register is OK for the given mode.
40436 QImode values can live in non-QI regs, so we allow all
40437 registers here. */
40438 if (lra_in_progress)
40439 return true;
40440 return !can_create_pseudo_p ();
40442 /* We handle both integer and floats in the general purpose registers. */
40443 else if (VALID_INT_MODE_P (mode))
40444 return true;
40445 else if (VALID_FP_MODE_P (mode))
40446 return true;
40447 else if (VALID_DFP_MODE_P (mode))
40448 return true;
40449 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
40450 on to use that value in smaller contexts, this can easily force a
40451 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
40452 supporting DImode, allow it. */
40453 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40454 return true;
40456 return false;
40459 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
40460 saves SSE registers across calls is Win64 (thus no need to check the
40461 current ABI here), and with AVX enabled Win64 only guarantees that
40462 the low 16 bytes are saved. */
40464 static bool
40465 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
40467 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
40470 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
40471 tieable integer mode. */
40473 static bool
40474 ix86_tieable_integer_mode_p (machine_mode mode)
40476 switch (mode)
40478 case E_HImode:
40479 case E_SImode:
40480 return true;
40482 case E_QImode:
40483 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40485 case E_DImode:
40486 return TARGET_64BIT;
40488 default:
40489 return false;
40493 /* Implement TARGET_MODES_TIEABLE_P.
40495 Return true if MODE1 is accessible in a register that can hold MODE2
40496 without copying. That is, all register classes that can hold MODE2
40497 can also hold MODE1. */
40499 static bool
40500 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
40502 if (mode1 == mode2)
40503 return true;
40505 if (ix86_tieable_integer_mode_p (mode1)
40506 && ix86_tieable_integer_mode_p (mode2))
40507 return true;
40509 /* MODE2 being XFmode implies fp stack or general regs, which means we
40510 can tie any smaller floating point modes to it. Note that we do not
40511 tie this with TFmode. */
40512 if (mode2 == XFmode)
40513 return mode1 == SFmode || mode1 == DFmode;
40515 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40516 that we can tie it with SFmode. */
40517 if (mode2 == DFmode)
40518 return mode1 == SFmode;
40520 /* If MODE2 is only appropriate for an SSE register, then tie with
40521 any other mode acceptable to SSE registers. */
40522 if (GET_MODE_SIZE (mode2) == 32
40523 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40524 return (GET_MODE_SIZE (mode1) == 32
40525 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40526 if (GET_MODE_SIZE (mode2) == 16
40527 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40528 return (GET_MODE_SIZE (mode1) == 16
40529 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40531 /* If MODE2 is appropriate for an MMX register, then tie
40532 with any other mode acceptable to MMX registers. */
40533 if (GET_MODE_SIZE (mode2) == 8
40534 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40535 return (GET_MODE_SIZE (mode1) == 8
40536 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
40538 return false;
40541 /* Return the cost of moving between two registers of mode MODE. */
40543 static int
40544 ix86_set_reg_reg_cost (machine_mode mode)
40546 unsigned int units = UNITS_PER_WORD;
40548 switch (GET_MODE_CLASS (mode))
40550 default:
40551 break;
40553 case MODE_CC:
40554 units = GET_MODE_SIZE (CCmode);
40555 break;
40557 case MODE_FLOAT:
40558 if ((TARGET_SSE && mode == TFmode)
40559 || (TARGET_80387 && mode == XFmode)
40560 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40561 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40562 units = GET_MODE_SIZE (mode);
40563 break;
40565 case MODE_COMPLEX_FLOAT:
40566 if ((TARGET_SSE && mode == TCmode)
40567 || (TARGET_80387 && mode == XCmode)
40568 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40569 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40570 units = GET_MODE_SIZE (mode);
40571 break;
40573 case MODE_VECTOR_INT:
40574 case MODE_VECTOR_FLOAT:
40575 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40576 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40577 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40578 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40579 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40580 units = GET_MODE_SIZE (mode);
40583 /* Return the cost of moving between two registers of mode MODE,
40584 assuming that the move will be in pieces of at most UNITS bytes. */
40585 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
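/* E.g. (illustrative): with TARGET_AVX a V8SFmode register copy moves as one
   32-byte unit and costs COSTS_N_INSNS (1); without AVX support for the mode
   the value is assumed to move in UNITS_PER_WORD-sized pieces. */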
40588 /* Return cost of vector operation in MODE given that scalar version has
40589 COST. If PARALLEL is true assume that CPU has more than one unit
40590 performing the operation. */
40592 static int
40593 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
40595 if (!VECTOR_MODE_P (mode))
40596 return cost;
40598 if (!parallel)
40599 return cost * GET_MODE_NUNITS (mode);
40600 if (GET_MODE_BITSIZE (mode) == 128
40601 && TARGET_SSE_SPLIT_REGS)
40602 return cost * 2;
40603 if (GET_MODE_BITSIZE (mode) > 128
40604 && TARGET_AVX128_OPTIMAL)
40605 return cost * GET_MODE_BITSIZE (mode) / 128;
40606 return cost;
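/* Illustrative: for a 256-bit vector operation with scalar cost C the result
   is C on a full-width implementation, 2 * C when TARGET_AVX128_OPTIMAL splits
   it into 128-bit halves, and NUNITS * C when there is no parallel vector
   unit at all (PARALLEL == false). */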
40609 /* Return cost of multiplication in MODE. */
40611 static int
40612 ix86_multiplication_cost (const struct processor_costs *cost,
40613 enum machine_mode mode)
40615 machine_mode inner_mode = mode;
40616 if (VECTOR_MODE_P (mode))
40617 inner_mode = GET_MODE_INNER (mode);
40619 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40620 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
40621 else if (X87_FLOAT_MODE_P (mode))
40622 return cost->fmul;
40623 else if (FLOAT_MODE_P (mode))
40624 return ix86_vec_cost (mode,
40625 inner_mode == DFmode
40626 ? cost->mulsd : cost->mulss, true);
40627 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40629 /* vpmullq is used in this case. No emulation is needed. */
40630 if (TARGET_AVX512DQ)
40631 return ix86_vec_cost (mode, cost->mulss, true);
40633 /* V*QImode is emulated with 7-13 insns. */
40634 if (mode == V16QImode || mode == V32QImode)
40636 int extra = 11;
40637 if (TARGET_XOP && mode == V16QImode)
40638 extra = 5;
40639 else if (TARGET_SSSE3)
40640 extra = 6;
40641 return ix86_vec_cost (mode,
40642 cost->mulss * 2 + cost->sse_op * extra,
40643 true);
40645 /* V*DImode is emulated with 5-8 insns. */
40646 else if (mode == V2DImode || mode == V4DImode)
40648 if (TARGET_XOP && mode == V2DImode)
40649 return ix86_vec_cost (mode,
40650 cost->mulss * 2 + cost->sse_op * 3,
40651 true);
40652 else
40653 return ix86_vec_cost (mode,
40654 cost->mulss * 3 + cost->sse_op * 5,
40655 true);
40657 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40658 insns, including two PMULUDQ. */
40659 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40660 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
40661 true);
40662 else
40663 return ix86_vec_cost (mode, cost->mulss, true);
40665 else
40666 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
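/* Illustrative: a V4SImode multiply without SSE4.1/AVX has no PMULLD and is
   emulated with two PMULUDQ plus shuffles, costed above as
   2 * mulss + 5 * sse_op; with AVX512DQ a V2DImode multiply maps directly to
   VPMULLQ and is costed as a single mulss-class operation. A plain scalar
   integer multiply is costed as mult_init[MODE_INDEX (mode)] + 7 * mult_bit. */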
40669 /* Return cost of division in MODE. */
40671 static int
40672 ix86_division_cost (const struct processor_costs *cost,
40673 enum machine_mode mode)
40675 machine_mode inner_mode = mode;
40676 if (VECTOR_MODE_P (mode))
40677 inner_mode = GET_MODE_INNER (mode);
40679 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40680 return inner_mode == DFmode ? cost->divsd : cost->divss;
40681 else if (X87_FLOAT_MODE_P (mode))
40682 return cost->fdiv;
40683 else if (FLOAT_MODE_P (mode))
40684 return ix86_vec_cost (mode,
40685 inner_mode == DFmode ? cost->divsd : cost->divss,
40686 true);
40687 else
40688 return cost->divide[MODE_INDEX (mode)];
40691 /* Return cost of shift in MODE.
40692 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
40693 AND_IN_OP1 specifies whether op1 is the result of an AND, and
40694 SHIFT_AND_TRUNCATE whether op1 is a subreg of such an AND result.
40696 SKIP_OP0/1 are set to true if the cost of OP0/1 should be ignored. */
40698 static int
40699 ix86_shift_rotate_cost (const struct processor_costs *cost,
40700 enum machine_mode mode, bool constant_op1,
40701 HOST_WIDE_INT op1_val,
40702 bool speed,
40703 bool and_in_op1,
40704 bool shift_and_truncate,
40705 bool *skip_op0, bool *skip_op1)
40707 if (skip_op0)
40708 *skip_op0 = *skip_op1 = false;
40709 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40711 /* V*QImode is emulated with 1-11 insns. */
40712 if (mode == V16QImode || mode == V32QImode)
40714 int count = 11;
40715 if (TARGET_XOP && mode == V16QImode)
40717 /* For XOP we use vpshab, which requires a broadcast of the
40718 value to the variable shift insn. For constants this
40719 means a V16Q const in mem; even when we can perform the
40720 shift with one insn set the cost to prefer paddb. */
40721 if (constant_op1)
40723 if (skip_op1)
40724 *skip_op1 = true;
40725 return ix86_vec_cost (mode,
40726 cost->sse_op
40727 + (speed
40729 : COSTS_N_BYTES
40730 (GET_MODE_UNIT_SIZE (mode))), true);
40732 count = 3;
40734 else if (TARGET_SSSE3)
40735 count = 7;
40736 return ix86_vec_cost (mode, cost->sse_op * count, true);
40738 else
40739 return ix86_vec_cost (mode, cost->sse_op, true);
40741 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40743 if (constant_op1)
40745 if (op1_val > 32)
40746 return cost->shift_const + COSTS_N_INSNS (2);
40747 else
40748 return cost->shift_const * 2;
40750 else
40752 if (and_in_op1)
40753 return cost->shift_var * 2;
40754 else
40755 return cost->shift_var * 6 + COSTS_N_INSNS (2);
40758 else
40760 if (constant_op1)
40761 return cost->shift_const;
40762 else if (shift_and_truncate)
40764 if (skip_op0)
40765 *skip_op0 = *skip_op1 = true;
40766 /* Return the cost after shift-and truncation. */
40767 return cost->shift_var;
40769 else
40770 return cost->shift_var;
40772 return cost->shift_const;
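/* Illustrative: a constant shift of a word-sized GPR costs shift_const; a
   variable shift of a double-word value on a 32-bit target costs
   shift_var * 6 + COSTS_N_INSNS (2), unless the count is already masked by
   an AND, in which case it is only shift_var * 2. */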
40775 /* Compute a (partial) cost for rtx X. Return true if the complete
40776 cost has been computed, and false if subexpressions should be
40777 scanned. In either case, *TOTAL contains the cost result. */
40779 static bool
40780 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
40781 int *total, bool speed)
40783 rtx mask;
40784 enum rtx_code code = GET_CODE (x);
40785 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40786 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40787 int src_cost;
40789 switch (code)
40791 case SET:
40792 if (register_operand (SET_DEST (x), VOIDmode)
40793 && reg_or_0_operand (SET_SRC (x), VOIDmode))
40795 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40796 return true;
40799 if (register_operand (SET_SRC (x), VOIDmode))
40800 /* Avoid potentially incorrect high cost from rtx_costs
40801 for non-tieable SUBREGs. */
40802 src_cost = 0;
40803 else
40805 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
40807 if (CONSTANT_P (SET_SRC (x)))
40808 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
40809 a small value, possibly zero for cheap constants. */
40810 src_cost += COSTS_N_INSNS (1);
40813 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
40814 return true;
40816 case CONST_INT:
40817 case CONST:
40818 case LABEL_REF:
40819 case SYMBOL_REF:
40820 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
40821 *total = 3;
40822 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
40823 *total = 2;
40824 else if (flag_pic && SYMBOLIC_CONST (x)
40825 && !(TARGET_64BIT
40826 && (GET_CODE (x) == LABEL_REF
40827 || (GET_CODE (x) == SYMBOL_REF
40828 && SYMBOL_REF_LOCAL_P (x))))
40829 /* Use 0 cost for CONST to improve its propagation. */
40830 && (TARGET_64BIT || GET_CODE (x) != CONST))
40831 *total = 1;
40832 else
40833 *total = 0;
40834 return true;
40836 case CONST_DOUBLE:
40837 if (IS_STACK_MODE (mode))
40838 switch (standard_80387_constant_p (x))
40840 case -1:
40841 case 0:
40842 break;
40843 case 1: /* 0.0 */
40844 *total = 1;
40845 return true;
40846 default: /* Other constants */
40847 *total = 2;
40848 return true;
40850 /* FALLTHRU */
40852 case CONST_VECTOR:
40853 switch (standard_sse_constant_p (x, mode))
40855 case 0:
40856 break;
40857 case 1: /* 0: xor eliminates false dependency */
40858 *total = 0;
40859 return true;
40860 default: /* -1: cmp contains false dependency */
40861 *total = 1;
40862 return true;
40864 /* FALLTHRU */
40866 case CONST_WIDE_INT:
40867 /* Fall back to (MEM (SYMBOL_REF)), since that's where
40868 it'll probably end up. Add a penalty for size. */
40869 *total = (COSTS_N_INSNS (1)
40870 + (!TARGET_64BIT && flag_pic)
40871 + (GET_MODE_SIZE (mode) <= 4
40872 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
40873 return true;
40875 case ZERO_EXTEND:
40876 /* The zero extension is often completely free on x86_64, so make
40877 it as cheap as possible. */
40878 if (TARGET_64BIT && mode == DImode
40879 && GET_MODE (XEXP (x, 0)) == SImode)
40880 *total = 1;
40881 else if (TARGET_ZERO_EXTEND_WITH_AND)
40882 *total = cost->add;
40883 else
40884 *total = cost->movzx;
40885 return false;
40887 case SIGN_EXTEND:
40888 *total = cost->movsx;
40889 return false;
40891 case ASHIFT:
40892 if (SCALAR_INT_MODE_P (mode)
40893 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40894 && CONST_INT_P (XEXP (x, 1)))
40896 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40897 if (value == 1)
40899 *total = cost->add;
40900 return false;
40902 if ((value == 2 || value == 3)
40903 && cost->lea <= cost->shift_const)
40905 *total = cost->lea;
40906 return false;
40909 /* FALLTHRU */
40911 case ROTATE:
40912 case ASHIFTRT:
40913 case LSHIFTRT:
40914 case ROTATERT:
40915 bool skip_op0, skip_op1;
40916 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
40917 CONST_INT_P (XEXP (x, 1))
40918 ? INTVAL (XEXP (x, 1)) : -1,
40919 speed,
40920 GET_CODE (XEXP (x, 1)) == AND,
40921 SUBREG_P (XEXP (x, 1))
40922 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
40923 &skip_op0, &skip_op1);
40924 if (skip_op0 || skip_op1)
40926 if (!skip_op0)
40927 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
40928 if (!skip_op1)
40929 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
40930 return true;
40932 return false;
40934 case FMA:
40936 rtx sub;
40938 gcc_assert (FLOAT_MODE_P (mode));
40939 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40941 *total = ix86_vec_cost (mode,
40942 mode == SFmode ? cost->fmass : cost->fmasd,
40943 true);
40944 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
40946 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
40947 sub = XEXP (x, 0);
40948 if (GET_CODE (sub) == NEG)
40949 sub = XEXP (sub, 0);
40950 *total += rtx_cost (sub, mode, FMA, 0, speed);
40952 sub = XEXP (x, 2);
40953 if (GET_CODE (sub) == NEG)
40954 sub = XEXP (sub, 0);
40955 *total += rtx_cost (sub, mode, FMA, 2, speed);
40956 return true;
40959 case MULT:
40960 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
40962 rtx op0 = XEXP (x, 0);
40963 rtx op1 = XEXP (x, 1);
40964 int nbits;
40965 if (CONST_INT_P (XEXP (x, 1)))
40967 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40968 for (nbits = 0; value != 0; value &= value - 1)
40969 nbits++;
40971 else
40972 /* This is arbitrary. */
40973 nbits = 7;
40975 /* Compute costs correctly for widening multiplication. */
40976 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
40977 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
40978 == GET_MODE_SIZE (mode))
40980 int is_mulwiden = 0;
40981 machine_mode inner_mode = GET_MODE (op0);
40983 if (GET_CODE (op0) == GET_CODE (op1))
40984 is_mulwiden = 1, op1 = XEXP (op1, 0);
40985 else if (CONST_INT_P (op1))
40987 if (GET_CODE (op0) == SIGN_EXTEND)
40988 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
40989 == INTVAL (op1);
40990 else
40991 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
40994 if (is_mulwiden)
40995 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
40998 *total = (cost->mult_init[MODE_INDEX (mode)]
40999 + nbits * cost->mult_bit
41000 + rtx_cost (op0, mode, outer_code, opno, speed)
41001 + rtx_cost (op1, mode, outer_code, opno, speed));
41003 return true;
41005 *total = ix86_multiplication_cost (cost, mode);
41006 return false;
41008 case DIV:
41009 case UDIV:
41010 case MOD:
41011 case UMOD:
41012 *total = ix86_division_cost (cost, mode);
41013 return false;
41015 case PLUS:
41016 if (GET_MODE_CLASS (mode) == MODE_INT
41017 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
41019 if (GET_CODE (XEXP (x, 0)) == PLUS
41020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
41021 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
41022 && CONSTANT_P (XEXP (x, 1)))
41024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
41025 if (val == 2 || val == 4 || val == 8)
41027 *total = cost->lea;
41028 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
41029 outer_code, opno, speed);
41030 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
41031 outer_code, opno, speed);
41032 *total += rtx_cost (XEXP (x, 1), mode,
41033 outer_code, opno, speed);
41034 return true;
41037 else if (GET_CODE (XEXP (x, 0)) == MULT
41038 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
41040 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
41041 if (val == 2 || val == 4 || val == 8)
41043 *total = cost->lea;
41044 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41045 outer_code, opno, speed);
41046 *total += rtx_cost (XEXP (x, 1), mode,
41047 outer_code, opno, speed);
41048 return true;
41051 else if (GET_CODE (XEXP (x, 0)) == PLUS)
41053 /* Add with carry, ignore the cost of adding a carry flag. */
41054 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
41055 *total = cost->add;
41056 else
41058 *total = cost->lea;
41059 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41060 outer_code, opno, speed);
41063 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
41064 outer_code, opno, speed);
41065 *total += rtx_cost (XEXP (x, 1), mode,
41066 outer_code, opno, speed);
41067 return true;
41070 /* FALLTHRU */
41072 case MINUS:
41073 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
41074 if (GET_MODE_CLASS (mode) == MODE_INT
41075 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
41076 && GET_CODE (XEXP (x, 0)) == MINUS
41077 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
41079 *total = cost->add;
41080 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41081 outer_code, opno, speed);
41082 *total += rtx_cost (XEXP (x, 1), mode,
41083 outer_code, opno, speed);
41084 return true;
41087 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41089 *total = cost->addss;
41090 return false;
41092 else if (X87_FLOAT_MODE_P (mode))
41094 *total = cost->fadd;
41095 return false;
41097 else if (FLOAT_MODE_P (mode))
41099 *total = ix86_vec_cost (mode, cost->addss, true);
41100 return false;
41102 /* FALLTHRU */
41104 case AND:
41105 case IOR:
41106 case XOR:
41107 if (GET_MODE_CLASS (mode) == MODE_INT
41108 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41110 *total = (cost->add * 2
41111 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
41112 << (GET_MODE (XEXP (x, 0)) != DImode))
41113 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
41114 << (GET_MODE (XEXP (x, 1)) != DImode)));
41115 return true;
41117 /* FALLTHRU */
41119 case NEG:
41120 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41122 *total = cost->sse_op;
41123 return false;
41125 else if (X87_FLOAT_MODE_P (mode))
41127 *total = cost->fchs;
41128 return false;
41130 else if (FLOAT_MODE_P (mode))
41132 *total = ix86_vec_cost (mode, cost->sse_op, true);
41133 return false;
41135 /* FALLTHRU */
41137 case NOT:
41138 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41139 *total = ix86_vec_cost (mode, cost->sse_op, true);
41140 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41141 *total = cost->add * 2;
41142 else
41143 *total = cost->add;
41144 return false;
41146 case COMPARE:
41147 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
41148 && XEXP (XEXP (x, 0), 1) == const1_rtx
41149 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
41150 && XEXP (x, 1) == const0_rtx)
41152 /* This kind of construct is implemented using test[bwl].
41153 Treat it as if we had an AND. */
41154 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
41155 *total = (cost->add
41156 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
41157 opno, speed)
41158 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
41159 return true;
41162 /* The embedded comparison operand is completely free. */
41163 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
41164 && XEXP (x, 1) == const0_rtx)
41165 *total = 0;
41167 return false;
41169 case FLOAT_EXTEND:
41170 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41171 *total = 0;
41172 else
41173 *total = ix86_vec_cost (mode, cost->addss, true);
41174 return false;
41176 case FLOAT_TRUNCATE:
41177 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41178 *total = cost->fadd;
41179 else
41180 *total = ix86_vec_cost (mode, cost->addss, true);
41181 return false;
41183 case ABS:
41184 /* SSE requires memory load for the constant operand. It may make
41185 sense to account for this. Of course the constant operand may or
41186 may not be reused. */
41187 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41188 *total = cost->sse_op;
41189 else if (X87_FLOAT_MODE_P (mode))
41190 *total = cost->fabs;
41191 else if (FLOAT_MODE_P (mode))
41192 *total = ix86_vec_cost (mode, cost->sse_op, true);
41193 return false;
41195 case SQRT:
41196 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41197 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
41198 else if (X87_FLOAT_MODE_P (mode))
41199 *total = cost->fsqrt;
41200 else if (FLOAT_MODE_P (mode))
41201 *total = ix86_vec_cost (mode,
41202 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
41203 true);
41204 return false;
41206 case UNSPEC:
41207 if (XINT (x, 1) == UNSPEC_TP)
41208 *total = 0;
41209 return false;
41211 case VEC_SELECT:
41212 case VEC_CONCAT:
41213 case VEC_DUPLICATE:
41214 /* ??? Assume all of these vector manipulation patterns are
41215 recognizable, in which case they all pretty much have the
41216 same cost. */
41217 *total = cost->sse_op;
41218 return true;
41219 case VEC_MERGE:
41220 mask = XEXP (x, 2);
41221 /* This is a masked instruction; assume the same cost
41222 as the nonmasked variant. */
41223 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
41224 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
41225 else
41226 *total = cost->sse_op;
41227 return true;
41229 default:
41230 return false;
41234 #if TARGET_MACHO
41236 static int current_machopic_label_num;
41238 /* Given a symbol name and its associated stub, write out the
41239 definition of the stub. */
41241 void
41242 machopic_output_stub (FILE *file, const char *symb, const char *stub)
41244 unsigned int length;
41245 char *binder_name, *symbol_name, lazy_ptr_name[32];
41246 int label = ++current_machopic_label_num;
41248 /* For 64-bit we shouldn't get here. */
41249 gcc_assert (!TARGET_64BIT);
41251 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
41252 symb = targetm.strip_name_encoding (symb);
41254 length = strlen (stub);
41255 binder_name = XALLOCAVEC (char, length + 32);
41256 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
41258 length = strlen (symb);
41259 symbol_name = XALLOCAVEC (char, length + 32);
41260 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
41262 sprintf (lazy_ptr_name, "L%d$lz", label);
41264 if (MACHOPIC_ATT_STUB)
41265 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41266 else if (MACHOPIC_PURE)
41267 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41268 else
41269 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41271 fprintf (file, "%s:\n", stub);
41272 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41274 if (MACHOPIC_ATT_STUB)
41276 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41278 else if (MACHOPIC_PURE)
41280 /* PIC stub. */
41281 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41282 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41283 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
41284 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41285 label, lazy_ptr_name, label);
41286 fprintf (file, "\tjmp\t*%%ecx\n");
41288 else
41289 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41291 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41292 it needs no stub-binding-helper. */
41293 if (MACHOPIC_ATT_STUB)
41294 return;
41296 fprintf (file, "%s:\n", binder_name);
41298 if (MACHOPIC_PURE)
41300 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41301 fprintf (file, "\tpushl\t%%ecx\n");
41303 else
41304 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41306 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41308 /* N.B. Keep the correspondence of these
41309 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41310 old-pic/new-pic/non-pic stubs; altering this will break
41311 compatibility with existing dylibs. */
41312 if (MACHOPIC_PURE)
41314 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41315 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41317 else
41318 /* 16-byte -mdynamic-no-pic stub. */
41319 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
41321 fprintf (file, "%s:\n", lazy_ptr_name);
41322 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41323 fprintf (file, ASM_LONG "%s\n", binder_name);
41325 #endif /* TARGET_MACHO */
41327 /* Order the registers for register allocator. */
41329 void
41330 x86_order_regs_for_local_alloc (void)
41332 int pos = 0;
41333 int i;
41335 /* First allocate the local general purpose registers. */
41336 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41337 if (GENERAL_REGNO_P (i) && call_used_regs[i])
41338 reg_alloc_order [pos++] = i;
41340 /* Global general purpose registers. */
41341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41342 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41343 reg_alloc_order [pos++] = i;
41345 /* x87 registers come first in case we are doing FP math
41346 using them. */
41347 if (!TARGET_SSE_MATH)
41348 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41349 reg_alloc_order [pos++] = i;
41351 /* SSE registers. */
41352 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41353 reg_alloc_order [pos++] = i;
41354 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41355 reg_alloc_order [pos++] = i;
41357 /* Extended REX SSE registers. */
41358 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41359 reg_alloc_order [pos++] = i;
41361 /* Mask register. */
41362 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41363 reg_alloc_order [pos++] = i;
41365 /* MPX bound registers. */
41366 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
41367 reg_alloc_order [pos++] = i;
41369 /* x87 registers. */
41370 if (TARGET_SSE_MATH)
41371 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41372 reg_alloc_order [pos++] = i;
41374 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41375 reg_alloc_order [pos++] = i;
41377 /* Initialize the rest of array as we do not allocate some registers
41378 at all. */
41379 while (pos < FIRST_PSEUDO_REGISTER)
41380 reg_alloc_order [pos++] = 0;
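/* The resulting allocation order is therefore roughly: call-clobbered GPRs,
   call-saved GPRs, the x87 stack registers early when they are used for FP
   math (otherwise later), SSE registers, extended REX SSE registers, mask
   registers, MPX bound registers, MMX registers, and zero padding for the
   slots that are never allocated. */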
41383 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41384 in struct attribute_spec handler. */
41385 static tree
41386 ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
41387 bool *no_add_attrs)
41389 if (TREE_CODE (*node) != FUNCTION_TYPE
41390 && TREE_CODE (*node) != METHOD_TYPE
41391 && TREE_CODE (*node) != FIELD_DECL
41392 && TREE_CODE (*node) != TYPE_DECL)
41394 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41395 name);
41396 *no_add_attrs = true;
41397 return NULL_TREE;
41399 if (TARGET_64BIT)
41401 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41402 name);
41403 *no_add_attrs = true;
41404 return NULL_TREE;
41406 if (is_attribute_p ("callee_pop_aggregate_return", name))
41408 tree cst;
41410 cst = TREE_VALUE (args);
41411 if (TREE_CODE (cst) != INTEGER_CST)
41413 warning (OPT_Wattributes,
41414 "%qE attribute requires an integer constant argument",
41415 name);
41416 *no_add_attrs = true;
41418 else if (compare_tree_int (cst, 0) != 0
41419 && compare_tree_int (cst, 1) != 0)
41421 warning (OPT_Wattributes,
41422 "argument to %qE attribute is neither zero, nor one",
41423 name);
41424 *no_add_attrs = true;
41427 return NULL_TREE;
41430 return NULL_TREE;
41433 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
41434 struct attribute_spec.handler. */
41435 static tree
41436 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
41437 bool *no_add_attrs)
41439 if (TREE_CODE (*node) != FUNCTION_TYPE
41440 && TREE_CODE (*node) != METHOD_TYPE
41441 && TREE_CODE (*node) != FIELD_DECL
41442 && TREE_CODE (*node) != TYPE_DECL)
41444 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41445 name);
41446 *no_add_attrs = true;
41447 return NULL_TREE;
41450 /* ms_abi and sysv_abi are mutually exclusive; reject combining them. */
41451 if (is_attribute_p ("ms_abi", name))
41453 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41455 error ("ms_abi and sysv_abi attributes are not compatible");
41458 return NULL_TREE;
41460 else if (is_attribute_p ("sysv_abi", name))
41462 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41464 error ("ms_abi and sysv_abi attributes are not compatible");
41467 return NULL_TREE;
41470 return NULL_TREE;
41473 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41474 struct attribute_spec.handler. */
41475 static tree
41476 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
41477 bool *no_add_attrs)
41479 tree *type = NULL;
41480 if (DECL_P (*node))
41482 if (TREE_CODE (*node) == TYPE_DECL)
41483 type = &TREE_TYPE (*node);
41485 else
41486 type = node;
41488 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41490 warning (OPT_Wattributes, "%qE attribute ignored",
41491 name);
41492 *no_add_attrs = true;
41495 else if ((is_attribute_p ("ms_struct", name)
41496 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41497 || ((is_attribute_p ("gcc_struct", name)
41498 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41500 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41501 name);
41502 *no_add_attrs = true;
41505 return NULL_TREE;
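/* Handle attributes that may only be applied to FUNCTION_DECLs, such as
   "indirect_branch" and "function_return"; arguments as in
   struct attribute_spec.handler. */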
41508 static tree
41509 ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
41510 bool *no_add_attrs)
41512 if (TREE_CODE (*node) != FUNCTION_DECL)
41514 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41515 name);
41516 *no_add_attrs = true;
41519 if (is_attribute_p ("indirect_branch", name))
41521 tree cst = TREE_VALUE (args);
41522 if (TREE_CODE (cst) != STRING_CST)
41524 warning (OPT_Wattributes,
41525 "%qE attribute requires a string constant argument",
41526 name);
41527 *no_add_attrs = true;
41529 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41530 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41531 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41532 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41534 warning (OPT_Wattributes,
41535 "argument to %qE attribute is not "
41536 "(keep|thunk|thunk-inline|thunk-extern)", name);
41537 *no_add_attrs = true;
41541 if (is_attribute_p ("function_return", name))
41543 tree cst = TREE_VALUE (args);
41544 if (TREE_CODE (cst) != STRING_CST)
41546 warning (OPT_Wattributes,
41547 "%qE attribute requires a string constant argument",
41548 name);
41549 *no_add_attrs = true;
41551 else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41552 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41553 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41554 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41556 warning (OPT_Wattributes,
41557 "argument to %qE attribute is not "
41558 "(keep|thunk|thunk-inline|thunk-extern)", name);
41559 *no_add_attrs = true;
41563 return NULL_TREE;
41566 static tree
41567 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
41568 int, bool *)
41570 return NULL_TREE;
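/* Handle an "interrupt" attribute; arguments as in
   struct attribute_spec.handler. The checks below accept only handlers of
   the rough form (illustrative; "frame" and "uword" are placeholder names):
     void handler (struct frame *frame);
     void handler (struct frame *frame, uword error_code);
   i.e. a pointer first argument, an optional word-mode integer second
   argument, and a void return type. */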
41573 static tree
41574 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
41576 /* DECL_RESULT and DECL_ARGUMENTS do not exist here yet,
41577 but the function type contains args and return type data. */
41578 tree func_type = *node;
41579 tree return_type = TREE_TYPE (func_type);
41581 int nargs = 0;
41582 tree current_arg_type = TYPE_ARG_TYPES (func_type);
41583 while (current_arg_type
41584 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
41586 if (nargs == 0)
41588 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
41589 error ("interrupt service routine should have a pointer "
41590 "as the first argument");
41592 else if (nargs == 1)
41594 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
41595 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
41596 error ("interrupt service routine should have unsigned %s"
41597 "int as the second argument",
41598 TARGET_64BIT
41599 ? (TARGET_X32 ? "long long " : "long ")
41600 : "");
41602 nargs++;
41603 current_arg_type = TREE_CHAIN (current_arg_type);
41605 if (!nargs || nargs > 2)
41606 error ("interrupt service routine can only have a pointer argument "
41607 "and an optional integer argument");
41608 if (! VOID_TYPE_P (return_type))
41609 error ("interrupt service routine can't have non-void return value");
41611 return NULL_TREE;
41614 static bool
41615 ix86_ms_bitfield_layout_p (const_tree record_type)
41617 return ((TARGET_MS_BITFIELD_LAYOUT
41618 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41619 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
41622 /* Returns an expression indicating where the this parameter is
41623 located on entry to the FUNCTION. */
41625 static rtx
41626 x86_this_parameter (tree function)
41628 tree type = TREE_TYPE (function);
41629 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41630 int nregs;
41632 if (TARGET_64BIT)
41634 const int *parm_regs;
41636 if (ix86_function_type_abi (type) == MS_ABI)
41637 parm_regs = x86_64_ms_abi_int_parameter_registers;
41638 else
41639 parm_regs = x86_64_int_parameter_registers;
41640 return gen_rtx_REG (Pmode, parm_regs[aggr]);
41643 nregs = ix86_function_regparm (type, function);
41645 if (nregs > 0 && !stdarg_p (type))
41647 int regno;
41648 unsigned int ccvt = ix86_get_callcvt (type);
41650 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41651 regno = aggr ? DX_REG : CX_REG;
41652 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41654 regno = CX_REG;
41655 if (aggr)
41656 return gen_rtx_MEM (SImode,
41657 plus_constant (Pmode, stack_pointer_rtx, 4));
41659 else
41661 regno = AX_REG;
41662 if (aggr)
41664 regno = DX_REG;
41665 if (nregs == 1)
41666 return gen_rtx_MEM (SImode,
41667 plus_constant (Pmode,
41668 stack_pointer_rtx, 4));
41671 return gen_rtx_REG (SImode, regno);
41674 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41675 aggr ? 8 : 4));
41678 /* Determine whether x86_output_mi_thunk can succeed. */
41680 static bool
41681 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
41682 const_tree function)
41684 /* 64-bit can handle anything. */
41685 if (TARGET_64BIT)
41686 return true;
41688 /* For 32-bit, everything's fine if we have one free register. */
41689 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41690 return true;
41692 /* Need a free register for vcall_offset. */
41693 if (vcall_offset)
41694 return false;
41696 /* Need a free register for GOT references. */
41697 if (flag_pic && !targetm.binds_local_p (function))
41698 return false;
41700 /* Otherwise ok. */
41701 return true;
41704 /* Output the assembler code for a thunk function. THUNK_DECL is the
41705 declaration for the thunk function itself, FUNCTION is the decl for
41706 the target function. DELTA is an immediate constant offset to be
41707 added to THIS. If VCALL_OFFSET is nonzero, the word at
41708 *(*this + vcall_offset) should be added to THIS. */
41710 static void
41711 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
41712 HOST_WIDE_INT vcall_offset, tree function)
41714 rtx this_param = x86_this_parameter (function);
41715 rtx this_reg, tmp, fnaddr;
41716 unsigned int tmp_regno;
41717 rtx_insn *insn;
41719 if (TARGET_64BIT)
41720 tmp_regno = R10_REG;
41721 else
41723 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41724 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41725 tmp_regno = AX_REG;
41726 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41727 tmp_regno = DX_REG;
41728 else
41729 tmp_regno = CX_REG;
41732 emit_note (NOTE_INSN_PROLOGUE_END);
41734 /* If CET branch protection is enabled, insert an ENDBR instruction. */
41735 if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
41736 emit_insn (gen_nop_endbr ());
41738 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
41739 pull it in now and let DELTA benefit. */
41740 if (REG_P (this_param))
41741 this_reg = this_param;
41742 else if (vcall_offset)
41744 /* Put the this parameter into %eax. */
41745 this_reg = gen_rtx_REG (Pmode, AX_REG);
41746 emit_move_insn (this_reg, this_param);
41748 else
41749 this_reg = NULL_RTX;
41751 /* Adjust the this parameter by a fixed constant. */
41752 if (delta)
41754 rtx delta_rtx = GEN_INT (delta);
41755 rtx delta_dst = this_reg ? this_reg : this_param;
41757 if (TARGET_64BIT)
41759 if (!x86_64_general_operand (delta_rtx, Pmode))
41761 tmp = gen_rtx_REG (Pmode, tmp_regno);
41762 emit_move_insn (tmp, delta_rtx);
41763 delta_rtx = tmp;
41767 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41770 /* Adjust the this parameter by a value stored in the vtable. */
41771 if (vcall_offset)
41773 rtx vcall_addr, vcall_mem, this_mem;
41775 tmp = gen_rtx_REG (Pmode, tmp_regno);
41777 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41778 if (Pmode != ptr_mode)
41779 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41780 emit_move_insn (tmp, this_mem);
41782 /* Adjust the this parameter. */
41783 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41784 if (TARGET_64BIT
41785 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41787 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41788 emit_move_insn (tmp2, GEN_INT (vcall_offset));
41789 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41792 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41793 if (Pmode != ptr_mode)
41794 emit_insn (gen_addsi_1_zext (this_reg,
41795 gen_rtx_REG (ptr_mode,
41796 REGNO (this_reg)),
41797 vcall_mem));
41798 else
41799 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41802 /* If necessary, drop THIS back to its stack slot. */
41803 if (this_reg && this_reg != this_param)
41804 emit_move_insn (this_param, this_reg);
41806 fnaddr = XEXP (DECL_RTL (function), 0);
41807 if (TARGET_64BIT)
41809 if (!flag_pic || targetm.binds_local_p (function)
41810 || TARGET_PECOFF)
41812 else
41814 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41815 tmp = gen_rtx_CONST (Pmode, tmp);
41816 fnaddr = gen_const_mem (Pmode, tmp);
41819 else
41821 if (!flag_pic || targetm.binds_local_p (function))
41823 #if TARGET_MACHO
41824 else if (TARGET_MACHO)
41826 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41827 fnaddr = XEXP (fnaddr, 0);
41829 #endif /* TARGET_MACHO */
41830 else
41832 tmp = gen_rtx_REG (Pmode, CX_REG);
41833 output_set_got (tmp, NULL_RTX);
41835 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41836 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41837 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41838 fnaddr = gen_const_mem (Pmode, fnaddr);
41842 /* Our sibling call patterns do not allow memories, because we have no
41843 predicate that can distinguish between frame and non-frame memory.
41844 For our purposes here, we can get away with (ab)using a jump pattern,
41845 because we're going to do no optimization. */
41846 if (MEM_P (fnaddr))
41848 if (sibcall_insn_operand (fnaddr, word_mode))
41850 fnaddr = XEXP (DECL_RTL (function), 0);
41851 tmp = gen_rtx_MEM (QImode, fnaddr);
41852 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41853 tmp = emit_call_insn (tmp);
41854 SIBLING_CALL_P (tmp) = 1;
41856 else
41857 emit_jump_insn (gen_indirect_jump (fnaddr));
41859 else
41861 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41863 // CM_LARGE_PIC always uses pseudo PIC register which is
41864 // uninitialized. Since FUNCTION is local and calling it
41865 // doesn't go through PLT, we use scratch register %r11 as
41866 // PIC register and initialize it here.
41867 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
41868 ix86_init_large_pic_reg (tmp_regno);
41869 fnaddr = legitimize_pic_address (fnaddr,
41870 gen_rtx_REG (Pmode, tmp_regno));
41873 if (!sibcall_insn_operand (fnaddr, word_mode))
41875 tmp = gen_rtx_REG (word_mode, tmp_regno);
41876 if (GET_MODE (fnaddr) != word_mode)
41877 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41878 emit_move_insn (tmp, fnaddr);
41879 fnaddr = tmp;
41882 tmp = gen_rtx_MEM (QImode, fnaddr);
41883 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41884 tmp = emit_call_insn (tmp);
41885 SIBLING_CALL_P (tmp) = 1;
41887 emit_barrier ();
41889 /* Emit just enough of rest_of_compilation to get the insns emitted.
41890 Note that use_thunk calls assemble_start_function et al. */
41891 insn = get_insns ();
41892 shorten_branches (insn);
41893 final_start_function (insn, file, 1);
41894 final (insn, file, 1);
41895 final_end_function ();
41898 static void
41899 x86_file_start (void)
41901 default_file_start ();
41902 if (TARGET_16BIT)
41903 fputs ("\t.code16gcc\n", asm_out_file);
41904 #if TARGET_MACHO
41905 darwin_file_start ();
41906 #endif
41907 if (X86_FILE_START_VERSION_DIRECTIVE)
41908 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41909 if (X86_FILE_START_FLTUSED)
41910 fputs ("\t.global\t__fltused\n", asm_out_file);
41911 if (ix86_asm_dialect == ASM_INTEL)
41912 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41916 x86_field_alignment (tree type, int computed)
41918 machine_mode mode;
41920 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41921 return computed;
41922 if (TARGET_IAMCU)
41923 return iamcu_alignment (type, computed);
41924 mode = TYPE_MODE (strip_array_types (type));
41925 if (mode == DFmode || mode == DCmode
41926 || GET_MODE_CLASS (mode) == MODE_INT
41927 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41928 return MIN (32, computed);
41929 return computed;
41932 /* Print call to TARGET to FILE. */
41934 static void
41935 x86_print_call_or_nop (FILE *file, const char *target)
41937 if (flag_nop_mcount)
41938 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
41939 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
41940 else
41941 fprintf (file, "1:\tcall\t%s\n", target);
41944 /* Output assembler code to FILE to increment profiler label # LABELNO
41945 for profiling a function entry. */
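/* Added note (not part of the original sources): on a typical x86-64
   GNU/Linux target compiled with -pg -mfentry -mrecord-mcount this emits
   something like
       1:	call	__fentry__
   followed by a __mcount_loc section entry ".quad 1b" recording the call
   site for later patching (the scheme used e.g. by the Linux kernel).  */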
41946 void
41947 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41949 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
41950 : MCOUNT_NAME);
41951 if (TARGET_64BIT)
41953 #ifndef NO_PROFILE_COUNTERS
41954 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41955 #endif
41957 if (!TARGET_PECOFF && flag_pic)
41958 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41959 else
41960 x86_print_call_or_nop (file, mcount_name);
41962 else if (flag_pic)
41964 #ifndef NO_PROFILE_COUNTERS
41965 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41966 LPREFIX, labelno);
41967 #endif
41968 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41970 else
41972 #ifndef NO_PROFILE_COUNTERS
41973 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
41974 LPREFIX, labelno);
41975 #endif
41976 x86_print_call_or_nop (file, mcount_name);
41979 if (flag_record_mcount)
41981 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
41982 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
41983 fprintf (file, "\t.previous\n");
41987 /* We don't have exact information about the insn sizes, but we may assume
41988 quite safely that we are informed about all 1 byte insns and memory
41989 address sizes. This is enough to eliminate unnecessary padding in
41990 99% of cases. */
41993 ix86_min_insn_size (rtx_insn *insn)
41995 int l = 0, len;
41997 if (!INSN_P (insn) || !active_insn_p (insn))
41998 return 0;
42000 /* Discard alignments we've emitted, and jump instructions. */
42001 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
42002 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
42003 return 0;
42005 /* Important case - calls are always 5 bytes.
42006 It is common to have many calls in a row. */
42007 if (CALL_P (insn)
42008 && symbolic_reference_mentioned_p (PATTERN (insn))
42009 && !SIBLING_CALL_P (insn))
42010 return 5;
42011 len = get_attr_length (insn);
42012 if (len <= 1)
42013 return 1;
42015 /* For normal instructions we rely on get_attr_length being exact,
42016 with a few exceptions. */
42017 if (!JUMP_P (insn))
42019 enum attr_type type = get_attr_type (insn);
42021 switch (type)
42023 case TYPE_MULTI:
42024 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
42025 || asm_noperands (PATTERN (insn)) >= 0)
42026 return 0;
42027 break;
42028 case TYPE_OTHER:
42029 case TYPE_FCMP:
42030 break;
42031 default:
42032 /* Otherwise trust get_attr_length. */
42033 return len;
42036 l = get_attr_length_address (insn);
42037 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
42038 l = 4;
42040 if (l)
42041 return 1+l;
42042 else
42043 return 2;
42046 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42048 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
42049 window. */
42051 static void
42052 ix86_avoid_jump_mispredicts (void)
42054 rtx_insn *insn, *start = get_insns ();
42055 int nbytes = 0, njumps = 0;
42056 bool isjump = false;
42058 /* Look for all minimal intervals of instructions containing 4 jumps.
42059 The intervals are bounded by START and INSN. NBYTES is the total
42060 size of instructions in the interval including INSN and not including
42061 START. When NBYTES is smaller than 16 bytes, it is possible
42062 that the end of START and INSN end up in the same 16-byte page.
42064 The smallest offset in the page at which INSN can start is the case where
42065 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
42066 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
42068 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
42069 have to, since control transfer to its label(s) can be performed through
42070 other means, and we also estimate the minimum length of all asm stmts as 0. */
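/* Worked example (added comment, not part of the original sources): if INSN
   turns out to be the fourth jump packed into fewer than 16 bytes, the
   window is trimmed until only three jumps remain; with NBYTES == 12 after
   trimming and INSN itself 2 bytes long, padsize below is 15 - 12 + 2 = 5,
   enough padding that INSN can no longer share a 16-byte window with all
   three remaining jumps.  */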
42071 for (insn = start; insn; insn = NEXT_INSN (insn))
42073 int min_size;
42075 if (LABEL_P (insn))
42077 int align = label_to_alignment (insn);
42078 int max_skip = label_to_max_skip (insn);
42080 if (max_skip > 15)
42081 max_skip = 15;
42082 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
42083 already in the current 16 byte page, because otherwise
42084 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
42085 bytes to reach 16 byte boundary. */
42086 if (align <= 0
42087 || (align <= 3 && max_skip != (1 << align) - 1))
42088 max_skip = 0;
42089 if (dump_file)
42090 fprintf (dump_file, "Label %i with max_skip %i\n",
42091 INSN_UID (insn), max_skip);
42092 if (max_skip)
42094 while (nbytes + max_skip >= 16)
42096 start = NEXT_INSN (start);
42097 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42098 || CALL_P (start))
42099 njumps--, isjump = true;
42100 else
42101 isjump = false;
42102 nbytes -= ix86_min_insn_size (start);
42105 continue;
42108 min_size = ix86_min_insn_size (insn);
42109 nbytes += min_size;
42110 if (dump_file)
42111 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
42112 INSN_UID (insn), min_size);
42113 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
42114 || CALL_P (insn))
42115 njumps++;
42116 else
42117 continue;
42119 while (njumps > 3)
42121 start = NEXT_INSN (start);
42122 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42123 || CALL_P (start))
42124 njumps--, isjump = true;
42125 else
42126 isjump = false;
42127 nbytes -= ix86_min_insn_size (start);
42129 gcc_assert (njumps >= 0);
42130 if (dump_file)
42131 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
42132 INSN_UID (start), INSN_UID (insn), nbytes);
42134 if (njumps == 3 && isjump && nbytes < 16)
42136 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
42138 if (dump_file)
42139 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
42140 INSN_UID (insn), padsize);
42141 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
42145 #endif
42147 /* The AMD Athlon works faster
42148 when RET is not the destination of a conditional jump or directly preceded
42149 by another jump instruction. We avoid the penalty by inserting a NOP just
42150 before the RET instruction in such cases. */
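/* Added note (not part of the original sources): the replacement emitted
   below, simple_return_internal_long, is believed to expand to the
   well-known "rep; ret" idiom recommended for these AMD cores rather than
   to a literal NOP followed by RET.  */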
42151 static void
42152 ix86_pad_returns (void)
42154 edge e;
42155 edge_iterator ei;
42157 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42159 basic_block bb = e->src;
42160 rtx_insn *ret = BB_END (bb);
42161 rtx_insn *prev;
42162 bool replace = false;
42164 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
42165 || optimize_bb_for_size_p (bb))
42166 continue;
42167 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
42168 if (active_insn_p (prev) || LABEL_P (prev))
42169 break;
42170 if (prev && LABEL_P (prev))
42172 edge e;
42173 edge_iterator ei;
42175 FOR_EACH_EDGE (e, ei, bb->preds)
42176 if (EDGE_FREQUENCY (e) && e->src->index >= 0
42177 && !(e->flags & EDGE_FALLTHRU))
42179 replace = true;
42180 break;
42183 if (!replace)
42185 prev = prev_active_insn (ret);
42186 if (prev
42187 && ((JUMP_P (prev) && any_condjump_p (prev))
42188 || CALL_P (prev)))
42189 replace = true;
42190 /* Empty functions get a branch mispredict even when
42191 the jump destination is not visible to us. */
42192 if (!prev && !optimize_function_for_size_p (cfun))
42193 replace = true;
42195 if (replace)
42197 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
42198 delete_insn (ret);
42203 /* Count the minimum number of instructions in BB. Return 4 if the
42204 number of instructions >= 4. */
42206 static int
42207 ix86_count_insn_bb (basic_block bb)
42209 rtx_insn *insn;
42210 int insn_count = 0;
42212 /* Count number of instructions in this block. Return 4 if the number
42213 of instructions >= 4. */
42214 FOR_BB_INSNS (bb, insn)
42216 /* This can only happen in exit blocks. */
42217 if (JUMP_P (insn)
42218 && ANY_RETURN_P (PATTERN (insn)))
42219 break;
42221 if (NONDEBUG_INSN_P (insn)
42222 && GET_CODE (PATTERN (insn)) != USE
42223 && GET_CODE (PATTERN (insn)) != CLOBBER)
42225 insn_count++;
42226 if (insn_count >= 4)
42227 return insn_count;
42231 return insn_count;
42235 /* Count the minimum number of instructions in code path in BB.
42236 Return 4 if the number of instructions >= 4. */
42238 static int
42239 ix86_count_insn (basic_block bb)
42241 edge e;
42242 edge_iterator ei;
42243 int min_prev_count;
42245 /* Only bother counting instructions along paths with no
42246 more than 2 basic blocks between entry and exit. Given
42247 that BB has an edge to exit, determine if a predecessor
42248 of BB has an edge from entry. If so, compute the number
42249 of instructions in the predecessor block. If there
42250 happen to be multiple such blocks, compute the minimum. */
42251 min_prev_count = 4;
42252 FOR_EACH_EDGE (e, ei, bb->preds)
42254 edge prev_e;
42255 edge_iterator prev_ei;
42257 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42259 min_prev_count = 0;
42260 break;
42262 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42264 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42266 int count = ix86_count_insn_bb (e->src);
42267 if (count < min_prev_count)
42268 min_prev_count = count;
42269 break;
42274 if (min_prev_count < 4)
42275 min_prev_count += ix86_count_insn_bb (bb);
42277 return min_prev_count;
42280 /* Pad short functions to 4 instructions. */
42282 static void
42283 ix86_pad_short_function (void)
42285 edge e;
42286 edge_iterator ei;
42288 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42290 rtx_insn *ret = BB_END (e->src);
42291 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42293 int insn_count = ix86_count_insn (e->src);
42295 /* Pad short function. */
42296 if (insn_count < 4)
42298 rtx_insn *insn = ret;
42300 /* Find epilogue. */
42301 while (insn
42302 && (!NOTE_P (insn)
42303 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42304 insn = PREV_INSN (insn);
42306 if (!insn)
42307 insn = ret;
42309 /* Two NOPs count as one instruction, so emit two NOPs per missing instruction. */
42310 insn_count = 2 * (4 - insn_count);
42311 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
42317 /* Fix up a Windows system unwinder issue. If an EH region falls through into
42318 the epilogue, the Windows system unwinder will apply epilogue logic and
42319 produce incorrect offsets. This can be avoided by adding a nop between
42320 the last insn that can throw and the first insn of the epilogue. */
42322 static void
42323 ix86_seh_fixup_eh_fallthru (void)
42325 edge e;
42326 edge_iterator ei;
42328 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42330 rtx_insn *insn, *next;
42332 /* Find the beginning of the epilogue. */
42333 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42334 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42335 break;
42336 if (insn == NULL)
42337 continue;
42339 /* We only care about preceding insns that can throw. */
42340 insn = prev_active_insn (insn);
42341 if (insn == NULL || !can_throw_internal (insn))
42342 continue;
42344 /* Do not separate calls from their debug information. */
42345 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42346 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
42347 insn = next;
42348 else
42349 break;
42351 emit_insn_after (gen_nops (const1_rtx), insn);
42355 /* Given a register number BASE, the lowest of a group of registers, update
42356 regsets IN and OUT with the registers that should be avoided in input
42357 and output operands respectively when trying to avoid generating a modr/m
42358 byte for -mmitigate-rop. */
42360 static void
42361 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
42363 SET_HARD_REG_BIT (out, base);
42364 SET_HARD_REG_BIT (out, base + 1);
42365 SET_HARD_REG_BIT (in, base + 2);
42366 SET_HARD_REG_BIT (in, base + 3);
42369 /* Called if -mmitigate-rop is in effect. Try to rewrite instructions so
42370 that certain encodings of modr/m bytes do not occur. */
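/* Added background note (not part of the original sources): the concern
   behind -mmitigate-rop is that some mod r/m byte values are themselves
   opcodes attractive to return-oriented-programming attacks (0xc3, for
   instance, is RET), so jumping into the middle of such an instruction
   yields a usable gadget. Renaming one of the registers involved changes
   the mod r/m byte so that those values are not emitted.  */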
42371 static void
42372 ix86_mitigate_rop (void)
42374 HARD_REG_SET input_risky;
42375 HARD_REG_SET output_risky;
42376 HARD_REG_SET inout_risky;
42378 CLEAR_HARD_REG_SET (output_risky);
42379 CLEAR_HARD_REG_SET (input_risky);
42380 SET_HARD_REG_BIT (output_risky, AX_REG);
42381 SET_HARD_REG_BIT (output_risky, CX_REG);
42382 SET_HARD_REG_BIT (input_risky, BX_REG);
42383 SET_HARD_REG_BIT (input_risky, DX_REG);
42384 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
42385 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
42386 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
42387 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
42388 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
42389 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
42390 COPY_HARD_REG_SET (inout_risky, input_risky);
42391 IOR_HARD_REG_SET (inout_risky, output_risky);
42393 df_note_add_problem ();
42394 /* Fix up what stack-regs did. */
42395 df_insn_rescan_all ();
42396 df_analyze ();
42398 regrename_init (true);
42399 regrename_analyze (NULL);
42401 auto_vec<du_head_p> cands;
42403 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
42405 if (!NONDEBUG_INSN_P (insn))
42406 continue;
42408 if (GET_CODE (PATTERN (insn)) == USE
42409 || GET_CODE (PATTERN (insn)) == CLOBBER)
42410 continue;
42412 extract_insn (insn);
42414 int opno0, opno1;
42415 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42416 recog_data.n_operands, &opno0,
42417 &opno1);
42419 if (!ix86_rop_should_change_byte_p (modrm))
42420 continue;
42422 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
42424 /* This happens when regrename has to fail a block. */
42425 if (!info->op_info)
42426 continue;
42428 if (info->op_info[opno0].n_chains != 0)
42430 gcc_assert (info->op_info[opno0].n_chains == 1);
42431 du_head_p op0c;
42432 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
42433 if (op0c->target_data_1 + op0c->target_data_2 == 0
42434 && !op0c->cannot_rename)
42435 cands.safe_push (op0c);
42437 op0c->target_data_1++;
42439 if (info->op_info[opno1].n_chains != 0)
42441 gcc_assert (info->op_info[opno1].n_chains == 1);
42442 du_head_p op1c;
42443 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
42444 if (op1c->target_data_1 + op1c->target_data_2 == 0
42445 && !op1c->cannot_rename)
42446 cands.safe_push (op1c);
42448 op1c->target_data_2++;
42452 int i;
42453 du_head_p head;
42454 FOR_EACH_VEC_ELT (cands, i, head)
42456 int old_reg, best_reg;
42457 HARD_REG_SET unavailable;
42459 CLEAR_HARD_REG_SET (unavailable);
42460 if (head->target_data_1)
42461 IOR_HARD_REG_SET (unavailable, output_risky);
42462 if (head->target_data_2)
42463 IOR_HARD_REG_SET (unavailable, input_risky);
42465 int n_uses;
42466 reg_class superclass = regrename_find_superclass (head, &n_uses,
42467 &unavailable);
42468 old_reg = head->regno;
42469 best_reg = find_rename_reg (head, superclass, &unavailable,
42470 old_reg, false);
42471 bool ok = regrename_do_replace (head, best_reg);
42472 gcc_assert (ok);
42473 if (dump_file)
42474 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
42475 reg_names[best_reg], reg_class_names[superclass]);
42479 regrename_finish ();
42481 df_analyze ();
42483 basic_block bb;
42484 regset_head live;
42486 INIT_REG_SET (&live);
42488 FOR_EACH_BB_FN (bb, cfun)
42490 rtx_insn *insn;
42492 COPY_REG_SET (&live, DF_LR_OUT (bb));
42493 df_simulate_initialize_backwards (bb, &live);
42495 FOR_BB_INSNS_REVERSE (bb, insn)
42497 if (!NONDEBUG_INSN_P (insn))
42498 continue;
42500 df_simulate_one_insn_backwards (bb, insn, &live);
42502 if (GET_CODE (PATTERN (insn)) == USE
42503 || GET_CODE (PATTERN (insn)) == CLOBBER)
42504 continue;
42506 extract_insn (insn);
42507 constrain_operands_cached (insn, reload_completed);
42508 int opno0, opno1;
42509 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42510 recog_data.n_operands, &opno0,
42511 &opno1);
42512 if (modrm < 0
42513 || !ix86_rop_should_change_byte_p (modrm)
42514 || opno0 == opno1)
42515 continue;
42517 rtx oldreg = recog_data.operand[opno1];
42518 preprocess_constraints (insn);
42519 const operand_alternative *alt = which_op_alt ();
42521 int i;
42522 for (i = 0; i < recog_data.n_operands; i++)
42523 if (i != opno1
42524 && alt[i].earlyclobber
42525 && reg_overlap_mentioned_p (recog_data.operand[i],
42526 oldreg))
42527 break;
42529 if (i < recog_data.n_operands)
42530 continue;
42532 if (dump_file)
42533 fprintf (dump_file,
42534 "attempting to fix modrm byte in insn %d:"
42535 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
42536 reg_class_names[alt[opno1].cl]);
42538 HARD_REG_SET unavailable;
42539 REG_SET_TO_HARD_REG_SET (unavailable, &live);
42540 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
42541 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
42542 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
42543 IOR_HARD_REG_SET (unavailable, output_risky);
42544 IOR_COMPL_HARD_REG_SET (unavailable,
42545 reg_class_contents[alt[opno1].cl]);
42547 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42548 if (!TEST_HARD_REG_BIT (unavailable, i))
42549 break;
42550 if (i == FIRST_PSEUDO_REGISTER)
42552 if (dump_file)
42553 fprintf (dump_file, ", none available\n");
42554 continue;
42556 if (dump_file)
42557 fprintf (dump_file, " -> %d\n", i);
42558 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
42559 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
42560 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
42565 /* Implement machine specific optimizations. We implement padding of returns
42566 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
42567 static void
42568 ix86_reorg (void)
42570 /* We are freeing block_for_insn in the toplev to keep compatibility
42571 with old MDEP_REORGS that are not CFG based. Recompute it now. */
42572 compute_bb_for_insn ();
42574 if (flag_mitigate_rop)
42575 ix86_mitigate_rop ();
42577 if (TARGET_SEH && current_function_has_exception_handlers ())
42578 ix86_seh_fixup_eh_fallthru ();
42580 if (optimize && optimize_function_for_speed_p (cfun))
42582 if (TARGET_PAD_SHORT_FUNCTION)
42583 ix86_pad_short_function ();
42584 else if (TARGET_PAD_RETURNS)
42585 ix86_pad_returns ();
42586 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42587 if (TARGET_FOUR_JUMP_LIMIT)
42588 ix86_avoid_jump_mispredicts ();
42589 #endif
42593 /* Return nonzero when a QImode register that must be represented via a REX
42594 prefix is used. */
42595 bool
42596 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
42598 int i;
42599 extract_insn_cached (insn);
42600 for (i = 0; i < recog_data.n_operands; i++)
42601 if (GENERAL_REG_P (recog_data.operand[i])
42602 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
42603 return true;
42604 return false;
42607 /* Return true when INSN mentions a register that must be encoded using a REX
42608 prefix. */
42609 bool
42610 x86_extended_reg_mentioned_p (rtx insn)
42612 subrtx_iterator::array_type array;
42613 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
42615 const_rtx x = *iter;
42616 if (REG_P (x)
42617 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
42618 return true;
42620 return false;
42623 /* If profitable, negate (without causing overflow) integer constant
42624 of mode MODE at location LOC. Return true in this case. */
42625 bool
42626 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
42628 HOST_WIDE_INT val;
42630 if (!CONST_INT_P (*loc))
42631 return false;
42633 switch (mode)
42635 case E_DImode:
42636 /* DImode x86_64 constants must fit in 32 bits. */
42637 gcc_assert (x86_64_immediate_operand (*loc, mode));
42639 mode = SImode;
42640 break;
42642 case E_SImode:
42643 case E_HImode:
42644 case E_QImode:
42645 break;
42647 default:
42648 gcc_unreachable ();
42651 /* Avoid overflows. */
42652 if (mode_signbit_p (mode, *loc))
42653 return false;
42655 val = INTVAL (*loc);
42657 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
42658 Exception: -128 has a shorter encoding than 128, so swap the sign and the op. */
42659 if ((val < 0 && val != -128)
42660 || val == 128)
42662 *loc = GEN_INT (-val);
42663 return true;
42666 return false;
42669 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
42670 optabs would emit if we didn't have TFmode patterns. */
42672 void
42673 x86_emit_floatuns (rtx operands[2])
42675 rtx_code_label *neglab, *donelab;
42676 rtx i0, i1, f0, in, out;
42677 machine_mode mode, inmode;
42679 inmode = GET_MODE (operands[1]);
42680 gcc_assert (inmode == SImode || inmode == DImode);
42682 out = operands[0];
42683 in = force_reg (inmode, operands[1]);
42684 mode = GET_MODE (out);
42685 neglab = gen_label_rtx ();
42686 donelab = gen_label_rtx ();
42687 f0 = gen_reg_rtx (mode);
42689 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42691 expand_float (out, in, 0);
42693 emit_jump_insn (gen_jump (donelab));
42694 emit_barrier ();
42696 emit_label (neglab);
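/* Added explanatory comment (not part of the original sources): the input
   has its sign bit set, so it cannot be converted directly as a signed
   value. Compute (in >> 1) | (in & 1) -- halving the value while folding
   the dropped low bit back in so the final rounding is unaffected --
   convert that, and double the result below.  */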
42698 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42699 1, OPTAB_DIRECT);
42700 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42701 1, OPTAB_DIRECT);
42702 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42704 expand_float (f0, i0, 0);
42706 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
42708 emit_label (donelab);
42711 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42712 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42713 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42714 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
42716 /* Get a vector mode of the same size as the original but with elements
42717 twice as wide. This is only guaranteed to apply to integral vectors. */
42719 static inline machine_mode
42720 get_mode_wider_vector (machine_mode o)
42722 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
42723 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
42724 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42725 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42726 return n;
42729 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
42730 fill target with val via vec_duplicate. */
42732 static bool
42733 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
42735 bool ok;
42736 rtx_insn *insn;
42737 rtx dup;
42739 /* First attempt to recognize VAL as-is. */
42740 dup = gen_vec_duplicate (mode, val);
42741 insn = emit_insn (gen_rtx_SET (target, dup));
42742 if (recog_memoized (insn) < 0)
42744 rtx_insn *seq;
42745 machine_mode innermode = GET_MODE_INNER (mode);
42746 rtx reg;
42748 /* If that fails, force VAL into a register. */
42750 start_sequence ();
42751 reg = force_reg (innermode, val);
42752 if (GET_MODE (reg) != innermode)
42753 reg = gen_lowpart (innermode, reg);
42754 SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
42755 seq = get_insns ();
42756 end_sequence ();
42757 if (seq)
42758 emit_insn_before (seq, insn);
42760 ok = recog_memoized (insn) >= 0;
42761 gcc_assert (ok);
42763 return true;
42766 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42767 with all elements equal to VAR. Return true if successful. */
42769 static bool
42770 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
42771 rtx target, rtx val)
42773 bool ok;
42775 switch (mode)
42777 case E_V2SImode:
42778 case E_V2SFmode:
42779 if (!mmx_ok)
42780 return false;
42781 /* FALLTHRU */
42783 case E_V4DFmode:
42784 case E_V4DImode:
42785 case E_V8SFmode:
42786 case E_V8SImode:
42787 case E_V2DFmode:
42788 case E_V2DImode:
42789 case E_V4SFmode:
42790 case E_V4SImode:
42791 case E_V16SImode:
42792 case E_V8DImode:
42793 case E_V16SFmode:
42794 case E_V8DFmode:
42795 return ix86_vector_duplicate_value (mode, target, val);
42797 case E_V4HImode:
42798 if (!mmx_ok)
42799 return false;
42800 if (TARGET_SSE || TARGET_3DNOW_A)
42802 rtx x;
42804 val = gen_lowpart (SImode, val);
42805 x = gen_rtx_TRUNCATE (HImode, val);
42806 x = gen_rtx_VEC_DUPLICATE (mode, x);
42807 emit_insn (gen_rtx_SET (target, x));
42808 return true;
42810 goto widen;
42812 case E_V8QImode:
42813 if (!mmx_ok)
42814 return false;
42815 goto widen;
42817 case E_V8HImode:
42818 if (TARGET_AVX2)
42819 return ix86_vector_duplicate_value (mode, target, val);
42821 if (TARGET_SSE2)
42823 struct expand_vec_perm_d dperm;
42824 rtx tmp1, tmp2;
42826 permute:
42827 memset (&dperm, 0, sizeof (dperm));
42828 dperm.target = target;
42829 dperm.vmode = mode;
42830 dperm.nelt = GET_MODE_NUNITS (mode);
42831 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42832 dperm.one_operand_p = true;
42834 /* Extend to SImode using a paradoxical SUBREG. */
42835 tmp1 = gen_reg_rtx (SImode);
42836 emit_move_insn (tmp1, gen_lowpart (SImode, val));
42838 /* Insert the SImode value as low element of a V4SImode vector. */
42839 tmp2 = gen_reg_rtx (V4SImode);
42840 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42841 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42843 ok = (expand_vec_perm_1 (&dperm)
42844 || expand_vec_perm_broadcast_1 (&dperm));
42845 gcc_assert (ok);
42846 return ok;
42848 goto widen;
42850 case E_V16QImode:
42851 if (TARGET_AVX2)
42852 return ix86_vector_duplicate_value (mode, target, val);
42854 if (TARGET_SSE2)
42855 goto permute;
42856 goto widen;
42858 widen:
42859 /* Replicate the value once into the next wider mode and recurse. */
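/* Illustration (added comment, not part of the original sources): for
   V8QImode with VAL = 0xab this builds 0xabab in HImode and recurses with
   V4HImode; repeating the step broadcasts the byte across the whole
   vector.  */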
42861 machine_mode smode, wsmode, wvmode;
42862 rtx x;
42864 smode = GET_MODE_INNER (mode);
42865 wvmode = get_mode_wider_vector (mode);
42866 wsmode = GET_MODE_INNER (wvmode);
42868 val = convert_modes (wsmode, smode, val, true);
42869 x = expand_simple_binop (wsmode, ASHIFT, val,
42870 GEN_INT (GET_MODE_BITSIZE (smode)),
42871 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42872 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
42874 x = gen_reg_rtx (wvmode);
42875 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42876 gcc_assert (ok);
42877 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42878 return ok;
42881 case E_V16HImode:
42882 case E_V32QImode:
42883 if (TARGET_AVX2)
42884 return ix86_vector_duplicate_value (mode, target, val);
42885 else
42887 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42888 rtx x = gen_reg_rtx (hvmode);
42890 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42891 gcc_assert (ok);
42893 x = gen_rtx_VEC_CONCAT (mode, x, x);
42894 emit_insn (gen_rtx_SET (target, x));
42896 return true;
42898 case E_V64QImode:
42899 case E_V32HImode:
42900 if (TARGET_AVX512BW)
42901 return ix86_vector_duplicate_value (mode, target, val);
42902 else
42904 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
42905 rtx x = gen_reg_rtx (hvmode);
42907 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42908 gcc_assert (ok);
42910 x = gen_rtx_VEC_CONCAT (mode, x, x);
42911 emit_insn (gen_rtx_SET (target, x));
42913 return true;
42915 default:
42916 return false;
42920 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42921 whose ONE_VAR element is VAR, and other elements are zero. Return true
42922 if successful. */
42924 static bool
42925 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
42926 rtx target, rtx var, int one_var)
42928 machine_mode vsimode;
42929 rtx new_target;
42930 rtx x, tmp;
42931 bool use_vector_set = false;
42932 rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
42934 switch (mode)
42936 case E_V2DImode:
42937 /* For SSE4.1, we normally use vector set. But if the second
42938 element is zero and inter-unit moves are OK, we use movq
42939 instead. */
42940 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42941 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
42942 && one_var == 0));
42943 break;
42944 case E_V16QImode:
42945 case E_V4SImode:
42946 case E_V4SFmode:
42947 use_vector_set = TARGET_SSE4_1;
42948 break;
42949 case E_V8HImode:
42950 use_vector_set = TARGET_SSE2;
42951 break;
42952 case E_V4HImode:
42953 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42954 break;
42955 case E_V32QImode:
42956 case E_V16HImode:
42957 use_vector_set = TARGET_AVX;
42958 break;
42959 case E_V8SImode:
42960 use_vector_set = TARGET_AVX;
42961 gen_vec_set_0 = gen_vec_setv8si_0;
42962 break;
42963 case E_V8SFmode:
42964 use_vector_set = TARGET_AVX;
42965 gen_vec_set_0 = gen_vec_setv8sf_0;
42966 break;
42967 case E_V4DFmode:
42968 use_vector_set = TARGET_AVX;
42969 gen_vec_set_0 = gen_vec_setv4df_0;
42970 break;
42971 case E_V4DImode:
42972 /* Use ix86_expand_vector_set in 64bit mode only. */
42973 use_vector_set = TARGET_AVX && TARGET_64BIT;
42974 gen_vec_set_0 = gen_vec_setv4di_0;
42975 break;
42976 case E_V16SImode:
42977 use_vector_set = TARGET_AVX512F && one_var == 0;
42978 gen_vec_set_0 = gen_vec_setv16si_0;
42979 break;
42980 case E_V16SFmode:
42981 use_vector_set = TARGET_AVX512F && one_var == 0;
42982 gen_vec_set_0 = gen_vec_setv16sf_0;
42983 break;
42984 case E_V8DFmode:
42985 use_vector_set = TARGET_AVX512F && one_var == 0;
42986 gen_vec_set_0 = gen_vec_setv8df_0;
42987 break;
42988 case E_V8DImode:
42989 /* Use ix86_expand_vector_set in 64bit mode only. */
42990 use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
42991 gen_vec_set_0 = gen_vec_setv8di_0;
42992 break;
42993 default:
42994 break;
42997 if (use_vector_set)
42999 if (gen_vec_set_0 && one_var == 0)
43001 var = force_reg (GET_MODE_INNER (mode), var);
43002 emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
43003 return true;
43005 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
43006 var = force_reg (GET_MODE_INNER (mode), var);
43007 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43008 return true;
43011 switch (mode)
43013 case E_V2SFmode:
43014 case E_V2SImode:
43015 if (!mmx_ok)
43016 return false;
43017 /* FALLTHRU */
43019 case E_V2DFmode:
43020 case E_V2DImode:
43021 if (one_var != 0)
43022 return false;
43023 var = force_reg (GET_MODE_INNER (mode), var);
43024 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43025 emit_insn (gen_rtx_SET (target, x));
43026 return true;
43028 case E_V4SFmode:
43029 case E_V4SImode:
43030 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43031 new_target = gen_reg_rtx (mode);
43032 else
43033 new_target = target;
43034 var = force_reg (GET_MODE_INNER (mode), var);
43035 x = gen_rtx_VEC_DUPLICATE (mode, var);
43036 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43037 emit_insn (gen_rtx_SET (new_target, x));
43038 if (one_var != 0)
43040 /* We need to shuffle the value to the correct position, so
43041 create a new pseudo to store the intermediate result. */
43043 /* With SSE2, we can use the integer shuffle insns. */
43044 if (mode != V4SFmode && TARGET_SSE2)
43046 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43047 const1_rtx,
43048 GEN_INT (one_var == 1 ? 0 : 1),
43049 GEN_INT (one_var == 2 ? 0 : 1),
43050 GEN_INT (one_var == 3 ? 0 : 1)));
43051 if (target != new_target)
43052 emit_move_insn (target, new_target);
43053 return true;
43056 /* Otherwise convert the intermediate result to V4SFmode and
43057 use the SSE1 shuffle instructions. */
43058 if (mode != V4SFmode)
43060 tmp = gen_reg_rtx (V4SFmode);
43061 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43063 else
43064 tmp = new_target;
43066 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43067 const1_rtx,
43068 GEN_INT (one_var == 1 ? 0 : 1),
43069 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43070 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43072 if (mode != V4SFmode)
43073 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43074 else if (tmp != target)
43075 emit_move_insn (target, tmp);
43077 else if (target != new_target)
43078 emit_move_insn (target, new_target);
43079 return true;
43081 case E_V8HImode:
43082 case E_V16QImode:
43083 vsimode = V4SImode;
43084 goto widen;
43085 case E_V4HImode:
43086 case E_V8QImode:
43087 if (!mmx_ok)
43088 return false;
43089 vsimode = V2SImode;
43090 goto widen;
43091 widen:
43092 if (one_var != 0)
43093 return false;
43095 /* Zero extend the variable element to SImode and recurse. */
43096 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43098 x = gen_reg_rtx (vsimode);
43099 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43100 var, one_var))
43101 gcc_unreachable ();
43103 emit_move_insn (target, gen_lowpart (mode, x));
43104 return true;
43106 default:
43107 return false;
43111 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43112 consisting of the values in VALS. It is known that all elements
43113 except ONE_VAR are constants. Return true if successful. */
43115 static bool
43116 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43117 rtx target, rtx vals, int one_var)
43119 rtx var = XVECEXP (vals, 0, one_var);
43120 machine_mode wmode;
43121 rtx const_vec, x;
43123 const_vec = copy_rtx (vals);
43124 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43125 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43127 switch (mode)
43129 case E_V2DFmode:
43130 case E_V2DImode:
43131 case E_V2SFmode:
43132 case E_V2SImode:
43133 /* For the two element vectors, it's just as easy to use
43134 the general case. */
43135 return false;
43137 case E_V4DImode:
43138 /* Use ix86_expand_vector_set in 64bit mode only. */
43139 if (!TARGET_64BIT)
43140 return false;
43141 /* FALLTHRU */
43142 case E_V4DFmode:
43143 case E_V8SFmode:
43144 case E_V8SImode:
43145 case E_V16HImode:
43146 case E_V32QImode:
43147 case E_V4SFmode:
43148 case E_V4SImode:
43149 case E_V8HImode:
43150 case E_V4HImode:
43151 break;
43153 case E_V16QImode:
43154 if (TARGET_SSE4_1)
43155 break;
43156 wmode = V8HImode;
43157 goto widen;
43158 case E_V8QImode:
43159 wmode = V4HImode;
43160 goto widen;
43161 widen:
43162 /* There's no way to set one QImode entry easily. Combine
43163 the variable value with its adjacent constant value, and
43164 promote to an HImode set. */
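/* Illustration (added comment, not part of the original sources): to set
   the variable QImode element 5 of a V8QImode vector, elements 4 and 5 are
   combined into one HImode value (the variable byte is shifted into the
   high half because ONE_VAR is odd) and stored with a single vec_set at
   HImode position ONE_VAR >> 1 == 2.  */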
43165 x = XVECEXP (vals, 0, one_var ^ 1);
43166 if (one_var & 1)
43168 var = convert_modes (HImode, QImode, var, true);
43169 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43170 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43171 x = GEN_INT (INTVAL (x) & 0xff);
43173 else
43175 var = convert_modes (HImode, QImode, var, true);
43176 x = gen_int_mode (INTVAL (x) << 8, HImode);
43178 if (x != const0_rtx)
43179 var = expand_simple_binop (HImode, IOR, var, x, var,
43180 1, OPTAB_LIB_WIDEN);
43182 x = gen_reg_rtx (wmode);
43183 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43184 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43186 emit_move_insn (target, gen_lowpart (mode, x));
43187 return true;
43189 default:
43190 return false;
43193 emit_move_insn (target, const_vec);
43194 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43195 return true;
43198 /* A subroutine of ix86_expand_vector_init_general. Use vector
43199 concatenate to handle the most general case: all values variable,
43200 and none identical. */
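/* Illustration (added comment, not part of the original sources): for
   V8SImode with eight variable elements, adjacent pairs are first combined
   into four V2SImode values, those into two V4SImode halves, and the two
   halves are finally concatenated into the V8SImode target.  */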
43202 static void
43203 ix86_expand_vector_init_concat (machine_mode mode,
43204 rtx target, rtx *ops, int n)
43206 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43207 rtx first[16], second[8], third[4];
43208 rtvec v;
43209 int i, j;
43211 switch (n)
43213 case 2:
43214 switch (mode)
43216 case E_V16SImode:
43217 cmode = V8SImode;
43218 break;
43219 case E_V16SFmode:
43220 cmode = V8SFmode;
43221 break;
43222 case E_V8DImode:
43223 cmode = V4DImode;
43224 break;
43225 case E_V8DFmode:
43226 cmode = V4DFmode;
43227 break;
43228 case E_V8SImode:
43229 cmode = V4SImode;
43230 break;
43231 case E_V8SFmode:
43232 cmode = V4SFmode;
43233 break;
43234 case E_V4DImode:
43235 cmode = V2DImode;
43236 break;
43237 case E_V4DFmode:
43238 cmode = V2DFmode;
43239 break;
43240 case E_V4SImode:
43241 cmode = V2SImode;
43242 break;
43243 case E_V4SFmode:
43244 cmode = V2SFmode;
43245 break;
43246 case E_V2DImode:
43247 cmode = DImode;
43248 break;
43249 case E_V2SImode:
43250 cmode = SImode;
43251 break;
43252 case E_V2DFmode:
43253 cmode = DFmode;
43254 break;
43255 case E_V2SFmode:
43256 cmode = SFmode;
43257 break;
43258 default:
43259 gcc_unreachable ();
43262 if (!register_operand (ops[1], cmode))
43263 ops[1] = force_reg (cmode, ops[1]);
43264 if (!register_operand (ops[0], cmode))
43265 ops[0] = force_reg (cmode, ops[0]);
43266 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
43267 ops[1])));
43268 break;
43270 case 4:
43271 switch (mode)
43273 case E_V4DImode:
43274 cmode = V2DImode;
43275 break;
43276 case E_V4DFmode:
43277 cmode = V2DFmode;
43278 break;
43279 case E_V4SImode:
43280 cmode = V2SImode;
43281 break;
43282 case E_V4SFmode:
43283 cmode = V2SFmode;
43284 break;
43285 default:
43286 gcc_unreachable ();
43288 goto half;
43290 case 8:
43291 switch (mode)
43293 case E_V8DImode:
43294 cmode = V2DImode;
43295 hmode = V4DImode;
43296 break;
43297 case E_V8DFmode:
43298 cmode = V2DFmode;
43299 hmode = V4DFmode;
43300 break;
43301 case E_V8SImode:
43302 cmode = V2SImode;
43303 hmode = V4SImode;
43304 break;
43305 case E_V8SFmode:
43306 cmode = V2SFmode;
43307 hmode = V4SFmode;
43308 break;
43309 default:
43310 gcc_unreachable ();
43312 goto half;
43314 case 16:
43315 switch (mode)
43317 case E_V16SImode:
43318 cmode = V2SImode;
43319 hmode = V4SImode;
43320 gmode = V8SImode;
43321 break;
43322 case E_V16SFmode:
43323 cmode = V2SFmode;
43324 hmode = V4SFmode;
43325 gmode = V8SFmode;
43326 break;
43327 default:
43328 gcc_unreachable ();
43330 goto half;
43332 half:
43333 /* FIXME: We process inputs backward to help RA. PR 36222. */
43334 i = n - 1;
43335 j = (n >> 1) - 1;
43336 for (; i > 0; i -= 2, j--)
43338 first[j] = gen_reg_rtx (cmode);
43339 v = gen_rtvec (2, ops[i - 1], ops[i]);
43340 ix86_expand_vector_init (false, first[j],
43341 gen_rtx_PARALLEL (cmode, v));
43344 n >>= 1;
43345 if (n > 4)
43347 gcc_assert (hmode != VOIDmode);
43348 gcc_assert (gmode != VOIDmode);
43349 for (i = j = 0; i < n; i += 2, j++)
43351 second[j] = gen_reg_rtx (hmode);
43352 ix86_expand_vector_init_concat (hmode, second [j],
43353 &first [i], 2);
43355 n >>= 1;
43356 for (i = j = 0; i < n; i += 2, j++)
43358 third[j] = gen_reg_rtx (gmode);
43359 ix86_expand_vector_init_concat (gmode, third[j],
43360 &second[i], 2);
43362 n >>= 1;
43363 ix86_expand_vector_init_concat (mode, target, third, n);
43365 else if (n > 2)
43367 gcc_assert (hmode != VOIDmode);
43368 for (i = j = 0; i < n; i += 2, j++)
43370 second[j] = gen_reg_rtx (hmode);
43371 ix86_expand_vector_init_concat (hmode, second [j],
43372 &first [i], 2);
43374 n >>= 1;
43375 ix86_expand_vector_init_concat (mode, target, second, n);
43377 else
43378 ix86_expand_vector_init_concat (mode, target, first, n);
43379 break;
43381 default:
43382 gcc_unreachable ();
43386 /* A subroutine of ix86_expand_vector_init_general. Use vector
43387 interleave to handle the most general case: all values variable,
43388 and none identical. */
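/* Illustration (added comment, not part of the original sources): for
   V16QImode the sixteen bytes are first packed in pairs into eight vectors
   (each pair occupying the low HImode element), which are then merged by
   successive low interleaves at 16-, 32- and 64-bit granularity
   (punpcklwd, punpckldq, punpcklqdq) until the full vector is formed.  */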
43390 static void
43391 ix86_expand_vector_init_interleave (machine_mode mode,
43392 rtx target, rtx *ops, int n)
43394 machine_mode first_imode, second_imode, third_imode, inner_mode;
43395 int i, j;
43396 rtx op0, op1;
43397 rtx (*gen_load_even) (rtx, rtx, rtx);
43398 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
43399 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
43401 switch (mode)
43403 case E_V8HImode:
43404 gen_load_even = gen_vec_setv8hi;
43405 gen_interleave_first_low = gen_vec_interleave_lowv4si;
43406 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43407 inner_mode = HImode;
43408 first_imode = V4SImode;
43409 second_imode = V2DImode;
43410 third_imode = VOIDmode;
43411 break;
43412 case E_V16QImode:
43413 gen_load_even = gen_vec_setv16qi;
43414 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
43415 gen_interleave_second_low = gen_vec_interleave_lowv4si;
43416 inner_mode = QImode;
43417 first_imode = V8HImode;
43418 second_imode = V4SImode;
43419 third_imode = V2DImode;
43420 break;
43421 default:
43422 gcc_unreachable ();
43425 for (i = 0; i < n; i++)
43427 /* Extend the odd element to SImode using a paradoxical SUBREG. */
43428 op0 = gen_reg_rtx (SImode);
43429 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
43431 /* Insert the SImode value as low element of V4SImode vector. */
43432 op1 = gen_reg_rtx (V4SImode);
43433 op0 = gen_rtx_VEC_MERGE (V4SImode,
43434 gen_rtx_VEC_DUPLICATE (V4SImode,
43435 op0),
43436 CONST0_RTX (V4SImode),
43437 const1_rtx);
43438 emit_insn (gen_rtx_SET (op1, op0));
43440 /* Cast the V4SImode vector back to a vector in the original mode. */
43441 op0 = gen_reg_rtx (mode);
43442 emit_move_insn (op0, gen_lowpart (mode, op1));
43444 /* Load even elements into the second position. */
43445 emit_insn (gen_load_even (op0,
43446 force_reg (inner_mode,
43447 ops [i + i + 1]),
43448 const1_rtx));
43450 /* Cast vector to FIRST_IMODE vector. */
43451 ops[i] = gen_reg_rtx (first_imode);
43452 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
43455 /* Interleave low FIRST_IMODE vectors. */
43456 for (i = j = 0; i < n; i += 2, j++)
43458 op0 = gen_reg_rtx (first_imode);
43459 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43461 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
43462 ops[j] = gen_reg_rtx (second_imode);
43463 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43466 /* Interleave low SECOND_IMODE vectors. */
43467 switch (second_imode)
43469 case E_V4SImode:
43470 for (i = j = 0; i < n / 2; i += 2, j++)
43472 op0 = gen_reg_rtx (second_imode);
43473 emit_insn (gen_interleave_second_low (op0, ops[i],
43474 ops[i + 1]));
43476 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
43477 vector. */
43478 ops[j] = gen_reg_rtx (third_imode);
43479 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43481 second_imode = V2DImode;
43482 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43483 /* FALLTHRU */
43485 case E_V2DImode:
43486 op0 = gen_reg_rtx (second_imode);
43487 emit_insn (gen_interleave_second_low (op0, ops[0],
43488 ops[1]));
43490 /* Cast the SECOND_IMODE vector back to a vector in the original
43491 mode. */
43492 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
43493 break;
43495 default:
43496 gcc_unreachable ();
43500 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
43501 all values variable, and none identical. */
43503 static void
43504 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
43505 rtx target, rtx vals)
43507 rtx ops[64], op0, op1, op2, op3, op4, op5;
43508 machine_mode half_mode = VOIDmode;
43509 machine_mode quarter_mode = VOIDmode;
43510 int n, i;
43512 switch (mode)
43514 case E_V2SFmode:
43515 case E_V2SImode:
43516 if (!mmx_ok && !TARGET_SSE)
43517 break;
43518 /* FALLTHRU */
43520 case E_V16SImode:
43521 case E_V16SFmode:
43522 case E_V8DFmode:
43523 case E_V8DImode:
43524 case E_V8SFmode:
43525 case E_V8SImode:
43526 case E_V4DFmode:
43527 case E_V4DImode:
43528 case E_V4SFmode:
43529 case E_V4SImode:
43530 case E_V2DFmode:
43531 case E_V2DImode:
43532 n = GET_MODE_NUNITS (mode);
43533 for (i = 0; i < n; i++)
43534 ops[i] = XVECEXP (vals, 0, i);
43535 ix86_expand_vector_init_concat (mode, target, ops, n);
43536 return;
43538 case E_V2TImode:
43539 for (i = 0; i < 2; i++)
43540 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43541 op0 = gen_reg_rtx (V4DImode);
43542 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
43543 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43544 return;
43546 case E_V4TImode:
43547 for (i = 0; i < 4; i++)
43548 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43549 ops[4] = gen_reg_rtx (V4DImode);
43550 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
43551 ops[5] = gen_reg_rtx (V4DImode);
43552 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
43553 op0 = gen_reg_rtx (V8DImode);
43554 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
43555 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43556 return;
43558 case E_V32QImode:
43559 half_mode = V16QImode;
43560 goto half;
43562 case E_V16HImode:
43563 half_mode = V8HImode;
43564 goto half;
43566 half:
43567 n = GET_MODE_NUNITS (mode);
43568 for (i = 0; i < n; i++)
43569 ops[i] = XVECEXP (vals, 0, i);
43570 op0 = gen_reg_rtx (half_mode);
43571 op1 = gen_reg_rtx (half_mode);
43572 ix86_expand_vector_init_interleave (half_mode, op0, ops,
43573 n >> 2);
43574 ix86_expand_vector_init_interleave (half_mode, op1,
43575 &ops [n >> 1], n >> 2);
43576 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
43577 return;
43579 case E_V64QImode:
43580 quarter_mode = V16QImode;
43581 half_mode = V32QImode;
43582 goto quarter;
43584 case E_V32HImode:
43585 quarter_mode = V8HImode;
43586 half_mode = V16HImode;
43587 goto quarter;
43589 quarter:
43590 n = GET_MODE_NUNITS (mode);
43591 for (i = 0; i < n; i++)
43592 ops[i] = XVECEXP (vals, 0, i);
43593 op0 = gen_reg_rtx (quarter_mode);
43594 op1 = gen_reg_rtx (quarter_mode);
43595 op2 = gen_reg_rtx (quarter_mode);
43596 op3 = gen_reg_rtx (quarter_mode);
43597 op4 = gen_reg_rtx (half_mode);
43598 op5 = gen_reg_rtx (half_mode);
43599 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43600 n >> 3);
43601 ix86_expand_vector_init_interleave (quarter_mode, op1,
43602 &ops [n >> 2], n >> 3);
43603 ix86_expand_vector_init_interleave (quarter_mode, op2,
43604 &ops [n >> 1], n >> 3);
43605 ix86_expand_vector_init_interleave (quarter_mode, op3,
43606 &ops [(n >> 1) | (n >> 2)], n >> 3);
43607 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43608 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43609 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
43610 return;
43612 case E_V16QImode:
43613 if (!TARGET_SSE4_1)
43614 break;
43615 /* FALLTHRU */
43617 case E_V8HImode:
43618 if (!TARGET_SSE2)
43619 break;
43621 /* Don't use ix86_expand_vector_init_interleave if we can't
43622 move from GPR to SSE register directly. */
43623 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43624 break;
43626 n = GET_MODE_NUNITS (mode);
43627 for (i = 0; i < n; i++)
43628 ops[i] = XVECEXP (vals, 0, i);
43629 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43630 return;
43632 case E_V4HImode:
43633 case E_V8QImode:
43634 break;
43636 default:
43637 gcc_unreachable ();
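/* Added comment (not part of the original sources): the cases that merely
   break out of the switch above (V4HImode/V8QImode, and the 128-bit QI/HI
   cases when the required SSE level or direct GPR->SSE moves are missing)
   fall through to the code below, which packs the elements into word_mode
   integers with shift/IOR and assembles the vector from those words.  */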
43641 int i, j, n_elts, n_words, n_elt_per_word;
43642 machine_mode inner_mode;
43643 rtx words[4], shift;
43645 inner_mode = GET_MODE_INNER (mode);
43646 n_elts = GET_MODE_NUNITS (mode);
43647 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43648 n_elt_per_word = n_elts / n_words;
43649 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43651 for (i = 0; i < n_words; ++i)
43653 rtx word = NULL_RTX;
43655 for (j = 0; j < n_elt_per_word; ++j)
43657 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43658 elt = convert_modes (word_mode, inner_mode, elt, true);
43660 if (j == 0)
43661 word = elt;
43662 else
43664 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43665 word, 1, OPTAB_LIB_WIDEN);
43666 word = expand_simple_binop (word_mode, IOR, word, elt,
43667 word, 1, OPTAB_LIB_WIDEN);
43671 words[i] = word;
43674 if (n_words == 1)
43675 emit_move_insn (target, gen_lowpart (mode, words[0]));
43676 else if (n_words == 2)
43678 rtx tmp = gen_reg_rtx (mode);
43679 emit_clobber (tmp);
43680 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43681 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43682 emit_move_insn (target, tmp);
43684 else if (n_words == 4)
43686 rtx tmp = gen_reg_rtx (V4SImode);
43687 gcc_assert (word_mode == SImode);
43688 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43689 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43690 emit_move_insn (target, gen_lowpart (mode, tmp));
43692 else
43693 gcc_unreachable ();
43697 /* Initialize vector TARGET via VALS. Suppress the use of MMX
43698 instructions unless MMX_OK is true. */
43700 void
43701 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43703 machine_mode mode = GET_MODE (target);
43704 machine_mode inner_mode = GET_MODE_INNER (mode);
43705 int n_elts = GET_MODE_NUNITS (mode);
43706 int n_var = 0, one_var = -1;
43707 bool all_same = true, all_const_zero = true;
43708 int i;
43709 rtx x;
43711 /* First handle initialization from vector elements, i.e. VALS made up of half-width vectors rather than scalars. */
43712 if (n_elts != XVECLEN (vals, 0))
43714 rtx subtarget = target;
43715 x = XVECEXP (vals, 0, 0);
43716 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
43717 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
43719 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
43720 if (inner_mode == QImode || inner_mode == HImode)
43722 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
43723 mode = mode_for_vector (SImode, n_bits / 4).require ();
43724 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
43725 ops[0] = gen_lowpart (inner_mode, ops[0]);
43726 ops[1] = gen_lowpart (inner_mode, ops[1]);
43727 subtarget = gen_reg_rtx (mode);
43729 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
43730 if (subtarget != target)
43731 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
43732 return;
43734 gcc_unreachable ();
43737 for (i = 0; i < n_elts; ++i)
43739 x = XVECEXP (vals, 0, i);
43740 if (!(CONST_SCALAR_INT_P (x)
43741 || CONST_DOUBLE_P (x)
43742 || CONST_FIXED_P (x)))
43743 n_var++, one_var = i;
43744 else if (x != CONST0_RTX (inner_mode))
43745 all_const_zero = false;
43746 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43747 all_same = false;
43750 /* Constants are best loaded from the constant pool. */
43751 if (n_var == 0)
43753 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43754 return;
43757 /* If all values are identical, broadcast the value. */
43758 if (all_same
43759 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43760 XVECEXP (vals, 0, 0)))
43761 return;
43763 /* Values where only one field is non-constant are best loaded from
43764 the pool and overwritten via move later. */
43765 if (n_var == 1)
43767 if (all_const_zero
43768 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43769 XVECEXP (vals, 0, one_var),
43770 one_var))
43771 return;
43773 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43774 return;
43777 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43780 void
43781 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43783 machine_mode mode = GET_MODE (target);
43784 machine_mode inner_mode = GET_MODE_INNER (mode);
43785 machine_mode half_mode;
43786 bool use_vec_merge = false;
43787 rtx tmp;
43788 static rtx (*gen_extract[6][2]) (rtx, rtx)
43790 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43791 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43792 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43793 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43794 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43795 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43797 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43799 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43800 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43801 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43802 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43803 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43804 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43806 int i, j, n;
43807 machine_mode mmode = VOIDmode;
43808 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
43810 switch (mode)
43812 case E_V2SFmode:
43813 case E_V2SImode:
43814 if (mmx_ok)
43816 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43817 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43818 if (elt == 0)
43819 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43820 else
43821 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43822 emit_insn (gen_rtx_SET (target, tmp));
43823 return;
43825 break;
43827 case E_V2DImode:
43828 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43829 if (use_vec_merge)
43830 break;
43832 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43833 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43834 if (elt == 0)
43835 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43836 else
43837 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43838 emit_insn (gen_rtx_SET (target, tmp));
43839 return;
43841 case E_V2DFmode:
43843 rtx op0, op1;
43845 /* For the two element vectors, we implement a VEC_CONCAT with
43846 the extraction of the other element. */
43848 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43849 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43851 if (elt == 0)
43852 op0 = val, op1 = tmp;
43853 else
43854 op0 = tmp, op1 = val;
43856 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43857 emit_insn (gen_rtx_SET (target, tmp));
43859 return;
43861 case E_V4SFmode:
43862 use_vec_merge = TARGET_SSE4_1;
43863 if (use_vec_merge)
43864 break;
43866 switch (elt)
43868 case 0:
43869 use_vec_merge = true;
43870 break;
43872 case 1:
43873 /* tmp = target = A B C D */
43874 tmp = copy_to_reg (target);
43875 /* target = A A B B */
43876 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43877 /* target = X A B B */
43878 ix86_expand_vector_set (false, target, val, 0);
43879 /* target = A X C D */
43880 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43881 const1_rtx, const0_rtx,
43882 GEN_INT (2+4), GEN_INT (3+4)));
43883 return;
43885 case 2:
43886 /* tmp = target = A B C D */
43887 tmp = copy_to_reg (target);
43888 /* tmp = X B C D */
43889 ix86_expand_vector_set (false, tmp, val, 0);
43890 /* target = A B X D */
43891 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43892 const0_rtx, const1_rtx,
43893 GEN_INT (0+4), GEN_INT (3+4)));
43894 return;
43896 case 3:
43897 /* tmp = target = A B C D */
43898 tmp = copy_to_reg (target);
43899 /* tmp = X B C D */
43900 ix86_expand_vector_set (false, tmp, val, 0);
43901 /* target = A B C X */
43902 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43903 const0_rtx, const1_rtx,
43904 GEN_INT (2+4), GEN_INT (0+4)));
43905 return;
43907 default:
43908 gcc_unreachable ();
43910 break;
43912 case E_V4SImode:
43913 use_vec_merge = TARGET_SSE4_1;
43914 if (use_vec_merge)
43915 break;
43917 /* Element 0 handled by vec_merge below. */
43918 if (elt == 0)
43920 use_vec_merge = true;
43921 break;
43924 if (TARGET_SSE2)
43926 /* With SSE2, use integer shuffles to swap element 0 and ELT,
43927 store into element 0, then shuffle them back. */
43929 rtx order[4];
43931 order[0] = GEN_INT (elt);
43932 order[1] = const1_rtx;
43933 order[2] = const2_rtx;
43934 order[3] = GEN_INT (3);
43935 order[elt] = const0_rtx;
43937 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43938 order[1], order[2], order[3]));
43940 ix86_expand_vector_set (false, target, val, 0);
43942 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43943 order[1], order[2], order[3]));
43945 else
43947 /* For SSE1, we have to reuse the V4SF code. */
43948 rtx t = gen_reg_rtx (V4SFmode);
43949 emit_move_insn (t, gen_lowpart (V4SFmode, target));
43950 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43951 emit_move_insn (target, gen_lowpart (mode, t));
43953 return;
43955 case E_V8HImode:
43956 use_vec_merge = TARGET_SSE2;
43957 break;
43958 case E_V4HImode:
43959 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43960 break;
43962 case E_V16QImode:
43963 use_vec_merge = TARGET_SSE4_1;
43964 break;
43966 case E_V8QImode:
43967 break;
43969 case E_V32QImode:
43970 half_mode = V16QImode;
43971 j = 0;
43972 n = 16;
43973 goto half;
43975 case E_V16HImode:
43976 half_mode = V8HImode;
43977 j = 1;
43978 n = 8;
43979 goto half;
43981 case E_V8SImode:
43982 half_mode = V4SImode;
43983 j = 2;
43984 n = 4;
43985 goto half;
43987 case E_V4DImode:
43988 half_mode = V2DImode;
43989 j = 3;
43990 n = 2;
43991 goto half;
43993 case E_V8SFmode:
43994 half_mode = V4SFmode;
43995 j = 4;
43996 n = 4;
43997 goto half;
43999 case E_V4DFmode:
44000 half_mode = V2DFmode;
44001 j = 5;
44002 n = 2;
44003 goto half;
44005 half:
44006 /* Compute offset. */
44007 i = elt / n;
44008 elt %= n;
44010 gcc_assert (i <= 1);
44012 /* Extract the half. */
44013 tmp = gen_reg_rtx (half_mode);
44014 emit_insn (gen_extract[j][i] (tmp, target));
44016 /* Put val in tmp at elt. */
44017 ix86_expand_vector_set (false, tmp, val, elt);
44019 /* Put it back. */
44020 emit_insn (gen_insert[j][i] (target, target, tmp));
44021 return;
44023 case E_V8DFmode:
44024 if (TARGET_AVX512F)
44026 mmode = QImode;
44027 gen_blendm = gen_avx512f_blendmv8df;
44029 break;
44031 case E_V8DImode:
44032 if (TARGET_AVX512F)
44034 mmode = QImode;
44035 gen_blendm = gen_avx512f_blendmv8di;
44037 break;
44039 case E_V16SFmode:
44040 if (TARGET_AVX512F)
44042 mmode = HImode;
44043 gen_blendm = gen_avx512f_blendmv16sf;
44045 break;
44047 case E_V16SImode:
44048 if (TARGET_AVX512F)
44050 mmode = HImode;
44051 gen_blendm = gen_avx512f_blendmv16si;
44053 break;
44055 case E_V32HImode:
44056 if (TARGET_AVX512F && TARGET_AVX512BW)
44058 mmode = SImode;
44059 gen_blendm = gen_avx512bw_blendmv32hi;
44061 break;
44063 case E_V64QImode:
44064 if (TARGET_AVX512F && TARGET_AVX512BW)
44066 mmode = DImode;
44067 gen_blendm = gen_avx512bw_blendmv64qi;
44069 break;
44071 default:
44072 break;
44075 if (mmode != VOIDmode)
44077 tmp = gen_reg_rtx (mode);
44078 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44079 /* The avx512*_blendm<mode> expanders have different operand order
44080 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
44081 elements where the mask is set and the second input operand otherwise;
44082 in {sse,avx}*_*blend* the first input operand is used for elements
44083 where the mask is clear and the second input operand otherwise. */
44084 emit_insn (gen_blendm (target, target, tmp,
44085 force_reg (mmode,
44086 gen_int_mode (HOST_WIDE_INT_1U << elt, mmode))));
44088 else if (use_vec_merge)
44090 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44091 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44092 emit_insn (gen_rtx_SET (target, tmp));
44094 else
44096 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44098 emit_move_insn (mem, target);
44100 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44101 emit_move_insn (tmp, val);
44103 emit_move_insn (target, mem);
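/* A minimal scalar sketch of the VEC_MERGE/blendm path above (illustration
   only; the mask immediate is 1 << ELT and VAL has been broadcast to TMP):

     for (i = 0; i < n_elts; i++)
       target[i] = (mask >> i) & 1 ? val : target[i];

   so exactly one lane of TARGET is replaced.  The final fallback spills
   TARGET to a stack temporary, stores VAL at ELT's byte offset and reloads
   the whole vector.  */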
44107 void
44108 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44110 machine_mode mode = GET_MODE (vec);
44111 machine_mode inner_mode = GET_MODE_INNER (mode);
44112 bool use_vec_extr = false;
44113 rtx tmp;
44115 switch (mode)
44117 case E_V2SImode:
44118 case E_V2SFmode:
44119 if (!mmx_ok)
44120 break;
44121 /* FALLTHRU */
44123 case E_V2DFmode:
44124 case E_V2DImode:
44125 case E_V2TImode:
44126 case E_V4TImode:
44127 use_vec_extr = true;
44128 break;
44130 case E_V4SFmode:
44131 use_vec_extr = TARGET_SSE4_1;
44132 if (use_vec_extr)
44133 break;
44135 switch (elt)
44137 case 0:
44138 tmp = vec;
44139 break;
44141 case 1:
44142 case 3:
44143 tmp = gen_reg_rtx (mode);
44144 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44145 GEN_INT (elt), GEN_INT (elt),
44146 GEN_INT (elt+4), GEN_INT (elt+4)));
44147 break;
44149 case 2:
44150 tmp = gen_reg_rtx (mode);
44151 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44152 break;
44154 default:
44155 gcc_unreachable ();
44157 vec = tmp;
44158 use_vec_extr = true;
44159 elt = 0;
44160 break;
44162 case E_V4SImode:
44163 use_vec_extr = TARGET_SSE4_1;
44164 if (use_vec_extr)
44165 break;
44167 if (TARGET_SSE2)
44169 switch (elt)
44171 case 0:
44172 tmp = vec;
44173 break;
44175 case 1:
44176 case 3:
44177 tmp = gen_reg_rtx (mode);
44178 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44179 GEN_INT (elt), GEN_INT (elt),
44180 GEN_INT (elt), GEN_INT (elt)));
44181 break;
44183 case 2:
44184 tmp = gen_reg_rtx (mode);
44185 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44186 break;
44188 default:
44189 gcc_unreachable ();
44191 vec = tmp;
44192 use_vec_extr = true;
44193 elt = 0;
44195 else
44197 /* For SSE1, we have to reuse the V4SF code. */
44198 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44199 gen_lowpart (V4SFmode, vec), elt);
44200 return;
44202 break;
44204 case E_V8HImode:
44205 use_vec_extr = TARGET_SSE2;
44206 break;
44207 case E_V4HImode:
44208 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44209 break;
44211 case E_V16QImode:
44212 use_vec_extr = TARGET_SSE4_1;
44213 break;
44215 case E_V8SFmode:
44216 if (TARGET_AVX)
44218 tmp = gen_reg_rtx (V4SFmode);
44219 if (elt < 4)
44220 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44221 else
44222 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44223 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44224 return;
44226 break;
44228 case E_V4DFmode:
44229 if (TARGET_AVX)
44231 tmp = gen_reg_rtx (V2DFmode);
44232 if (elt < 2)
44233 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44234 else
44235 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44236 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44237 return;
44239 break;
44241 case E_V32QImode:
44242 if (TARGET_AVX)
44244 tmp = gen_reg_rtx (V16QImode);
44245 if (elt < 16)
44246 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44247 else
44248 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44249 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44250 return;
44252 break;
44254 case E_V16HImode:
44255 if (TARGET_AVX)
44257 tmp = gen_reg_rtx (V8HImode);
44258 if (elt < 8)
44259 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44260 else
44261 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44262 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44263 return;
44265 break;
44267 case E_V8SImode:
44268 if (TARGET_AVX)
44270 tmp = gen_reg_rtx (V4SImode);
44271 if (elt < 4)
44272 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44273 else
44274 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44275 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44276 return;
44278 break;
44280 case E_V4DImode:
44281 if (TARGET_AVX)
44283 tmp = gen_reg_rtx (V2DImode);
44284 if (elt < 2)
44285 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
44286 else
44287 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
44288 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44289 return;
44291 break;
44293 case E_V32HImode:
44294 if (TARGET_AVX512BW)
44296 tmp = gen_reg_rtx (V16HImode);
44297 if (elt < 16)
44298 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
44299 else
44300 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
44301 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44302 return;
44304 break;
44306 case E_V64QImode:
44307 if (TARGET_AVX512BW)
44309 tmp = gen_reg_rtx (V32QImode);
44310 if (elt < 32)
44311 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
44312 else
44313 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
44314 ix86_expand_vector_extract (false, target, tmp, elt & 31);
44315 return;
44317 break;
44319 case E_V16SFmode:
44320 tmp = gen_reg_rtx (V8SFmode);
44321 if (elt < 8)
44322 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
44323 else
44324 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
44325 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44326 return;
44328 case E_V8DFmode:
44329 tmp = gen_reg_rtx (V4DFmode);
44330 if (elt < 4)
44331 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
44332 else
44333 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
44334 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44335 return;
44337 case E_V16SImode:
44338 tmp = gen_reg_rtx (V8SImode);
44339 if (elt < 8)
44340 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
44341 else
44342 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
44343 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44344 return;
44346 case E_V8DImode:
44347 tmp = gen_reg_rtx (V4DImode);
44348 if (elt < 4)
44349 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
44350 else
44351 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
44352 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44353 return;
44355 case E_V8QImode:
44356 /* ??? Could extract the appropriate HImode element and shift. */
44357 default:
44358 break;
44361 if (use_vec_extr)
44363 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
44364 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
44366 /* Let the rtl optimizers know about the zero extension performed. */
44367 if (inner_mode == QImode || inner_mode == HImode)
44369 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
44370 target = gen_lowpart (SImode, target);
44373 emit_insn (gen_rtx_SET (target, tmp));
44375 else
44377 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44379 emit_move_insn (mem, vec);
44381 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44382 emit_move_insn (target, tmp);
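/* Example of the recursive narrowing above (hypothetical operands): to
   extract element 5 of a V8SImode vector, the high V4SImode half is
   extracted first (elt >= 4), then the code recurses to pull out element
   5 & 3 == 1 of that 128-bit half.  */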
44386 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
44387 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
44388 The upper bits of DEST are undefined, though they shouldn't cause
44389 exceptions (some bits from src or all zeros are ok). */
44391 static void
44392 emit_reduc_half (rtx dest, rtx src, int i)
44394 rtx tem, d = dest;
44395 switch (GET_MODE (src))
44397 case E_V4SFmode:
44398 if (i == 128)
44399 tem = gen_sse_movhlps (dest, src, src);
44400 else
44401 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
44402 GEN_INT (1 + 4), GEN_INT (1 + 4));
44403 break;
44404 case E_V2DFmode:
44405 tem = gen_vec_interleave_highv2df (dest, src, src);
44406 break;
44407 case E_V16QImode:
44408 case E_V8HImode:
44409 case E_V4SImode:
44410 case E_V2DImode:
44411 d = gen_reg_rtx (V1TImode);
44412 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
44413 GEN_INT (i / 2));
44414 break;
44415 case E_V8SFmode:
44416 if (i == 256)
44417 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
44418 else
44419 tem = gen_avx_shufps256 (dest, src, src,
44420 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
44421 break;
44422 case E_V4DFmode:
44423 if (i == 256)
44424 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
44425 else
44426 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
44427 break;
44428 case E_V32QImode:
44429 case E_V16HImode:
44430 case E_V8SImode:
44431 case E_V4DImode:
44432 if (i == 256)
44434 if (GET_MODE (dest) != V4DImode)
44435 d = gen_reg_rtx (V4DImode);
44436 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44437 gen_lowpart (V4DImode, src),
44438 const1_rtx);
44440 else
44442 d = gen_reg_rtx (V2TImode);
44443 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44444 GEN_INT (i / 2));
44446 break;
44447 case E_V64QImode:
44448 case E_V32HImode:
44449 case E_V16SImode:
44450 case E_V16SFmode:
44451 case E_V8DImode:
44452 case E_V8DFmode:
44453 if (i > 128)
44454 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44455 gen_lowpart (V16SImode, src),
44456 gen_lowpart (V16SImode, src),
44457 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44458 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44459 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44460 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44461 GEN_INT (0xC), GEN_INT (0xD),
44462 GEN_INT (0xE), GEN_INT (0xF),
44463 GEN_INT (0x10), GEN_INT (0x11),
44464 GEN_INT (0x12), GEN_INT (0x13),
44465 GEN_INT (0x14), GEN_INT (0x15),
44466 GEN_INT (0x16), GEN_INT (0x17));
44467 else
44468 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
44469 gen_lowpart (V16SImode, src),
44470 GEN_INT (i == 128 ? 0x2 : 0x1),
44471 GEN_INT (0x3),
44472 GEN_INT (0x3),
44473 GEN_INT (0x3),
44474 GEN_INT (i == 128 ? 0x6 : 0x5),
44475 GEN_INT (0x7),
44476 GEN_INT (0x7),
44477 GEN_INT (0x7),
44478 GEN_INT (i == 128 ? 0xA : 0x9),
44479 GEN_INT (0xB),
44480 GEN_INT (0xB),
44481 GEN_INT (0xB),
44482 GEN_INT (i == 128 ? 0xE : 0xD),
44483 GEN_INT (0xF),
44484 GEN_INT (0xF),
44485 GEN_INT (0xF));
44486 break;
44487 default:
44488 gcc_unreachable ();
44490 emit_insn (tem);
44491 if (d != dest)
44492 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
44495 /* Expand a vector reduction. FN is the binary pattern to reduce;
44496 DEST is the destination; IN is the input vector. */
44498 void
44499 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
44501 rtx half, dst, vec = in;
44502 machine_mode mode = GET_MODE (in);
44503 int i;
44505 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
44506 if (TARGET_SSE4_1
44507 && mode == V8HImode
44508 && fn == gen_uminv8hi3)
44510 emit_insn (gen_sse4_1_phminposuw (dest, in));
44511 return;
44514 for (i = GET_MODE_BITSIZE (mode);
44515 i > GET_MODE_UNIT_BITSIZE (mode);
44516 i >>= 1)
44518 half = gen_reg_rtx (mode);
44519 emit_reduc_half (half, vec, i);
44520 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
44521 dst = dest;
44522 else
44523 dst = gen_reg_rtx (mode);
44524 emit_insn (fn (dst, half, vec));
44525 vec = dst;
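/* Scalar sketch of the reduction loop above, using '+' for FN and a
   hypothetical V4SI input {a, b, c, d} (illustration only, '_' marks
   don't-care lanes):

     i = 128:  half = {c, d, _, _}        upper half shifted down
               vec  = {a+c, b+d, _, _}
     i =  64:  half = {b+d, _, _, _}
               dest = {a+b+c+d, _, _, _}

   Only element 0 of DEST is meaningful after the loop.  */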
44529 /* Target hook for scalar_mode_supported_p. */
44530 static bool
44531 ix86_scalar_mode_supported_p (scalar_mode mode)
44533 if (DECIMAL_FLOAT_MODE_P (mode))
44534 return default_decimal_float_supported_p ();
44535 else if (mode == TFmode)
44536 return true;
44537 else
44538 return default_scalar_mode_supported_p (mode);
44541 /* Implements target hook vector_mode_supported_p. */
44542 static bool
44543 ix86_vector_mode_supported_p (machine_mode mode)
44545 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44546 return true;
44547 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44548 return true;
44549 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44550 return true;
44551 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44552 return true;
44553 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44554 return true;
44555 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44556 return true;
44557 return false;
44560 /* Target hook for c_mode_for_suffix. */
44561 static machine_mode
44562 ix86_c_mode_for_suffix (char suffix)
44564 if (suffix == 'q')
44565 return TFmode;
44566 if (suffix == 'w')
44567 return XFmode;
44569 return VOIDmode;
44572 /* Worker function for TARGET_MD_ASM_ADJUST.
44574 We implement asm flag outputs, and maintain source compatibility
44575 with the old cc0-based compiler. */
44577 static rtx_insn *
44578 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
44579 vec<const char *> &constraints,
44580 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
44582 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
44583 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
44585 bool saw_asm_flag = false;
44587 start_sequence ();
44588 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
44590 const char *con = constraints[i];
44591 if (strncmp (con, "=@cc", 4) != 0)
44592 continue;
44593 con += 4;
44594 if (strchr (con, ',') != NULL)
44596 error ("alternatives not allowed in asm flag output");
44597 continue;
44600 bool invert = false;
44601 if (con[0] == 'n')
44602 invert = true, con++;
44604 machine_mode mode = CCmode;
44605 rtx_code code = UNKNOWN;
44607 switch (con[0])
44609 case 'a':
44610 if (con[1] == 0)
44611 mode = CCAmode, code = EQ;
44612 else if (con[1] == 'e' && con[2] == 0)
44613 mode = CCCmode, code = NE;
44614 break;
44615 case 'b':
44616 if (con[1] == 0)
44617 mode = CCCmode, code = EQ;
44618 else if (con[1] == 'e' && con[2] == 0)
44619 mode = CCAmode, code = NE;
44620 break;
44621 case 'c':
44622 if (con[1] == 0)
44623 mode = CCCmode, code = EQ;
44624 break;
44625 case 'e':
44626 if (con[1] == 0)
44627 mode = CCZmode, code = EQ;
44628 break;
44629 case 'g':
44630 if (con[1] == 0)
44631 mode = CCGCmode, code = GT;
44632 else if (con[1] == 'e' && con[2] == 0)
44633 mode = CCGCmode, code = GE;
44634 break;
44635 case 'l':
44636 if (con[1] == 0)
44637 mode = CCGCmode, code = LT;
44638 else if (con[1] == 'e' && con[2] == 0)
44639 mode = CCGCmode, code = LE;
44640 break;
44641 case 'o':
44642 if (con[1] == 0)
44643 mode = CCOmode, code = EQ;
44644 break;
44645 case 'p':
44646 if (con[1] == 0)
44647 mode = CCPmode, code = EQ;
44648 break;
44649 case 's':
44650 if (con[1] == 0)
44651 mode = CCSmode, code = EQ;
44652 break;
44653 case 'z':
44654 if (con[1] == 0)
44655 mode = CCZmode, code = EQ;
44656 break;
44658 if (code == UNKNOWN)
44660 error ("unknown asm flag output %qs", constraints[i]);
44661 continue;
44663 if (invert)
44664 code = reverse_condition (code);
44666 rtx dest = outputs[i];
44667 if (!saw_asm_flag)
44669 /* This is the first asm flag output. Here we put the flags
44670 register in as the real output and adjust the condition to
44671 allow it. */
44672 constraints[i] = "=Bf";
44673 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
44674 saw_asm_flag = true;
44676 else
44678 /* We don't need the flags register as output twice. */
44679 constraints[i] = "=X";
44680 outputs[i] = gen_rtx_SCRATCH (SImode);
44683 rtx x = gen_rtx_REG (mode, FLAGS_REG);
44684 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
44686 machine_mode dest_mode = GET_MODE (dest);
44687 if (!SCALAR_INT_MODE_P (dest_mode))
44689 error ("invalid type for asm flag output");
44690 continue;
44693 if (dest_mode == DImode && !TARGET_64BIT)
44694 dest_mode = SImode;
44696 if (dest_mode != QImode)
44698 rtx destqi = gen_reg_rtx (QImode);
44699 emit_insn (gen_rtx_SET (destqi, x));
44701 if (TARGET_ZERO_EXTEND_WITH_AND
44702 && optimize_function_for_speed_p (cfun))
44704 x = force_reg (dest_mode, const0_rtx);
44706 emit_insn (gen_movstrictqi
44707 (gen_lowpart (QImode, x), destqi));
44709 else
44710 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
44713 if (dest_mode != GET_MODE (dest))
44715 rtx tmp = gen_reg_rtx (SImode);
44717 emit_insn (gen_rtx_SET (tmp, x));
44718 emit_insn (gen_zero_extendsidi2 (dest, tmp));
44720 else
44721 emit_insn (gen_rtx_SET (dest, x));
44723 rtx_insn *seq = get_insns ();
44724 end_sequence ();
44726 if (saw_asm_flag)
44727 return seq;
44728 else
44730 /* If we had no asm flag outputs, clobber the flags. */
44731 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
44732 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
44733 return NULL;
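/* Example of the flag output constraints handled above (user-level
   extended asm, shown for illustration; "=@ccc" and "=@ccz" map to the
   CCCmode and CCZmode cases):

     bool carry, zero;
     asm ("addl %2, %0" : "+r" (x), "=@ccc" (carry) : "r" (y));
     asm ("testl %1, %1" : "=@ccz" (zero) : "r" (x));

   Each flag output is rewritten into a read of FLAGS_REG followed by a
   setcc/zero-extend into the user variable, instead of forcing the value
   through an extra register output.  */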
44737 /* Implements the target hook targetm.asm.encode_section_info. */
44739 static void ATTRIBUTE_UNUSED
44740 ix86_encode_section_info (tree decl, rtx rtl, int first)
44742 default_encode_section_info (decl, rtl, first);
44744 if (ix86_in_large_data_p (decl))
44745 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44748 /* Worker function for REVERSE_CONDITION. */
44750 enum rtx_code
44751 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
44753 return (mode == CCFPmode
44754 ? reverse_condition_maybe_unordered (code)
44755 : reverse_condition (code));
44758 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44759 to OPERANDS[0]. */
44761 const char *
44762 output_387_reg_move (rtx_insn *insn, rtx *operands)
44764 if (REG_P (operands[0]))
44766 if (REG_P (operands[1])
44767 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44769 if (REGNO (operands[0]) == FIRST_STACK_REG)
44770 return output_387_ffreep (operands, 0);
44771 return "fstp\t%y0";
44773 if (STACK_TOP_P (operands[0]))
44774 return "fld%Z1\t%y1";
44775 return "fst\t%y0";
44777 else if (MEM_P (operands[0]))
44779 gcc_assert (REG_P (operands[1]));
44780 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44781 return "fstp%Z0\t%y0";
44782 else
44784 /* There is no non-popping store to memory for XFmode.
44785 So if we need one, follow the store with a load. */
44786 if (GET_MODE (operands[0]) == XFmode)
44787 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44788 else
44789 return "fst%Z0\t%y0";
44792 else
44793 gcc_unreachable();
44796 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
44797 the FP status register is set. */
44799 void
44800 ix86_emit_fp_unordered_jump (rtx label)
44802 rtx reg = gen_reg_rtx (HImode);
44803 rtx temp;
44805 emit_insn (gen_x86_fnstsw_1 (reg));
44807 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44809 emit_insn (gen_x86_sahf_1 (reg));
44811 temp = gen_rtx_REG (CCmode, FLAGS_REG);
44812 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44814 else
44816 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
44818 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44819 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44822 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44823 gen_rtx_LABEL_REF (VOIDmode, label),
44824 pc_rtx);
44825 temp = gen_rtx_SET (pc_rtx, temp);
44827 emit_jump_insn (temp);
44828 predict_jump (REG_BR_PROB_BASE * 10 / 100);
44831 /* Output code to perform a log1p XFmode calculation. */
44833 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44835 rtx_code_label *label1 = gen_label_rtx ();
44836 rtx_code_label *label2 = gen_label_rtx ();
44838 rtx tmp = gen_reg_rtx (XFmode);
44839 rtx tmp2 = gen_reg_rtx (XFmode);
44840 rtx test;
44842 emit_insn (gen_absxf2 (tmp, op1));
44843 test = gen_rtx_GE (VOIDmode, tmp,
44844 const_double_from_real_value (
44845 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
44846 XFmode));
44847 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
44849 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44850 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
44851 emit_jump (label2);
44853 emit_label (label1);
44854 emit_move_insn (tmp, CONST1_RTX (XFmode));
44855 emit_insn (gen_addxf3 (tmp, op1, tmp));
44856 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44857 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
44859 emit_label (label2);
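/* C-level sketch of the two paths above (illustration only):

     if (fabs (x) < 1 - sqrt (2) / 2)          about 0.2928932...
       result = log (2) * fyl2xp1 (x);         log2 (1 + x) without forming 1 + x
     else
       result = log (2) * fyl2x (1.0 + x);     form 1 + x explicitly

   fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2, hence the branch;
   both paths scale by fldln2 so the result is a natural logarithm.  */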
44862 /* Output x87 code to compute round (OP1), storing the result in OP0
(round to nearest, halfway cases away from zero). */
44863 void ix86_emit_i387_round (rtx op0, rtx op1)
44865 machine_mode inmode = GET_MODE (op1);
44866 machine_mode outmode = GET_MODE (op0);
44867 rtx e1, e2, res, tmp, tmp1, half;
44868 rtx scratch = gen_reg_rtx (HImode);
44869 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44870 rtx_code_label *jump_label = gen_label_rtx ();
44871 rtx insn;
44872 rtx (*gen_abs) (rtx, rtx);
44873 rtx (*gen_neg) (rtx, rtx);
44875 switch (inmode)
44877 case E_SFmode:
44878 gen_abs = gen_abssf2;
44879 break;
44880 case E_DFmode:
44881 gen_abs = gen_absdf2;
44882 break;
44883 case E_XFmode:
44884 gen_abs = gen_absxf2;
44885 break;
44886 default:
44887 gcc_unreachable ();
44890 switch (outmode)
44892 case E_SFmode:
44893 gen_neg = gen_negsf2;
44894 break;
44895 case E_DFmode:
44896 gen_neg = gen_negdf2;
44897 break;
44898 case E_XFmode:
44899 gen_neg = gen_negxf2;
44900 break;
44901 case E_HImode:
44902 gen_neg = gen_neghi2;
44903 break;
44904 case E_SImode:
44905 gen_neg = gen_negsi2;
44906 break;
44907 case E_DImode:
44908 gen_neg = gen_negdi2;
44909 break;
44910 default:
44911 gcc_unreachable ();
44914 e1 = gen_reg_rtx (inmode);
44915 e2 = gen_reg_rtx (inmode);
44916 res = gen_reg_rtx (outmode);
44918 half = const_double_from_real_value (dconsthalf, inmode);
44920 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
44922 /* scratch = fxam(op1) */
44923 emit_insn (gen_rtx_SET (scratch,
44924 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
44925 UNSPEC_FXAM)));
44926 /* e1 = fabs(op1) */
44927 emit_insn (gen_abs (e1, op1));
44929 /* e2 = e1 + 0.5 */
44930 half = force_reg (inmode, half);
44931 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
44933 /* res = floor(e2) */
44934 if (inmode != XFmode)
44936 tmp1 = gen_reg_rtx (XFmode);
44938 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
44940 else
44941 tmp1 = e2;
44943 switch (outmode)
44945 case E_SFmode:
44946 case E_DFmode:
44948 rtx tmp0 = gen_reg_rtx (XFmode);
44950 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
44952 emit_insn (gen_rtx_SET (res,
44953 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
44954 UNSPEC_TRUNC_NOOP)));
44956 break;
44957 case E_XFmode:
44958 emit_insn (gen_frndintxf2_floor (res, tmp1));
44959 break;
44960 case E_HImode:
44961 emit_insn (gen_lfloorxfhi2 (res, tmp1));
44962 break;
44963 case E_SImode:
44964 emit_insn (gen_lfloorxfsi2 (res, tmp1));
44965 break;
44966 case E_DImode:
44967 emit_insn (gen_lfloorxfdi2 (res, tmp1));
44968 break;
44969 default:
44970 gcc_unreachable ();
44973 /* flags = signbit(a) */
44974 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
44976 /* if (flags) then res = -res */
44977 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44978 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44979 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44980 pc_rtx);
44981 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
44982 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44983 JUMP_LABEL (insn) = jump_label;
44985 emit_insn (gen_neg (res, res));
44987 emit_label (jump_label);
44988 LABEL_NUSES (jump_label) = 1;
44990 emit_move_insn (op0, res);
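/* Worked example of the identity used above,
   round (a) = sgn (a) * floor (fabs (a) + 0.5) (illustration only):

     a =  2.3:  floor (2.3 + 0.5) = 2,  result =  2
     a = -2.5:  floor (2.5 + 0.5) = 3,  result = -3

   i.e. halfway cases are rounded away from zero, matching round ().  */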
44993 /* Output code to perform a Newton-Raphson approximation of a single precision
44994 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
44996 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
44998 rtx x0, x1, e0, e1;
45000 x0 = gen_reg_rtx (mode);
45001 e0 = gen_reg_rtx (mode);
45002 e1 = gen_reg_rtx (mode);
45003 x1 = gen_reg_rtx (mode);
45005 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45007 b = force_reg (mode, b);
45009 /* x0 = rcp(b) estimate */
45010 if (mode == V16SFmode || mode == V8DFmode)
45012 if (TARGET_AVX512ER)
45014 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45015 UNSPEC_RCP28)));
45016 /* res = a * x0 */
45017 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
45018 return;
45020 else
45021 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45022 UNSPEC_RCP14)));
45024 else
45025 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45026 UNSPEC_RCP)));
45028 /* e0 = x0 * b */
45029 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45031 /* e0 = x0 * e0 */
45032 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45034 /* e1 = x0 + x0 */
45035 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45037 /* x1 = e1 - e0 */
45038 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45040 /* res = a * x1 */
45041 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
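/* Scalar sketch of the refinement above (illustration; rcp () stands for
   the hardware reciprocal estimate with small relative error e):

     x0 = rcp (b);             x0 = (1 - e) / b
     e0 = x0 * b;              e0 = 1 - e
     e0 = x0 * e0;             e0 = x0 * (1 - e)
     x1 = (x0 + x0) - e0;      x1 = x0 * (1 + e) = (1 - e*e) / b
     res = a * x1;             a / b with error O (e*e)  */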
45044 /* Output code to perform a Newton-Raphson approximation of a
45045 single precision floating point [reciprocal] square root. */
45047 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
45049 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45050 REAL_VALUE_TYPE r;
45051 int unspec;
45053 x0 = gen_reg_rtx (mode);
45054 e0 = gen_reg_rtx (mode);
45055 e1 = gen_reg_rtx (mode);
45056 e2 = gen_reg_rtx (mode);
45057 e3 = gen_reg_rtx (mode);
45059 if (TARGET_AVX512ER && mode == V16SFmode)
45061 if (recip)
45062 /* res = rsqrt28(a) estimate */
45063 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45064 UNSPEC_RSQRT28)));
45065 else
45067 /* x0 = rsqrt28(a) estimate */
45068 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45069 UNSPEC_RSQRT28)));
45070 /* res = rcp28(x0) estimate */
45071 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
45072 UNSPEC_RCP28)));
45074 return;
45077 real_from_integer (&r, VOIDmode, -3, SIGNED);
45078 mthree = const_double_from_real_value (r, SFmode);
45080 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45081 mhalf = const_double_from_real_value (r, SFmode);
45082 unspec = UNSPEC_RSQRT;
45084 if (VECTOR_MODE_P (mode))
45086 mthree = ix86_build_const_vector (mode, true, mthree);
45087 mhalf = ix86_build_const_vector (mode, true, mhalf);
45088 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45089 if (GET_MODE_SIZE (mode) == 64)
45090 unspec = UNSPEC_RSQRT14;
45093 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45094 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45096 a = force_reg (mode, a);
45098 /* x0 = rsqrt(a) estimate */
45099 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45100 unspec)));
45102 /* If (a == 0.0), the rsqrt estimate is infinity; filter it out to
prevent a NaN (0 * inf) for sqrt(0.0). */
45103 if (!recip)
45105 rtx zero = force_reg (mode, CONST0_RTX(mode));
45106 rtx mask;
45108 /* Handle masked compare. */
45109 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45111 mask = gen_reg_rtx (HImode);
45112 /* Imm value 0x4 corresponds to not-equal comparison. */
45113 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45114 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45116 else
45118 mask = gen_reg_rtx (mode);
45119 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45120 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45124 /* e0 = x0 * a */
45125 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45126 /* e1 = e0 * x0 */
45127 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45129 /* e2 = e1 - 3. */
45130 mthree = force_reg (mode, mthree);
45131 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45133 mhalf = force_reg (mode, mhalf);
45134 if (recip)
45135 /* e3 = -.5 * x0 */
45136 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45137 else
45138 /* e3 = -.5 * e0 */
45139 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45140 /* ret = e2 * e3 */
45141 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
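/* Scalar sketch of the step above (illustration; x0 is the hardware
   estimate, x0 ~= 1/sqrt (a)):

     e0 = x0 * a;
     e1 = e0 * x0;                   about a * x0 * x0, close to 1
     e2 = e1 - 3.0;
     e3 = recip ? -0.5 * x0 : -0.5 * e0;
     res = e2 * e3;                  rsqrt: 0.5 * x0 * (3 - a*x0*x0)
                                     sqrt:  0.5 * a * x0 * (3 - a*x0*x0)

   which is one Newton-Raphson step for 1/sqrt (a), optionally scaled by a
   to yield sqrt (a).  */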
45144 #ifdef TARGET_SOLARIS
45145 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45147 static void
45148 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45149 tree decl)
45151 /* With Binutils 2.15, the "@unwind" marker must be specified on
45152 every occurrence of the ".eh_frame" section, not just the first
45153 one. */
45154 if (TARGET_64BIT
45155 && strcmp (name, ".eh_frame") == 0)
45157 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45158 flags & SECTION_WRITE ? "aw" : "a");
45159 return;
45162 #ifndef USE_GAS
45163 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45165 solaris_elf_asm_comdat_section (name, flags, decl);
45166 return;
45168 #endif
45170 default_elf_asm_named_section (name, flags, decl);
45172 #endif /* TARGET_SOLARIS */
45174 /* Return the mangling of TYPE if it is an extended fundamental type. */
45176 static const char *
45177 ix86_mangle_type (const_tree type)
45179 type = TYPE_MAIN_VARIANT (type);
45181 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45182 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45183 return NULL;
45185 switch (TYPE_MODE (type))
45187 case E_TFmode:
45188 /* __float128 is "g". */
45189 return "g";
45190 case E_XFmode:
45191 /* "long double" or __float80 is "e". */
45192 return "e";
45193 default:
45194 return NULL;
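/* For example (Itanium C++ ABI mangling, shown for illustration): with
   these manglings, void f (__float128) becomes _Z1fg and
   void f (long double) becomes _Z1fe on x86.  */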
45198 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
45200 static tree
45201 ix86_stack_protect_guard (void)
45203 if (TARGET_SSP_TLS_GUARD)
45205 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
45206 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
45207 tree type = build_qualified_type (type_node, qual);
45208 tree t;
45210 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
45212 t = ix86_tls_stack_chk_guard_decl;
45214 if (t == NULL)
45216 rtx x;
45218 t = build_decl
45219 (UNKNOWN_LOCATION, VAR_DECL,
45220 get_identifier (ix86_stack_protector_guard_symbol_str),
45221 type);
45222 TREE_STATIC (t) = 1;
45223 TREE_PUBLIC (t) = 1;
45224 DECL_EXTERNAL (t) = 1;
45225 TREE_USED (t) = 1;
45226 TREE_THIS_VOLATILE (t) = 1;
45227 DECL_ARTIFICIAL (t) = 1;
45228 DECL_IGNORED_P (t) = 1;
45230 /* Do not share RTL as the declaration is visible outside of
45231 current function. */
45232 x = DECL_RTL (t);
45233 RTX_FLAG (x, used) = 1;
45235 ix86_tls_stack_chk_guard_decl = t;
45238 else
45240 tree asptrtype = build_pointer_type (type);
45242 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
45243 t = build2 (MEM_REF, asptrtype, t,
45244 build_int_cst (asptrtype, 0));
45247 return t;
45250 return default_stack_protect_guard ();
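/* Example (assumed option spellings, for illustration): by default the TLS
   guard lives at %fs:40 on 64-bit and %gs:20 on 32-bit targets; the
   -mstack-protector-guard-reg=, -mstack-protector-guard-offset= and
   -mstack-protector-guard-symbol= options adjust the segment register,
   offset or symbol used to build the reference above, while
   -mstack-protector-guard=global falls back to the __stack_chk_guard
   variable via default_stack_protect_guard.  */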
45253 /* For 32-bit code we can save PIC register setup by using
45254 __stack_chk_fail_local hidden function instead of calling
45255 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45256 register, so it is better to call __stack_chk_fail directly. */
45258 static tree ATTRIBUTE_UNUSED
45259 ix86_stack_protect_fail (void)
45261 return TARGET_64BIT
45262 ? default_external_stack_protect_fail ()
45263 : default_hidden_stack_protect_fail ();
45266 /* Select a format to encode pointers in exception handling data. CODE
45267 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45268 true if the symbol may be affected by dynamic relocations.
45270 ??? All x86 object file formats are capable of representing this.
45271 After all, the relocation needed is the same as for the call insn.
45272 Whether or not a particular assembler allows us to enter such, I
45273 guess we'll have to see. */
45275 asm_preferred_eh_data_format (int code, int global)
45277 if (flag_pic)
45279 int type = DW_EH_PE_sdata8;
45280 if (!TARGET_64BIT
45281 || ix86_cmodel == CM_SMALL_PIC
45282 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45283 type = DW_EH_PE_sdata4;
45284 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45286 if (ix86_cmodel == CM_SMALL
45287 || (ix86_cmodel == CM_MEDIUM && code))
45288 return DW_EH_PE_udata4;
45289 return DW_EH_PE_absptr;
45292 /* Expand copysign from SIGN to the positive value ABS_VALUE
45293 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45294 the sign-bit. */
45295 static void
45296 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45298 machine_mode mode = GET_MODE (sign);
45299 rtx sgn = gen_reg_rtx (mode);
45300 if (mask == NULL_RTX)
45302 machine_mode vmode;
45304 if (mode == SFmode)
45305 vmode = V4SFmode;
45306 else if (mode == DFmode)
45307 vmode = V2DFmode;
45308 else
45309 vmode = mode;
45311 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45312 if (!VECTOR_MODE_P (mode))
45314 /* We need to generate a scalar mode mask in this case. */
45315 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45316 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45317 mask = gen_reg_rtx (mode);
45318 emit_insn (gen_rtx_SET (mask, tmp));
45321 else
45322 mask = gen_rtx_NOT (mode, mask);
45323 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45324 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
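/* Bitwise sketch of the copysign above (illustration; SIGNBIT is the
   sign-bit mask built by ix86_build_signbit_mask):

     sgn    = sign & SIGNBIT;        isolate the sign bit of SIGN
     result = abs_value | sgn;       attach it to the magnitude

   ABS_VALUE is assumed to have a clear sign bit, so the IOR simply
   installs the sign taken from SIGN.  */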
45327 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45328 mask for masking out the sign-bit is stored in *SMASK, if that is
45329 non-null. */
45330 static rtx
45331 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45333 machine_mode vmode, mode = GET_MODE (op0);
45334 rtx xa, mask;
45336 xa = gen_reg_rtx (mode);
45337 if (mode == SFmode)
45338 vmode = V4SFmode;
45339 else if (mode == DFmode)
45340 vmode = V2DFmode;
45341 else
45342 vmode = mode;
45343 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45344 if (!VECTOR_MODE_P (mode))
45346 /* We need to generate a scalar mode mask in this case. */
45347 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45348 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45349 mask = gen_reg_rtx (mode);
45350 emit_insn (gen_rtx_SET (mask, tmp));
45352 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
45354 if (smask)
45355 *smask = mask;
45357 return xa;
45360 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45361 swapping the operands if SWAP_OPERANDS is true. The expanded
45362 code is a forward jump to a newly created label in case the
45363 comparison is true. The generated label rtx is returned. */
45364 static rtx_code_label *
45365 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45366 bool swap_operands)
45368 bool unordered_compare = ix86_unordered_fp_compare (code);
45369 rtx_code_label *label;
45370 rtx tmp, reg;
45372 if (swap_operands)
45373 std::swap (op0, op1);
45375 label = gen_label_rtx ();
45376 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
45377 if (unordered_compare)
45378 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
45379 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
45380 emit_insn (gen_rtx_SET (reg, tmp));
45381 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
45382 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45383 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45384 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45385 JUMP_LABEL (tmp) = label;
45387 return label;
45390 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45391 using comparison code CODE. Operands are swapped for the comparison if
45392 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45393 static rtx
45394 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45395 bool swap_operands)
45397 rtx (*insn)(rtx, rtx, rtx, rtx);
45398 machine_mode mode = GET_MODE (op0);
45399 rtx mask = gen_reg_rtx (mode);
45401 if (swap_operands)
45402 std::swap (op0, op1);
45404 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45406 emit_insn (insn (mask, op0, op1,
45407 gen_rtx_fmt_ee (code, mode, op0, op1)));
45408 return mask;
45411 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45412 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45413 static rtx
45414 ix86_gen_TWO52 (machine_mode mode)
45416 REAL_VALUE_TYPE TWO52r;
45417 rtx TWO52;
45419 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45420 TWO52 = const_double_from_real_value (TWO52r, mode);
45421 TWO52 = force_reg (mode, TWO52);
45423 return TWO52;
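/* The constant feeds the "add and subtract 2**52" trick used by the
   expanders below (illustration; 2**23 plays the same role for SFmode,
   round-to-nearest assumed):

     double xa = 3.7;
     double r = (xa + 0x1p52) - 0x1p52;      r == 4.0

   Once 2**52 is added, no mantissa bits remain below the units place, so
   the FP addition itself rounds XA to an integer; inputs with
   fabs (x) >= 2**52 are already integral and skip this path.  */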
45426 /* Expand SSE sequence for computing lround from OP1 storing
45427 into OP0. */
45428 void
45429 ix86_expand_lround (rtx op0, rtx op1)
45431 /* C code for the stuff we're doing below:
45432 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45433 return (long)tmp;
45435 machine_mode mode = GET_MODE (op1);
45436 const struct real_format *fmt;
45437 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45438 rtx adj;
45440 /* load nextafter (0.5, 0.0) */
45441 fmt = REAL_MODE_FORMAT (mode);
45442 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45443 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45445 /* adj = copysign (0.5, op1) */
45446 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45447 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45449 /* adj = op1 + adj */
45450 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45452 /* op0 = (imode)adj */
45453 expand_fix (op0, adj, 0);
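/* Why nextafter (0.5, 0.0) rather than 0.5 (illustration): for the largest
   double just below 0.5, x + 0.5 rounds up to exactly 1.0, so the final
   truncation would yield lround (x) == 1 instead of 0.  Adding the
   predecessor of 0.5 keeps every x < 0.5 below 1.0 after the addition,
   while exact halfway cases such as 2.5 still reach the next integer.  */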
45456 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
45457 into OPERAND0. */
45458 void
45459 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45461 /* C code for the stuff we're doing below (for do_floor):
45462 xi = (long)op1;
45463 xi -= (double)xi > op1 ? 1 : 0;
45464 return xi;
45466 machine_mode fmode = GET_MODE (op1);
45467 machine_mode imode = GET_MODE (op0);
45468 rtx ireg, freg, tmp;
45469 rtx_code_label *label;
45471 /* reg = (long)op1 */
45472 ireg = gen_reg_rtx (imode);
45473 expand_fix (ireg, op1, 0);
45475 /* freg = (double)reg */
45476 freg = gen_reg_rtx (fmode);
45477 expand_float (freg, ireg, 0);
45479 /* ireg = (freg > op1) ? ireg - 1 : ireg */
45480 label = ix86_expand_sse_compare_and_jump (UNLE,
45481 freg, op1, !do_floor);
45482 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45483 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45484 emit_move_insn (ireg, tmp);
45486 emit_label (label);
45487 LABEL_NUSES (label) = 1;
45489 emit_move_insn (op0, ireg);
45492 /* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
45493 void
45494 ix86_expand_rint (rtx operand0, rtx operand1)
45496 /* C code for the stuff we're doing below:
45497 xa = fabs (operand1);
45498 if (!isless (xa, 2**52))
45499 return operand1;
45500 two52 = 2**52;
45501 if (flag_rounding_math)
45503 two52 = copysign (two52, operand1);
45504 xa = operand1;
45506 xa = xa + two52 - two52;
45507 return copysign (xa, operand1);
45509 machine_mode mode = GET_MODE (operand0);
45510 rtx res, xa, TWO52, two52, mask;
45511 rtx_code_label *label;
45513 res = gen_reg_rtx (mode);
45514 emit_move_insn (res, operand1);
45516 /* xa = abs (operand1) */
45517 xa = ix86_expand_sse_fabs (res, &mask);
45519 /* if (!isless (xa, TWO52)) goto label; */
45520 TWO52 = ix86_gen_TWO52 (mode);
45521 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45523 two52 = TWO52;
45524 if (flag_rounding_math)
45526 two52 = gen_reg_rtx (mode);
45527 ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
45528 xa = res;
45531 xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
45532 xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
45534 ix86_sse_copysign_to_positive (res, xa, res, mask);
45536 emit_label (label);
45537 LABEL_NUSES (label) = 1;
45539 emit_move_insn (operand0, res);
45542 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45543 into OPERAND0; variant that avoids DImode truncation (for DFmode on 32-bit targets). */
45544 void
45545 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45547 /* C code for the stuff we expand below.
45548 double xa = fabs (x), x2;
45549 if (!isless (xa, TWO52))
45550 return x;
45551 xa = xa + TWO52 - TWO52;
45552 x2 = copysign (xa, x);
45553 Compensate. Floor:
45554 if (x2 > x)
45555 x2 -= 1;
45556 Compensate. Ceil:
45557 if (x2 < x)
45558 x2 -= -1;
45559 return x2;
45561 machine_mode mode = GET_MODE (operand0);
45562 rtx xa, TWO52, tmp, one, res, mask;
45563 rtx_code_label *label;
45565 TWO52 = ix86_gen_TWO52 (mode);
45567 /* Temporary for holding the result, initialized to the input
45568 operand to ease control flow. */
45569 res = gen_reg_rtx (mode);
45570 emit_move_insn (res, operand1);
45572 /* xa = abs (operand1) */
45573 xa = ix86_expand_sse_fabs (res, &mask);
45575 /* if (!isless (xa, TWO52)) goto label; */
45576 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45578 /* xa = xa + TWO52 - TWO52; */
45579 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45580 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45582 /* xa = copysign (xa, operand1) */
45583 ix86_sse_copysign_to_positive (xa, xa, res, mask);
45585 /* generate 1.0 or -1.0 */
45586 one = force_reg (mode,
45587 const_double_from_real_value (do_floor
45588 ? dconst1 : dconstm1, mode));
45590 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45591 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45592 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45593 /* We always need to subtract here to preserve signed zero. */
45594 tmp = expand_simple_binop (mode, MINUS,
45595 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45596 emit_move_insn (res, tmp);
45598 emit_label (label);
45599 LABEL_NUSES (label) = 1;
45601 emit_move_insn (operand0, res);
45604 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45605 into OPERAND0. */
45606 void
45607 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
45609 /* C code for the stuff we expand below.
45610 double xa = fabs (x), x2;
45611 if (!isless (xa, TWO52))
45612 return x;
45613 x2 = (double)(long)x;
45614 Compensate. Floor:
45615 if (x2 > x)
45616 x2 -= 1;
45617 Compensate. Ceil:
45618 if (x2 < x)
45619 x2 += 1;
45620 if (HONOR_SIGNED_ZEROS (mode))
45621 return copysign (x2, x);
45622 return x2;
45624 machine_mode mode = GET_MODE (operand0);
45625 rtx xa, xi, TWO52, tmp, one, res, mask;
45626 rtx_code_label *label;
45628 TWO52 = ix86_gen_TWO52 (mode);
45630 /* Temporary for holding the result, initialized to the input
45631 operand to ease control flow. */
45632 res = gen_reg_rtx (mode);
45633 emit_move_insn (res, operand1);
45635 /* xa = abs (operand1) */
45636 xa = ix86_expand_sse_fabs (res, &mask);
45638 /* if (!isless (xa, TWO52)) goto label; */
45639 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45641 /* xa = (double)(long)x */
45642 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45643 expand_fix (xi, res, 0);
45644 expand_float (xa, xi, 0);
45646 /* generate 1.0 */
45647 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45649 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45650 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45651 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45652 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45653 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45654 emit_move_insn (res, tmp);
45656 if (HONOR_SIGNED_ZEROS (mode))
45657 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45659 emit_label (label);
45660 LABEL_NUSES (label) = 1;
45662 emit_move_insn (operand0, res);
45665 /* Expand SSE sequence for computing round from OPERAND1 storing
45666 into OPERAND0. Sequence that works without relying on DImode truncation
45667 via cvttsd2siq that is only available on 64bit targets. */
45668 void
45669 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
45671 /* C code for the stuff we expand below.
45672 double xa = fabs (x), xa2, x2;
45673 if (!isless (xa, TWO52))
45674 return x;
45675 Using the absolute value and copying back sign makes
45676 -0.0 -> -0.0 correct.
45677 xa2 = xa + TWO52 - TWO52;
45678 Compensate.
45679 dxa = xa2 - xa;
45680 if (dxa <= -0.5)
45681 xa2 += 1;
45682 else if (dxa > 0.5)
45683 xa2 -= 1;
45684 x2 = copysign (xa2, x);
45685 return x2;
45687 machine_mode mode = GET_MODE (operand0);
45688 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
45689 rtx_code_label *label;
45691 TWO52 = ix86_gen_TWO52 (mode);
45693 /* Temporary for holding the result, initialized to the input
45694 operand to ease control flow. */
45695 res = gen_reg_rtx (mode);
45696 emit_move_insn (res, operand1);
45698 /* xa = abs (operand1) */
45699 xa = ix86_expand_sse_fabs (res, &mask);
45701 /* if (!isless (xa, TWO52)) goto label; */
45702 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45704 /* xa2 = xa + TWO52 - TWO52; */
45705 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45706 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
45708 /* dxa = xa2 - xa; */
45709 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
45711 /* generate 0.5, 1.0 and -0.5 */
45712 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
45713 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
45714 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
45715 0, OPTAB_DIRECT);
45717 /* Compensate. */
45718 tmp = gen_reg_rtx (mode);
45719 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
45720 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
45721 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45722 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45723 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
45724 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
45725 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45726 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45728 /* res = copysign (xa2, operand1) */
45729 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
45731 emit_label (label);
45732 LABEL_NUSES (label) = 1;
45734 emit_move_insn (operand0, res);
45737 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45738 into OPERAND0. */
45739 void
45740 ix86_expand_trunc (rtx operand0, rtx operand1)
45742 /* C code for SSE variant we expand below.
45743 double xa = fabs (x), x2;
45744 if (!isless (xa, TWO52))
45745 return x;
45746 x2 = (double)(long)x;
45747 if (HONOR_SIGNED_ZEROS (mode))
45748 return copysign (x2, x);
45749 return x2;
45751 machine_mode mode = GET_MODE (operand0);
45752 rtx xa, xi, TWO52, res, mask;
45753 rtx_code_label *label;
45755 TWO52 = ix86_gen_TWO52 (mode);
45757 /* Temporary for holding the result, initialized to the input
45758 operand to ease control flow. */
45759 res = gen_reg_rtx (mode);
45760 emit_move_insn (res, operand1);
45762 /* xa = abs (operand1) */
45763 xa = ix86_expand_sse_fabs (res, &mask);
45765 /* if (!isless (xa, TWO52)) goto label; */
45766 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45768 /* x = (double)(long)x */
45769 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45770 expand_fix (xi, res, 0);
45771 expand_float (res, xi, 0);
45773 if (HONOR_SIGNED_ZEROS (mode))
45774 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45776 emit_label (label);
45777 LABEL_NUSES (label) = 1;
45779 emit_move_insn (operand0, res);
45782 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45783 into OPERAND0; variant that avoids DImode truncation (for DFmode on 32-bit targets). */
45784 void
45785 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45787 machine_mode mode = GET_MODE (operand0);
45788 rtx xa, mask, TWO52, one, res, smask, tmp;
45789 rtx_code_label *label;
45791 /* C code for SSE variant we expand below.
45792 double xa = fabs (x), xa2, x2;
45793 if (!isless (xa, TWO52))
45794 return x;
45795 xa2 = xa + TWO52 - TWO52;
45796 Compensate:
45797 if (xa2 > xa)
45798 xa2 -= 1.0;
45799 x2 = copysign (xa2, x);
45800 return x2;
45803 TWO52 = ix86_gen_TWO52 (mode);
45805 /* Temporary for holding the result, initialized to the input
45806 operand to ease control flow. */
45807 res = gen_reg_rtx (mode);
45808 emit_move_insn (res, operand1);
45810 /* xa = abs (operand1) */
45811 xa = ix86_expand_sse_fabs (res, &smask);
45813 /* if (!isless (xa, TWO52)) goto label; */
45814 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45816 /* res = xa + TWO52 - TWO52; */
45817 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45818 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45819 emit_move_insn (res, tmp);
45821 /* generate 1.0 */
45822 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45824 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
45825 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45826 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
45827 tmp = expand_simple_binop (mode, MINUS,
45828 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45829 emit_move_insn (res, tmp);
45831 /* res = copysign (res, operand1) */
45832 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45834 emit_label (label);
45835 LABEL_NUSES (label) = 1;
45837 emit_move_insn (operand0, res);
45840 /* Expand SSE sequence for computing round from OPERAND1 storing
45841 into OPERAND0. */
45842 void
45843 ix86_expand_round (rtx operand0, rtx operand1)
45845 /* C code for the stuff we're doing below:
45846 double xa = fabs (x);
45847 if (!isless (xa, TWO52))
45848 return x;
45849 xa = (double)(long)(xa + nextafter (0.5, 0.0));
45850 return copysign (xa, x);
45852 machine_mode mode = GET_MODE (operand0);
45853 rtx res, TWO52, xa, xi, half, mask;
45854 rtx_code_label *label;
45855 const struct real_format *fmt;
45856 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45858 /* Temporary for holding the result, initialized to the input
45859 operand to ease control flow. */
45860 res = gen_reg_rtx (mode);
45861 emit_move_insn (res, operand1);
45863 TWO52 = ix86_gen_TWO52 (mode);
45864 xa = ix86_expand_sse_fabs (res, &mask);
45865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45867 /* load nextafter (0.5, 0.0) */
45868 fmt = REAL_MODE_FORMAT (mode);
45869 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45870 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45872 /* xa = xa + 0.5 */
45873 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45874 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45876 /* xa = (double)(int64_t)xa */
45877 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45878 expand_fix (xi, xa, 0);
45879 expand_float (xa, xi, 0);
45881 /* res = copysign (xa, operand1) */
45882 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45884 emit_label (label);
45885 LABEL_NUSES (label) = 1;
45887 emit_move_insn (operand0, res);
45890 /* Expand SSE sequence for computing round
45891 from OP1 storing into OP0 using sse4 round insn. */
45892 void
45893 ix86_expand_round_sse4 (rtx op0, rtx op1)
45895 machine_mode mode = GET_MODE (op0);
45896 rtx e1, e2, res, half;
45897 const struct real_format *fmt;
45898 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45899 rtx (*gen_copysign) (rtx, rtx, rtx);
45900 rtx (*gen_round) (rtx, rtx, rtx);
45902 switch (mode)
45904 case E_SFmode:
45905 gen_copysign = gen_copysignsf3;
45906 gen_round = gen_sse4_1_roundsf2;
45907 break;
45908 case E_DFmode:
45909 gen_copysign = gen_copysigndf3;
45910 gen_round = gen_sse4_1_rounddf2;
45911 break;
45912 default:
45913 gcc_unreachable ();
45916 /* round (a) = trunc (a + copysign (0.5, a)) */
45918 /* load nextafter (0.5, 0.0) */
45919 fmt = REAL_MODE_FORMAT (mode);
45920 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45921 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45922 half = const_double_from_real_value (pred_half, mode);
45924 /* e1 = copysign (0.5, op1) */
45925 e1 = gen_reg_rtx (mode);
45926 emit_insn (gen_copysign (e1, half, op1));
45928 /* e2 = op1 + e1 */
45929 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
45931 /* res = trunc (e2) */
45932 res = gen_reg_rtx (mode);
45933 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
45935 emit_move_insn (op0, res);
45939 /* Table of valid machine attributes. */
45940 static const struct attribute_spec ix86_attribute_table[] =
45942 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
45943 affects_type_identity, handler, exclude } */
45944 /* Stdcall attribute says callee is responsible for popping arguments
45945 if they are not variable. */
45946 { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45947 NULL },
45948 /* Fastcall attribute says callee is responsible for popping arguments
45949 if they are not variable. */
45950 { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45951 NULL },
45952 /* Thiscall attribute says callee is responsible for popping arguments
45953 if they are not variable. */
45954 { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45955 NULL },
45956 /* Cdecl attribute says the callee is a normal C declaration. */
45957 { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45958 NULL },
45959 /* Regparm attribute specifies how many integer arguments are to be
45960 passed in registers. */
45961 { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute,
45962 NULL },
45963 /* Sseregparm attribute says we are using x86_64 calling conventions
45964 for FP arguments. */
45965 { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute,
45966 NULL },
45967 /* The transactional memory builtins are implicitly regparm or fastcall
45968 depending on the ABI. Override the generic do-nothing attribute that
45969 these builtins were declared with. */
45970 { "*tm regparm", 0, 0, false, true, true, true,
45971 ix86_handle_tm_regparm_attribute, NULL },
45972 /* force_align_arg_pointer says this function realigns the stack at entry. */
45973 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
45974 false, true, true, false, ix86_handle_force_align_arg_pointer_attribute,
45975 NULL },
45976 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
45977 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
45978 NULL },
45979 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
45980 NULL },
45981 { "shared", 0, 0, true, false, false, false,
45982 ix86_handle_shared_attribute, NULL },
45983 #endif
45984 { "ms_struct", 0, 0, false, false, false, false,
45985 ix86_handle_struct_attribute, NULL },
45986 { "gcc_struct", 0, 0, false, false, false, false,
45987 ix86_handle_struct_attribute, NULL },
45988 #ifdef SUBTARGET_ATTRIBUTE_TABLE
45989 SUBTARGET_ATTRIBUTE_TABLE,
45990 #endif
45991 /* ms_abi and sysv_abi calling convention function attributes. */
45992 { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
45993 { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
45994 NULL },
45995 { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45996 { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
45997 { "ms_hook_prologue", 0, 0, true, false, false, false,
45998 ix86_handle_fndecl_attribute, NULL },
45999 { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
46000 ix86_handle_callee_pop_aggregate_return, NULL },
46001 { "interrupt", 0, 0, false, true, true, false,
46002 ix86_handle_interrupt_attribute, NULL },
46003 { "no_caller_saved_registers", 0, 0, false, true, true, false,
46004 ix86_handle_no_caller_saved_registers_attribute, NULL },
46005 { "naked", 0, 0, true, false, false, false,
46006 ix86_handle_fndecl_attribute, NULL },
46007 { "indirect_branch", 1, 1, true, false, false, false,
46008 ix86_handle_fndecl_attribute, NULL },
46009 { "function_return", 1, 1, true, false, false, false,
46010 ix86_handle_fndecl_attribute, NULL },
46012 /* End element. */
46013 { NULL, 0, 0, false, false, false, false, NULL, NULL }
46016 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46017 static int
46018 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46019 tree vectype, int)
46021 bool fp = false;
46022 machine_mode mode = TImode;
46023 int index;
46024 if (vectype != NULL)
46026 fp = FLOAT_TYPE_P (vectype);
46027 mode = TYPE_MODE (vectype);
46030 switch (type_of_cost)
46032 case scalar_stmt:
46033 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
46035 case scalar_load:
46036 /* load/store costs are relative to register move, which is 2.  Recompute
46037 them to COSTS_N_INSNS so everything has the same base. */
46038 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
46039 : ix86_cost->int_load [2]) / 2;
46041 case scalar_store:
46042 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
46043 : ix86_cost->int_store [2]) / 2;
46045 case vector_stmt:
46046 return ix86_vec_cost (mode,
46047 fp ? ix86_cost->addss : ix86_cost->sse_op,
46048 true);
46050 case vector_load:
46051 index = sse_store_index (mode);
46052 /* See PR82713 - we may end up being called on a non-vector type. */
46053 if (index < 0)
46054 index = 2;
46055 return ix86_vec_cost (mode,
46056 COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
46057 true);
46059 case vector_store:
46060 index = sse_store_index (mode);
46061 /* See PR82713 - we may end up being called on a non-vector type. */
46062 if (index < 0)
46063 index = 2;
46064 return ix86_vec_cost (mode,
46065 COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
46066 true);
46068 case vec_to_scalar:
46069 case scalar_to_vec:
46070 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
46072 /* We should have separate costs for unaligned loads and gather/scatter.
46073 Do that incrementally. */
46074 case unaligned_load:
46075 index = sse_store_index (mode);
46076 /* See PR82713 - we may end up being called on a non-vector type. */
46077 if (index < 0)
46078 index = 2;
46079 return ix86_vec_cost (mode,
46080 COSTS_N_INSNS
46081 (ix86_cost->sse_unaligned_load[index]) / 2,
46082 true);
46084 case unaligned_store:
46085 index = sse_store_index (mode);
46086 /* See PR82713 - we may end up being called on a non-vector type. */
46087 if (index < 0)
46088 index = 2;
46089 return ix86_vec_cost (mode,
46090 COSTS_N_INSNS
46091 (ix86_cost->sse_unaligned_store[index]) / 2,
46092 true);
46094 case vector_gather_load:
46095 return ix86_vec_cost (mode,
46096 COSTS_N_INSNS
46097 (ix86_cost->gather_static
46098 + ix86_cost->gather_per_elt
46099 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46100 true);
46102 case vector_scatter_store:
46103 return ix86_vec_cost (mode,
46104 COSTS_N_INSNS
46105 (ix86_cost->scatter_static
46106 + ix86_cost->scatter_per_elt
46107 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46108 true);
46110 case cond_branch_taken:
46111 return ix86_cost->cond_taken_branch_cost;
46113 case cond_branch_not_taken:
46114 return ix86_cost->cond_not_taken_branch_cost;
46116 case vec_perm:
46117 case vec_promote_demote:
46118 return ix86_vec_cost (mode,
46119 ix86_cost->sse_op, true);
46121 case vec_construct:
46123 /* N element inserts. */
46124 int cost = ix86_vec_cost (mode, ix86_cost->sse_op, false);
46125 /* One vinserti128 for combining two SSE vectors for AVX256. */
46126 if (GET_MODE_BITSIZE (mode) == 256)
46127 cost += ix86_vec_cost (mode, ix86_cost->addss, true);
46128 /* One vinserti64x4 and two vinserti128 for combining SSE
46129 and AVX256 vectors to AVX512. */
46130 else if (GET_MODE_BITSIZE (mode) == 512)
46131 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss, true);
46132 return cost;
46135 default:
46136 gcc_unreachable ();
46140 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46141 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46142 insn every time. */
46144 static GTY(()) rtx_insn *vselect_insn;
46146 /* Initialize vselect_insn. */
46148 static void
46149 init_vselect_insn (void)
46151 unsigned i;
46152 rtx x;
46154 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46155 for (i = 0; i < MAX_VECT_LEN; ++i)
46156 XVECEXP (x, 0, i) = const0_rtx;
46157 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46158 const0_rtx), x);
46159 x = gen_rtx_SET (const0_rtx, x);
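/* Emit into a throwaway sequence so the cached insn is never added to
   the real instruction stream; expand_vselect{,_vconcat} only patch its
   operands and re-recognize it in place.  */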
46160 start_sequence ();
46161 vselect_insn = emit_insn (x);
46162 end_sequence ();
46165 /* Construct (set target (vec_select op0 (parallel perm))) and
46166 return true if that's a valid instruction in the active ISA. */
46168 static bool
46169 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46170 unsigned nelt, bool testing_p)
46172 unsigned int i;
46173 rtx x, save_vconcat;
46174 int icode;
46176 if (vselect_insn == NULL_RTX)
46177 init_vselect_insn ();
46179 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46180 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46181 for (i = 0; i < nelt; ++i)
46182 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46183 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46184 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46185 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46186 SET_DEST (PATTERN (vselect_insn)) = target;
46187 icode = recog_memoized (vselect_insn);
46189 if (icode >= 0 && !testing_p)
46190 emit_insn (copy_rtx (PATTERN (vselect_insn)));
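/* Restore the cached insn to its neutral state so the next call starts
   from a clean template.  */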
46192 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46193 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46194 INSN_CODE (vselect_insn) = -1;
46196 return icode >= 0;
46199 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46201 static bool
46202 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46203 const unsigned char *perm, unsigned nelt,
46204 bool testing_p)
46206 machine_mode v2mode;
46207 rtx x;
46208 bool ok;
46210 if (vselect_insn == NULL_RTX)
46211 init_vselect_insn ();
46213 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
46214 return false;
46215 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46216 PUT_MODE (x, v2mode);
46217 XEXP (x, 0) = op0;
46218 XEXP (x, 1) = op1;
46219 ok = expand_vselect (target, x, perm, nelt, testing_p);
46220 XEXP (x, 0) = const0_rtx;
46221 XEXP (x, 1) = const0_rtx;
46222 return ok;
46225 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46226 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46228 static bool
46229 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46231 machine_mode mmode, vmode = d->vmode;
46232 unsigned i, mask, nelt = d->nelt;
46233 rtx target, op0, op1, maskop, x;
46234 rtx rperm[32], vperm;
46236 if (d->one_operand_p)
46237 return false;
46238 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46239 && (TARGET_AVX512BW
46240 || GET_MODE_UNIT_SIZE (vmode) >= 4))
46242 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46244 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46246 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46248 else
46249 return false;
46251 /* This is a blend, not a permute. Elements must stay in their
46252 respective lanes. */
46253 for (i = 0; i < nelt; ++i)
46255 unsigned e = d->perm[i];
46256 if (!(e == i || e == i + nelt))
46257 return false;
46260 if (d->testing_p)
46261 return true;
46263 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46264 decision should be extracted elsewhere, so that we only try that
46265 sequence once all budget==3 options have been tried. */
46266 target = d->target;
46267 op0 = d->op0;
46268 op1 = d->op1;
46269 mask = 0;
46271 switch (vmode)
46273 case E_V8DFmode:
46274 case E_V16SFmode:
46275 case E_V4DFmode:
46276 case E_V8SFmode:
46277 case E_V2DFmode:
46278 case E_V4SFmode:
46279 case E_V8HImode:
46280 case E_V8SImode:
46281 case E_V32HImode:
46282 case E_V64QImode:
46283 case E_V16SImode:
46284 case E_V8DImode:
46285 for (i = 0; i < nelt; ++i)
46286 mask |= (d->perm[i] >= nelt) << i;
46287 break;
46289 case E_V2DImode:
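/* Blend as V8HImode: each DImode element covers four HImode elements,
   so each selector bit is replicated into a group of four mask bits.  */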
46290 for (i = 0; i < 2; ++i)
46291 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46292 vmode = V8HImode;
46293 goto do_subreg;
46295 case E_V4SImode:
46296 for (i = 0; i < 4; ++i)
46297 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46298 vmode = V8HImode;
46299 goto do_subreg;
46301 case E_V16QImode:
46302 /* See if bytes move in pairs so we can use pblendw with
46303 an immediate argument, rather than pblendvb with a vector
46304 argument. */
46305 for (i = 0; i < 16; i += 2)
46306 if (d->perm[i] + 1 != d->perm[i + 1])
46308 use_pblendvb:
46309 for (i = 0; i < nelt; ++i)
46310 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46312 finish_pblendvb:
46313 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46314 vperm = force_reg (vmode, vperm);
46316 if (GET_MODE_SIZE (vmode) == 16)
46317 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46318 else
46319 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46320 if (target != d->target)
46321 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46322 return true;
46325 for (i = 0; i < 8; ++i)
46326 mask |= (d->perm[i * 2] >= 16) << i;
46327 vmode = V8HImode;
46328 /* FALLTHRU */
46330 do_subreg:
46331 target = gen_reg_rtx (vmode);
46332 op0 = gen_lowpart (vmode, op0);
46333 op1 = gen_lowpart (vmode, op1);
46334 break;
46336 case E_V32QImode:
46337 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46338 for (i = 0; i < 32; i += 2)
46339 if (d->perm[i] + 1 != d->perm[i + 1])
46340 goto use_pblendvb;
46341 /* See if bytes move in quadruplets. If yes, vpblendd
46342 with immediate can be used. */
46343 for (i = 0; i < 32; i += 4)
46344 if (d->perm[i] + 2 != d->perm[i + 2])
46345 break;
46346 if (i < 32)
46348 /* See if bytes move the same in both lanes. If yes,
46349 vpblendw with immediate can be used. */
46350 for (i = 0; i < 16; i += 2)
46351 if (d->perm[i] + 16 != d->perm[i + 16])
46352 goto use_pblendvb;
46354 /* Use vpblendw. */
46355 for (i = 0; i < 16; ++i)
46356 mask |= (d->perm[i * 2] >= 32) << i;
46357 vmode = V16HImode;
46358 goto do_subreg;
46361 /* Use vpblendd. */
46362 for (i = 0; i < 8; ++i)
46363 mask |= (d->perm[i * 4] >= 32) << i;
46364 vmode = V8SImode;
46365 goto do_subreg;
46367 case E_V16HImode:
46368 /* See if words move in pairs. If yes, vpblendd can be used. */
46369 for (i = 0; i < 16; i += 2)
46370 if (d->perm[i] + 1 != d->perm[i + 1])
46371 break;
46372 if (i < 16)
46374 /* See if words move the same in both lanes. If not,
46375 vpblendvb must be used. */
46376 for (i = 0; i < 8; i++)
46377 if (d->perm[i] + 8 != d->perm[i + 8])
46379 /* Use vpblendvb. */
46380 for (i = 0; i < 32; ++i)
46381 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46383 vmode = V32QImode;
46384 nelt = 32;
46385 target = gen_reg_rtx (vmode);
46386 op0 = gen_lowpart (vmode, op0);
46387 op1 = gen_lowpart (vmode, op1);
46388 goto finish_pblendvb;
46391 /* Use vpblendw. */
46392 for (i = 0; i < 16; ++i)
46393 mask |= (d->perm[i] >= 16) << i;
46394 break;
46397 /* Use vpblendd. */
46398 for (i = 0; i < 8; ++i)
46399 mask |= (d->perm[i * 2] >= 16) << i;
46400 vmode = V8SImode;
46401 goto do_subreg;
46403 case E_V4DImode:
46404 /* Use vpblendd. */
46405 for (i = 0; i < 4; ++i)
46406 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46407 vmode = V8SImode;
46408 goto do_subreg;
46410 default:
46411 gcc_unreachable ();
46414 switch (vmode)
46416 case E_V8DFmode:
46417 case E_V8DImode:
46418 mmode = QImode;
46419 break;
46420 case E_V16SFmode:
46421 case E_V16SImode:
46422 mmode = HImode;
46423 break;
46424 case E_V32HImode:
46425 mmode = SImode;
46426 break;
46427 case E_V64QImode:
46428 mmode = DImode;
46429 break;
46430 default:
46431 mmode = VOIDmode;
46434 if (mmode != VOIDmode)
46435 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
46436 else
46437 maskop = GEN_INT (mask);
46439 /* This matches five different patterns with different modes. */
46440 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
46441 x = gen_rtx_SET (target, x);
46442 emit_insn (x);
46443 if (target != d->target)
46444 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46446 return true;
46449 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46450 in terms of the variable form of vpermilps.
46452 Note that we will have already failed the immediate input vpermilps,
46453 which requires that the high and low part shuffle be identical; the
46454 variable form doesn't require that. */
46456 static bool
46457 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46459 rtx rperm[8], vperm;
46460 unsigned i;
46462 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46463 return false;
46465 /* We can only permute within the 128-bit lane. */
46466 for (i = 0; i < 8; ++i)
46468 unsigned e = d->perm[i];
46469 if (i < 4 ? e >= 4 : e < 4)
46470 return false;
46473 if (d->testing_p)
46474 return true;
46476 for (i = 0; i < 8; ++i)
46478 unsigned e = d->perm[i];
46480 /* Within each 128-bit lane, the elements of op0 are numbered
46481 from 0 and the elements of op1 are numbered from 4. */
46482 if (e >= 8 + 4)
46483 e -= 8;
46484 else if (e >= 4)
46485 e -= 4;
46487 rperm[i] = GEN_INT (e);
46490 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46491 vperm = force_reg (V8SImode, vperm);
46492 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46494 return true;
46497 /* Return true if permutation D can be performed as a VMODE permutation
46498 instead. */
46500 static bool
46501 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46503 unsigned int i, j, chunk;
46505 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46506 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46507 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46508 return false;
46510 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46511 return true;
46513 chunk = d->nelt / GET_MODE_NUNITS (vmode);
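/* Each group of CHUNK narrow elements must come from a CHUNK-aligned
   run of consecutive source elements, i.e. from exactly one VMODE-sized
   element.  */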
46514 for (i = 0; i < d->nelt; i += chunk)
46515 if (d->perm[i] & (chunk - 1))
46516 return false;
46517 else
46518 for (j = 1; j < chunk; ++j)
46519 if (d->perm[i] + j != d->perm[i + j])
46520 return false;
46522 return true;
46525 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46526 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46528 static bool
46529 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46531 unsigned i, nelt, eltsz, mask;
46532 unsigned char perm[64];
46533 machine_mode vmode = V16QImode;
46534 rtx rperm[64], vperm, target, op0, op1;
46536 nelt = d->nelt;
46538 if (!d->one_operand_p)
46540 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46542 if (TARGET_AVX2
46543 && valid_perm_using_mode_p (V2TImode, d))
46545 if (d->testing_p)
46546 return true;
46548 /* Use vperm2i128 insn. The pattern uses
46549 V4DImode instead of V2TImode. */
46550 target = d->target;
46551 if (d->vmode != V4DImode)
46552 target = gen_reg_rtx (V4DImode);
46553 op0 = gen_lowpart (V4DImode, d->op0);
46554 op1 = gen_lowpart (V4DImode, d->op1);
46555 rperm[0]
46556 = GEN_INT ((d->perm[0] / (nelt / 2))
46557 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46558 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46559 if (target != d->target)
46560 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46561 return true;
46563 return false;
46566 else
46568 if (GET_MODE_SIZE (d->vmode) == 16)
46570 if (!TARGET_SSSE3)
46571 return false;
46573 else if (GET_MODE_SIZE (d->vmode) == 32)
46575 if (!TARGET_AVX2)
46576 return false;
46578 /* V4DImode should already be handled through
46579 expand_vselect by the vpermq instruction. */
46580 gcc_assert (d->vmode != V4DImode);
46582 vmode = V32QImode;
46583 if (d->vmode == V8SImode
46584 || d->vmode == V16HImode
46585 || d->vmode == V32QImode)
46587 /* First see if vpermq can be used for
46588 V8SImode/V16HImode/V32QImode. */
46589 if (valid_perm_using_mode_p (V4DImode, d))
46591 for (i = 0; i < 4; i++)
46592 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
46593 if (d->testing_p)
46594 return true;
46595 target = gen_reg_rtx (V4DImode);
46596 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
46597 perm, 4, false))
46599 emit_move_insn (d->target,
46600 gen_lowpart (d->vmode, target));
46601 return true;
46603 return false;
46606 /* Next see if vpermd can be used. */
46607 if (valid_perm_using_mode_p (V8SImode, d))
46608 vmode = V8SImode;
46610 /* Or if vpermps can be used. */
46611 else if (d->vmode == V8SFmode)
46612 vmode = V8SImode;
46614 if (vmode == V32QImode)
46616 /* vpshufb only works within 128-bit lanes; it is not
46617 possible to shuffle bytes across lanes. */
46618 for (i = 0; i < nelt; ++i)
46619 if ((d->perm[i] ^ i) & (nelt / 2))
46620 return false;
46623 else if (GET_MODE_SIZE (d->vmode) == 64)
46625 if (!TARGET_AVX512BW)
46626 return false;
46628 /* If vpermq didn't work, vpshufb won't work either. */
46629 if (d->vmode == V8DFmode || d->vmode == V8DImode)
46630 return false;
46632 vmode = V64QImode;
46633 if (d->vmode == V16SImode
46634 || d->vmode == V32HImode
46635 || d->vmode == V64QImode)
46637 /* First see if vpermq can be used for
46638 V16SImode/V32HImode/V64QImode. */
46639 if (valid_perm_using_mode_p (V8DImode, d))
46641 for (i = 0; i < 8; i++)
46642 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
46643 if (d->testing_p)
46644 return true;
46645 target = gen_reg_rtx (V8DImode);
46646 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
46647 perm, 8, false))
46649 emit_move_insn (d->target,
46650 gen_lowpart (d->vmode, target));
46651 return true;
46653 return false;
46656 /* Next see if vpermd can be used. */
46657 if (valid_perm_using_mode_p (V16SImode, d))
46658 vmode = V16SImode;
46660 /* Or if vpermps can be used. */
46661 else if (d->vmode == V16SFmode)
46662 vmode = V16SImode;
46663 if (vmode == V64QImode)
46665 /* vpshufb only works within 128-bit lanes; it is not
46666 possible to shuffle bytes across lanes. */
46667 for (i = 0; i < nelt; ++i)
46668 if ((d->perm[i] ^ i) & (nelt / 4))
46669 return false;
46672 else
46673 return false;
46676 if (d->testing_p)
46677 return true;
46679 if (vmode == V8SImode)
46680 for (i = 0; i < 8; ++i)
46681 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
46682 else if (vmode == V16SImode)
46683 for (i = 0; i < 16; ++i)
46684 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
46685 else
46687 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46688 if (!d->one_operand_p)
46689 mask = 2 * nelt - 1;
46690 else if (vmode == V16QImode)
46691 mask = nelt - 1;
46692 else if (vmode == V64QImode)
46693 mask = nelt / 4 - 1;
46694 else
46695 mask = nelt / 2 - 1;
46697 for (i = 0; i < nelt; ++i)
46699 unsigned j, e = d->perm[i] & mask;
46700 for (j = 0; j < eltsz; ++j)
46701 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
46705 vperm = gen_rtx_CONST_VECTOR (vmode,
46706 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
46707 vperm = force_reg (vmode, vperm);
46709 target = d->target;
46710 if (d->vmode != vmode)
46711 target = gen_reg_rtx (vmode);
46712 op0 = gen_lowpart (vmode, d->op0);
46713 if (d->one_operand_p)
46715 if (vmode == V16QImode)
46716 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
46717 else if (vmode == V32QImode)
46718 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
46719 else if (vmode == V64QImode)
46720 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
46721 else if (vmode == V8SFmode)
46722 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
46723 else if (vmode == V8SImode)
46724 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
46725 else if (vmode == V16SFmode)
46726 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
46727 else if (vmode == V16SImode)
46728 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
46729 else
46730 gcc_unreachable ();
46732 else
46734 op1 = gen_lowpart (vmode, d->op1);
46735 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
46737 if (target != d->target)
46738 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46740 return true;
46743 /* For V*[QHS]Imode permutations, check whether the same permutation
46744 can be performed in a 2x, 4x or 8x wider element mode. */
46746 static bool
46747 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
46748 struct expand_vec_perm_d *nd)
46750 int i;
46751 machine_mode mode = VOIDmode;
46753 switch (d->vmode)
46755 case E_V16QImode: mode = V8HImode; break;
46756 case E_V32QImode: mode = V16HImode; break;
46757 case E_V64QImode: mode = V32HImode; break;
46758 case E_V8HImode: mode = V4SImode; break;
46759 case E_V16HImode: mode = V8SImode; break;
46760 case E_V32HImode: mode = V16SImode; break;
46761 case E_V4SImode: mode = V2DImode; break;
46762 case E_V8SImode: mode = V4DImode; break;
46763 case E_V16SImode: mode = V8DImode; break;
46764 default: return false;
46766 for (i = 0; i < d->nelt; i += 2)
46767 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
46768 return false;
46769 nd->vmode = mode;
46770 nd->nelt = d->nelt / 2;
46771 for (i = 0; i < nd->nelt; i++)
46772 nd->perm[i] = d->perm[2 * i] / 2;
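/* Recurse to see whether an even wider element mode (up to DImode)
   also works for the halved permutation.  */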
46773 if (GET_MODE_INNER (mode) != DImode)
46774 canonicalize_vector_int_perm (nd, nd);
46775 if (nd != d)
46777 nd->one_operand_p = d->one_operand_p;
46778 nd->testing_p = d->testing_p;
46779 if (d->op0 == d->op1)
46780 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
46781 else
46783 nd->op0 = gen_lowpart (nd->vmode, d->op0);
46784 nd->op1 = gen_lowpart (nd->vmode, d->op1);
46786 if (d->testing_p)
46787 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
46788 else
46789 nd->target = gen_reg_rtx (nd->vmode);
46791 return true;
46794 /* Try to expand a one-operand permutation with a constant mask. */
46796 static bool
46797 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
46799 machine_mode mode = GET_MODE (d->op0);
46800 machine_mode maskmode = mode;
46801 rtx (*gen) (rtx, rtx, rtx) = NULL;
46802 rtx target, op0, mask;
46803 rtx vec[64];
46805 if (!rtx_equal_p (d->op0, d->op1))
46806 return false;
46808 if (!TARGET_AVX512F)
46809 return false;
46811 switch (mode)
46813 case E_V16SImode:
46814 gen = gen_avx512f_permvarv16si;
46815 break;
46816 case E_V16SFmode:
46817 gen = gen_avx512f_permvarv16sf;
46818 maskmode = V16SImode;
46819 break;
46820 case E_V8DImode:
46821 gen = gen_avx512f_permvarv8di;
46822 break;
46823 case E_V8DFmode:
46824 gen = gen_avx512f_permvarv8df;
46825 maskmode = V8DImode;
46826 break;
46827 default:
46828 return false;
46831 target = d->target;
46832 op0 = d->op0;
46833 for (int i = 0; i < d->nelt; ++i)
46834 vec[i] = GEN_INT (d->perm[i]);
46835 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
46836 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
46837 return true;
46840 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
46841 in a single instruction. */
46843 static bool
46844 expand_vec_perm_1 (struct expand_vec_perm_d *d)
46846 unsigned i, nelt = d->nelt;
46847 struct expand_vec_perm_d nd;
46849 /* Check plain VEC_SELECT first, because AVX has instructions that could
46850 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
46851 input where SEL+CONCAT may not. */
46852 if (d->one_operand_p)
46854 int mask = nelt - 1;
46855 bool identity_perm = true;
46856 bool broadcast_perm = true;
46858 for (i = 0; i < nelt; i++)
46860 nd.perm[i] = d->perm[i] & mask;
46861 if (nd.perm[i] != i)
46862 identity_perm = false;
46863 if (nd.perm[i])
46864 broadcast_perm = false;
46867 if (identity_perm)
46869 if (!d->testing_p)
46870 emit_move_insn (d->target, d->op0);
46871 return true;
46873 else if (broadcast_perm && TARGET_AVX2)
46875 /* Use vpbroadcast{b,w,d}. */
46876 rtx (*gen) (rtx, rtx) = NULL;
46877 switch (d->vmode)
46879 case E_V64QImode:
46880 if (TARGET_AVX512BW)
46881 gen = gen_avx512bw_vec_dupv64qi_1;
46882 break;
46883 case E_V32QImode:
46884 gen = gen_avx2_pbroadcastv32qi_1;
46885 break;
46886 case E_V32HImode:
46887 if (TARGET_AVX512BW)
46888 gen = gen_avx512bw_vec_dupv32hi_1;
46889 break;
46890 case E_V16HImode:
46891 gen = gen_avx2_pbroadcastv16hi_1;
46892 break;
46893 case E_V16SImode:
46894 if (TARGET_AVX512F)
46895 gen = gen_avx512f_vec_dupv16si_1;
46896 break;
46897 case E_V8SImode:
46898 gen = gen_avx2_pbroadcastv8si_1;
46899 break;
46900 case E_V16QImode:
46901 gen = gen_avx2_pbroadcastv16qi;
46902 break;
46903 case E_V8HImode:
46904 gen = gen_avx2_pbroadcastv8hi;
46905 break;
46906 case E_V16SFmode:
46907 if (TARGET_AVX512F)
46908 gen = gen_avx512f_vec_dupv16sf_1;
46909 break;
46910 case E_V8SFmode:
46911 gen = gen_avx2_vec_dupv8sf_1;
46912 break;
46913 case E_V8DFmode:
46914 if (TARGET_AVX512F)
46915 gen = gen_avx512f_vec_dupv8df_1;
46916 break;
46917 case E_V8DImode:
46918 if (TARGET_AVX512F)
46919 gen = gen_avx512f_vec_dupv8di_1;
46920 break;
46921 /* For other modes, prefer the other shuffles this function creates. */
46922 default: break;
46924 if (gen != NULL)
46926 if (!d->testing_p)
46927 emit_insn (gen (d->target, d->op0));
46928 return true;
46932 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
46933 return true;
46935 /* There are plenty of patterns in sse.md that are written for
46936 SEL+CONCAT and are not replicated for a single op. Perhaps
46937 that should be changed, to avoid the nastiness here. */
46939 /* Recognize interleave style patterns, which means incrementing
46940 every other permutation operand. */
46941 for (i = 0; i < nelt; i += 2)
46943 nd.perm[i] = d->perm[i] & mask;
46944 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
46946 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
46947 d->testing_p))
46948 return true;
46950 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
46951 if (nelt >= 4)
46953 for (i = 0; i < nelt; i += 4)
46955 nd.perm[i + 0] = d->perm[i + 0] & mask;
46956 nd.perm[i + 1] = d->perm[i + 1] & mask;
46957 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
46958 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
46961 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
46962 d->testing_p))
46963 return true;
46967 /* Finally, try the fully general two operand permute. */
46968 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
46969 d->testing_p))
46970 return true;
46972 /* Recognize interleave style patterns with reversed operands. */
46973 if (!d->one_operand_p)
46975 for (i = 0; i < nelt; ++i)
46977 unsigned e = d->perm[i];
46978 if (e >= nelt)
46979 e -= nelt;
46980 else
46981 e += nelt;
46982 nd.perm[i] = e;
46985 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
46986 d->testing_p))
46987 return true;
46990 /* Try the SSE4.1 blend variable merge instructions. */
46991 if (expand_vec_perm_blend (d))
46992 return true;
46994 /* Try one of the AVX vpermil variable permutations. */
46995 if (expand_vec_perm_vpermil (d))
46996 return true;
46998 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
46999 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47000 if (expand_vec_perm_pshufb (d))
47001 return true;
47003 /* Try the AVX2 vpalignr instruction. */
47004 if (expand_vec_perm_palignr (d, true))
47005 return true;
47007 /* Try the AVX512F vperm{s,d} instructions. */
47008 if (ix86_expand_vec_one_operand_perm_avx512 (d))
47009 return true;
47011 /* Try the AVX512F vpermt2/vpermi2 instructions. */
47012 if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47013 return true;
47015 /* See if we can get the same permutation in different vector integer
47016 mode. */
47017 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
47019 if (!d->testing_p)
47020 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
47021 return true;
47023 return false;
47026 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47027 in terms of a pair of pshuflw + pshufhw instructions. */
47029 static bool
47030 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47032 unsigned char perm2[MAX_VECT_LEN];
47033 unsigned i;
47034 bool ok;
47036 if (d->vmode != V8HImode || !d->one_operand_p)
47037 return false;
47039 /* The two permutations only operate in 64-bit lanes. */
47040 for (i = 0; i < 4; ++i)
47041 if (d->perm[i] >= 4)
47042 return false;
47043 for (i = 4; i < 8; ++i)
47044 if (d->perm[i] < 4)
47045 return false;
47047 if (d->testing_p)
47048 return true;
47050 /* Emit the pshuflw. */
47051 memcpy (perm2, d->perm, 4);
47052 for (i = 4; i < 8; ++i)
47053 perm2[i] = i;
47054 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47055 gcc_assert (ok);
47057 /* Emit the pshufhw. */
47058 memcpy (perm2 + 4, d->perm + 4, 4);
47059 for (i = 0; i < 4; ++i)
47060 perm2[i] = i;
47061 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47062 gcc_assert (ok);
47064 return true;
47067 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47068 the permutation using the SSSE3 palignr instruction. This succeeds
47069 when all of the elements in PERM fit within one vector and we merely
47070 need to shift them down so that a single vector permutation has a
47071 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47072 the vpalignr instruction itself can perform the requested permutation. */
47074 static bool
47075 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47077 unsigned i, nelt = d->nelt;
47078 unsigned min, max, minswap, maxswap;
47079 bool in_order, ok, swap = false;
47080 rtx shift, target;
47081 struct expand_vec_perm_d dcopy;
47083 /* Even with AVX, palignr only operates on 128-bit vectors;
47084 with AVX2, palignr operates on each of the two 128-bit lanes. */
47085 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47086 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47087 return false;
47089 min = 2 * nelt;
47090 max = 0;
47091 minswap = 2 * nelt;
47092 maxswap = 0;
47093 for (i = 0; i < nelt; ++i)
47095 unsigned e = d->perm[i];
47096 unsigned eswap = d->perm[i] ^ nelt;
47097 if (GET_MODE_SIZE (d->vmode) == 32)
47099 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47100 eswap = e ^ (nelt / 2);
47102 if (e < min)
47103 min = e;
47104 if (e > max)
47105 max = e;
47106 if (eswap < minswap)
47107 minswap = eswap;
47108 if (eswap > maxswap)
47109 maxswap = eswap;
47111 if (min == 0
47112 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47114 if (d->one_operand_p
47115 || minswap == 0
47116 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47117 ? nelt / 2 : nelt))
47118 return false;
47119 swap = true;
47120 min = minswap;
47121 max = maxswap;
47124 /* Given that we have SSSE3, we know we'll be able to implement the
47125 single operand permutation after the palignr with pshufb for
47126 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47127 first. */
47128 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47129 return true;
47131 dcopy = *d;
47132 if (swap)
47134 dcopy.op0 = d->op1;
47135 dcopy.op1 = d->op0;
47136 for (i = 0; i < nelt; ++i)
47137 dcopy.perm[i] ^= nelt;
47140 in_order = true;
47141 for (i = 0; i < nelt; ++i)
47143 unsigned e = dcopy.perm[i];
47144 if (GET_MODE_SIZE (d->vmode) == 32
47145 && e >= nelt
47146 && (e & (nelt / 2 - 1)) < min)
47147 e = e - min - (nelt / 2);
47148 else
47149 e = e - min;
47150 if (e != i)
47151 in_order = false;
47152 dcopy.perm[i] = e;
47154 dcopy.one_operand_p = true;
47156 if (single_insn_only_p && !in_order)
47157 return false;
47159 /* For AVX2, test whether we can permute the result in one instruction. */
47160 if (d->testing_p)
47162 if (in_order)
47163 return true;
47164 dcopy.op1 = dcopy.op0;
47165 return expand_vec_perm_1 (&dcopy);
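/* The shift count is MIN elements scaled to bits; it moves element MIN
   of the two-vector concatenation down to element 0, matching the MIN
   subtracted from the permutation indices above.  */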
47168 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
47169 if (GET_MODE_SIZE (d->vmode) == 16)
47171 target = gen_reg_rtx (TImode);
47172 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47173 gen_lowpart (TImode, dcopy.op0), shift));
47175 else
47177 target = gen_reg_rtx (V2TImode);
47178 emit_insn (gen_avx2_palignrv2ti (target,
47179 gen_lowpart (V2TImode, dcopy.op1),
47180 gen_lowpart (V2TImode, dcopy.op0),
47181 shift));
47184 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47186 /* Test for the degenerate case where the alignment by itself
47187 produces the desired permutation. */
47188 if (in_order)
47190 emit_move_insn (d->target, dcopy.op0);
47191 return true;
47194 ok = expand_vec_perm_1 (&dcopy);
47195 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47197 return ok;
47200 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47201 the permutation using the SSE4_1 pblendv instruction. Potentially
47202 reduces the permutation from 2 pshufb and an ior to 1 pshufb and a pblendv. */
47204 static bool
47205 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47207 unsigned i, which, nelt = d->nelt;
47208 struct expand_vec_perm_d dcopy, dcopy1;
47209 machine_mode vmode = d->vmode;
47210 bool ok;
47212 /* Use the same checks as in expand_vec_perm_blend. */
47213 if (d->one_operand_p)
47214 return false;
47215 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47217 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47219 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47221 else
47222 return false;
47224 /* Figure out which permutation elements do not stay in their
47225 respective lanes. */
47226 for (i = 0, which = 0; i < nelt; ++i)
47228 unsigned e = d->perm[i];
47229 if (e != i)
47230 which |= (e < nelt ? 1 : 2);
47232 /* We can pblend the part whose elements do not stay in their
47233 respective lanes only when these elements all come from the same
47234 half of the permutation.
47235 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47236 lanes, but both are >= 8.
47237 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47238 respective lanes, and 8 is >= 8 while 2 is not. */
47239 if (which != 1 && which != 2)
47240 return false;
47241 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47242 return true;
47244 /* First apply a one-operand permutation to the part whose
47245 elements do not stay in their respective lanes. */
47246 dcopy = *d;
47247 if (which == 2)
47248 dcopy.op0 = dcopy.op1 = d->op1;
47249 else
47250 dcopy.op0 = dcopy.op1 = d->op0;
47251 if (!d->testing_p)
47252 dcopy.target = gen_reg_rtx (vmode);
47253 dcopy.one_operand_p = true;
47255 for (i = 0; i < nelt; ++i)
47256 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47258 ok = expand_vec_perm_1 (&dcopy);
47259 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47260 return false;
47261 else
47262 gcc_assert (ok);
47263 if (d->testing_p)
47264 return true;
47266 /* Next we put permuted elements into their positions. */
47267 dcopy1 = *d;
47268 if (which == 2)
47269 dcopy1.op1 = dcopy.target;
47270 else
47271 dcopy1.op0 = dcopy.target;
47273 for (i = 0; i < nelt; ++i)
47274 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47276 ok = expand_vec_perm_blend (&dcopy1);
47277 gcc_assert (ok);
47279 return true;
47282 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47284 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47285 a two vector permutation into a single vector permutation by using
47286 an interleave operation to merge the vectors. */
47288 static bool
47289 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47291 struct expand_vec_perm_d dremap, dfinal;
47292 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47293 unsigned HOST_WIDE_INT contents;
47294 unsigned char remap[2 * MAX_VECT_LEN];
47295 rtx_insn *seq;
47296 bool ok, same_halves = false;
47298 if (GET_MODE_SIZE (d->vmode) == 16)
47300 if (d->one_operand_p)
47301 return false;
47303 else if (GET_MODE_SIZE (d->vmode) == 32)
47305 if (!TARGET_AVX)
47306 return false;
47307 /* For 32-byte modes allow even d->one_operand_p.
47308 The lack of cross-lane shuffling in some instructions
47309 might prevent a single insn shuffle. */
47310 dfinal = *d;
47311 dfinal.testing_p = true;
47312 /* If expand_vec_perm_interleave3 can expand this into
47313 a 3 insn sequence, give up and let it be expanded as
47314 a 3 insn sequence. While that is one insn longer,
47315 it doesn't need a memory operand, and in the common
47316 case that both the interleave low and high permutations
47317 with the same operands are adjacent, it needs only 4 insns
47318 for both after CSE. */
47319 if (expand_vec_perm_interleave3 (&dfinal))
47320 return false;
47322 else
47323 return false;
47325 /* Examine from whence the elements come. */
47326 contents = 0;
47327 for (i = 0; i < nelt; ++i)
47328 contents |= HOST_WIDE_INT_1U << d->perm[i];
47330 memset (remap, 0xff, sizeof (remap));
47331 dremap = *d;
47333 if (GET_MODE_SIZE (d->vmode) == 16)
47335 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47337 /* Split the two input vectors into 4 halves. */
47338 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47339 h2 = h1 << nelt2;
47340 h3 = h2 << nelt2;
47341 h4 = h3 << nelt2;
47343 /* If all elements come from the low halves, use interleave low; similarly
47344 for interleave high. If the elements come from mismatched halves, we
47345 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47346 if ((contents & (h1 | h3)) == contents)
47348 /* punpckl* */
47349 for (i = 0; i < nelt2; ++i)
47351 remap[i] = i * 2;
47352 remap[i + nelt] = i * 2 + 1;
47353 dremap.perm[i * 2] = i;
47354 dremap.perm[i * 2 + 1] = i + nelt;
47356 if (!TARGET_SSE2 && d->vmode == V4SImode)
47357 dremap.vmode = V4SFmode;
47359 else if ((contents & (h2 | h4)) == contents)
47361 /* punpckh* */
47362 for (i = 0; i < nelt2; ++i)
47364 remap[i + nelt2] = i * 2;
47365 remap[i + nelt + nelt2] = i * 2 + 1;
47366 dremap.perm[i * 2] = i + nelt2;
47367 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47369 if (!TARGET_SSE2 && d->vmode == V4SImode)
47370 dremap.vmode = V4SFmode;
47372 else if ((contents & (h1 | h4)) == contents)
47374 /* shufps */
47375 for (i = 0; i < nelt2; ++i)
47377 remap[i] = i;
47378 remap[i + nelt + nelt2] = i + nelt2;
47379 dremap.perm[i] = i;
47380 dremap.perm[i + nelt2] = i + nelt + nelt2;
47382 if (nelt != 4)
47384 /* shufpd */
47385 dremap.vmode = V2DImode;
47386 dremap.nelt = 2;
47387 dremap.perm[0] = 0;
47388 dremap.perm[1] = 3;
47391 else if ((contents & (h2 | h3)) == contents)
47393 /* shufps */
47394 for (i = 0; i < nelt2; ++i)
47396 remap[i + nelt2] = i;
47397 remap[i + nelt] = i + nelt2;
47398 dremap.perm[i] = i + nelt2;
47399 dremap.perm[i + nelt2] = i + nelt;
47401 if (nelt != 4)
47403 /* shufpd */
47404 dremap.vmode = V2DImode;
47405 dremap.nelt = 2;
47406 dremap.perm[0] = 1;
47407 dremap.perm[1] = 2;
47410 else
47411 return false;
47413 else
47415 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47416 unsigned HOST_WIDE_INT q[8];
47417 unsigned int nonzero_halves[4];
47419 /* Split the two input vectors into 8 quarters. */
47420 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47421 for (i = 1; i < 8; ++i)
47422 q[i] = q[0] << (nelt4 * i);
47423 for (i = 0; i < 4; ++i)
47424 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47426 nonzero_halves[nzcnt] = i;
47427 ++nzcnt;
47430 if (nzcnt == 1)
47432 gcc_assert (d->one_operand_p);
47433 nonzero_halves[1] = nonzero_halves[0];
47434 same_halves = true;
47436 else if (d->one_operand_p)
47438 gcc_assert (nonzero_halves[0] == 0);
47439 gcc_assert (nonzero_halves[1] == 1);
47442 if (nzcnt <= 2)
47444 if (d->perm[0] / nelt2 == nonzero_halves[1])
47446 /* Attempt to increase the likelihood that dfinal
47447 shuffle will be intra-lane. */
47448 std::swap (nonzero_halves[0], nonzero_halves[1]);
47451 /* vperm2f128 or vperm2i128. */
47452 for (i = 0; i < nelt2; ++i)
47454 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47455 remap[i + nonzero_halves[0] * nelt2] = i;
47456 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47457 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47460 if (d->vmode != V8SFmode
47461 && d->vmode != V4DFmode
47462 && d->vmode != V8SImode)
47464 dremap.vmode = V8SImode;
47465 dremap.nelt = 8;
47466 for (i = 0; i < 4; ++i)
47468 dremap.perm[i] = i + nonzero_halves[0] * 4;
47469 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47473 else if (d->one_operand_p)
47474 return false;
47475 else if (TARGET_AVX2
47476 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47478 /* vpunpckl* */
47479 for (i = 0; i < nelt4; ++i)
47481 remap[i] = i * 2;
47482 remap[i + nelt] = i * 2 + 1;
47483 remap[i + nelt2] = i * 2 + nelt2;
47484 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47485 dremap.perm[i * 2] = i;
47486 dremap.perm[i * 2 + 1] = i + nelt;
47487 dremap.perm[i * 2 + nelt2] = i + nelt2;
47488 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47491 else if (TARGET_AVX2
47492 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47494 /* vpunpckh* */
47495 for (i = 0; i < nelt4; ++i)
47497 remap[i + nelt4] = i * 2;
47498 remap[i + nelt + nelt4] = i * 2 + 1;
47499 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47500 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47501 dremap.perm[i * 2] = i + nelt4;
47502 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47503 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47504 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47507 else
47508 return false;
47511 /* Use the remapping array set up above to move the elements from their
47512 swizzled locations into their final destinations. */
47513 dfinal = *d;
47514 for (i = 0; i < nelt; ++i)
47516 unsigned e = remap[d->perm[i]];
47517 gcc_assert (e < nelt);
47518 /* If same_halves is true, both halves of the remapped vector are the
47519 same. Avoid cross-lane accesses if possible. */
47520 if (same_halves && i >= nelt2)
47522 gcc_assert (e < nelt2);
47523 dfinal.perm[i] = e + nelt2;
47525 else
47526 dfinal.perm[i] = e;
47528 if (!d->testing_p)
47530 dremap.target = gen_reg_rtx (dremap.vmode);
47531 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47533 dfinal.op1 = dfinal.op0;
47534 dfinal.one_operand_p = true;
47536 /* Test if the final remap can be done with a single insn. For V4SFmode or
47537 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47538 start_sequence ();
47539 ok = expand_vec_perm_1 (&dfinal);
47540 seq = get_insns ();
47541 end_sequence ();
47543 if (!ok)
47544 return false;
47546 if (d->testing_p)
47547 return true;
47549 if (dremap.vmode != dfinal.vmode)
47551 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47552 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47555 ok = expand_vec_perm_1 (&dremap);
47556 gcc_assert (ok);
47558 emit_insn (seq);
47559 return true;
47562 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47563 a single vector cross-lane permutation into vpermq followed
47564 by any of the single insn permutations. */
47566 static bool
47567 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47569 struct expand_vec_perm_d dremap, dfinal;
47570 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47571 unsigned contents[2];
47572 bool ok;
47574 if (!(TARGET_AVX2
47575 && (d->vmode == V32QImode || d->vmode == V16HImode)
47576 && d->one_operand_p))
47577 return false;
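/* contents[0] and contents[1] record which 64-bit quarters of the input
   feed the low and high half of the result.  Each half may draw from at
   most two quarters, so a vpermq can first gather them into one lane.  */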
47579 contents[0] = 0;
47580 contents[1] = 0;
47581 for (i = 0; i < nelt2; ++i)
47583 contents[0] |= 1u << (d->perm[i] / nelt4);
47584 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47587 for (i = 0; i < 2; ++i)
47589 unsigned int cnt = 0;
47590 for (j = 0; j < 4; ++j)
47591 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47592 return false;
47595 if (d->testing_p)
47596 return true;
47598 dremap = *d;
47599 dremap.vmode = V4DImode;
47600 dremap.nelt = 4;
47601 dremap.target = gen_reg_rtx (V4DImode);
47602 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47603 dremap.op1 = dremap.op0;
47604 dremap.one_operand_p = true;
47605 for (i = 0; i < 2; ++i)
47607 unsigned int cnt = 0;
47608 for (j = 0; j < 4; ++j)
47609 if ((contents[i] & (1u << j)) != 0)
47610 dremap.perm[2 * i + cnt++] = j;
47611 for (; cnt < 2; ++cnt)
47612 dremap.perm[2 * i + cnt] = 0;
47615 dfinal = *d;
47616 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47617 dfinal.op1 = dfinal.op0;
47618 dfinal.one_operand_p = true;
47619 for (i = 0, j = 0; i < nelt; ++i)
47621 if (i == nelt2)
47622 j = 2;
47623 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47624 if ((d->perm[i] / nelt4) == dremap.perm[j])
47626 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47627 dfinal.perm[i] |= nelt4;
47628 else
47629 gcc_unreachable ();
47632 ok = expand_vec_perm_1 (&dremap);
47633 gcc_assert (ok);
47635 ok = expand_vec_perm_1 (&dfinal);
47636 gcc_assert (ok);
47638 return true;
47641 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47642 a vector permutation using two instructions, vperm2f128 resp.
47643 vperm2i128 followed by any single in-lane permutation. */
47645 static bool
47646 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47648 struct expand_vec_perm_d dfirst, dsecond;
47649 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47650 bool ok;
47652 if (!TARGET_AVX
47653 || GET_MODE_SIZE (d->vmode) != 32
47654 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47655 return false;
47657 dsecond = *d;
47658 dsecond.one_operand_p = false;
47659 dsecond.testing_p = true;
47661 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47662 immediate. For perm < 16 the second permutation uses
47663 d->op0 as first operand, for perm >= 16 it uses d->op1
47664 as first operand. The second operand is the result of
47665 vperm2[fi]128. */
47666 for (perm = 0; perm < 32; perm++)
47668 /* Ignore permutations which do not move anything cross-lane. */
47669 if (perm < 16)
47671 /* The second shuffle for e.g. V4DFmode has
47672 0123 and ABCD operands.
47673 Ignore AB23, as 23 is already in the second lane
47674 of the first operand. */
47675 if ((perm & 0xc) == (1 << 2)) continue;
47676 /* And 01CD, as 01 is in the first lane of the first
47677 operand. */
47678 if ((perm & 3) == 0) continue;
47679 /* And 4567, as then the vperm2[fi]128 doesn't change
47680 anything on the original 4567 second operand. */
47681 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47683 else
47685 /* The second shuffle for e.g. V4DFmode has
47686 4567 and ABCD operands.
47687 Ignore AB67, as 67 is already in the second lane
47688 of the first operand. */
47689 if ((perm & 0xc) == (3 << 2)) continue;
47690 /* And 45CD, as 45 is in the first lane of the first
47691 operand. */
47692 if ((perm & 3) == 2) continue;
47693 /* And 0123, as then the vperm2[fi]128 doesn't change
47694 anything on the original 0123 first operand. */
47695 if ((perm & 0xf) == (1 << 2)) continue;
47698 for (i = 0; i < nelt; i++)
47700 j = d->perm[i] / nelt2;
47701 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
47702 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
47703 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
47704 dsecond.perm[i] = d->perm[i] & (nelt - 1);
47705 else
47706 break;
47709 if (i == nelt)
47711 start_sequence ();
47712 ok = expand_vec_perm_1 (&dsecond);
47713 end_sequence ();
47715 else
47716 ok = false;
47718 if (ok)
47720 if (d->testing_p)
47721 return true;
47723 /* Found a usable second shuffle. dfirst will be
47724 vperm2f128 on d->op0 and d->op1. */
47725 dsecond.testing_p = false;
47726 dfirst = *d;
47727 dfirst.target = gen_reg_rtx (d->vmode);
47728 for (i = 0; i < nelt; i++)
47729 dfirst.perm[i] = (i & (nelt2 - 1))
47730 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
47732 canonicalize_perm (&dfirst);
47733 ok = expand_vec_perm_1 (&dfirst);
47734 gcc_assert (ok);
47736 /* And dsecond is some single insn shuffle, taking
47737 d->op0 and result of vperm2f128 (if perm < 16) or
47738 d->op1 and result of vperm2f128 (otherwise). */
47739 if (perm >= 16)
47740 dsecond.op0 = dsecond.op1;
47741 dsecond.op1 = dfirst.target;
47743 ok = expand_vec_perm_1 (&dsecond);
47744 gcc_assert (ok);
47746 return true;
47749 /* For one operand, the only useful vperm2f128 permutation is 0x01
47750 aka lanes swap. */
47751 if (d->one_operand_p)
47752 return false;
47755 return false;
47758 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47759 a two vector permutation using 2 intra-lane interleave insns
47760 and cross-lane shuffle for 32-byte vectors. */
47762 static bool
47763 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
47765 unsigned i, nelt;
47766 rtx (*gen) (rtx, rtx, rtx);
47768 if (d->one_operand_p)
47769 return false;
47770 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
47772 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
47774 else
47775 return false;
47777 nelt = d->nelt;
47778 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
47779 return false;
47780 for (i = 0; i < nelt; i += 2)
47781 if (d->perm[i] != d->perm[0] + i / 2
47782 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
47783 return false;
47785 if (d->testing_p)
47786 return true;
47788 switch (d->vmode)
47790 case E_V32QImode:
47791 if (d->perm[0])
47792 gen = gen_vec_interleave_highv32qi;
47793 else
47794 gen = gen_vec_interleave_lowv32qi;
47795 break;
47796 case E_V16HImode:
47797 if (d->perm[0])
47798 gen = gen_vec_interleave_highv16hi;
47799 else
47800 gen = gen_vec_interleave_lowv16hi;
47801 break;
47802 case E_V8SImode:
47803 if (d->perm[0])
47804 gen = gen_vec_interleave_highv8si;
47805 else
47806 gen = gen_vec_interleave_lowv8si;
47807 break;
47808 case E_V4DImode:
47809 if (d->perm[0])
47810 gen = gen_vec_interleave_highv4di;
47811 else
47812 gen = gen_vec_interleave_lowv4di;
47813 break;
47814 case E_V8SFmode:
47815 if (d->perm[0])
47816 gen = gen_vec_interleave_highv8sf;
47817 else
47818 gen = gen_vec_interleave_lowv8sf;
47819 break;
47820 case E_V4DFmode:
47821 if (d->perm[0])
47822 gen = gen_vec_interleave_highv4df;
47823 else
47824 gen = gen_vec_interleave_lowv4df;
47825 break;
47826 default:
47827 gcc_unreachable ();
47830 emit_insn (gen (d->target, d->op0, d->op1));
47831 return true;
47834 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
47835 a single vector permutation using a single intra-lane vector
47836 permutation, vperm2f128 swapping the lanes and vblend* insn blending
47837 the non-swapped and swapped vectors together. */
47839 static bool
47840 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
47842 struct expand_vec_perm_d dfirst, dsecond;
47843 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
47844 rtx_insn *seq;
47845 bool ok;
47846 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
47848 if (!TARGET_AVX
47849 || TARGET_AVX2
47850 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
47851 || !d->one_operand_p)
47852 return false;
47854 dfirst = *d;
47855 for (i = 0; i < nelt; i++)
47856 dfirst.perm[i] = 0xff;
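/* Build DFIRST as an intra-lane permutation that places each requested
   element at its destination offset within the source's own lane; MSK
   marks the result positions that must instead be taken from the
   lane-swapped copy produced below.  */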
47857 for (i = 0, msk = 0; i < nelt; i++)
47859 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
47860 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
47861 return false;
47862 dfirst.perm[j] = d->perm[i];
47863 if (j != i)
47864 msk |= (1 << i);
47866 for (i = 0; i < nelt; i++)
47867 if (dfirst.perm[i] == 0xff)
47868 dfirst.perm[i] = i;
47870 if (!d->testing_p)
47871 dfirst.target = gen_reg_rtx (dfirst.vmode);
47873 start_sequence ();
47874 ok = expand_vec_perm_1 (&dfirst);
47875 seq = get_insns ();
47876 end_sequence ();
47878 if (!ok)
47879 return false;
47881 if (d->testing_p)
47882 return true;
47884 emit_insn (seq);
47886 dsecond = *d;
47887 dsecond.op0 = dfirst.target;
47888 dsecond.op1 = dfirst.target;
47889 dsecond.one_operand_p = true;
47890 dsecond.target = gen_reg_rtx (dsecond.vmode);
47891 for (i = 0; i < nelt; i++)
47892 dsecond.perm[i] = i ^ nelt2;
47894 ok = expand_vec_perm_1 (&dsecond);
47895 gcc_assert (ok);
47897 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
47898 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
47899 return true;
47902 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
47903 permutation using two vperm2f128, followed by a vshufpd insn blending
47904 the two vectors together. */
47906 static bool
47907 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
47909 struct expand_vec_perm_d dfirst, dsecond, dthird;
47910 bool ok;
47912 if (!TARGET_AVX || (d->vmode != V4DFmode))
47913 return false;
47915 if (d->testing_p)
47916 return true;
47918 dfirst = *d;
47919 dsecond = *d;
47920 dthird = *d;
47922 dfirst.perm[0] = (d->perm[0] & ~1);
47923 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
47924 dfirst.perm[2] = (d->perm[2] & ~1);
47925 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
47926 dsecond.perm[0] = (d->perm[1] & ~1);
47927 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
47928 dsecond.perm[2] = (d->perm[3] & ~1);
47929 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
47930 dthird.perm[0] = (d->perm[0] % 2);
47931 dthird.perm[1] = (d->perm[1] % 2) + 4;
47932 dthird.perm[2] = (d->perm[2] % 2) + 2;
47933 dthird.perm[3] = (d->perm[3] % 2) + 6;
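   /* For instance, with d->perm == { 2, 4, 1, 7 } this builds
      dfirst.perm == { 2, 3, 0, 1 } and dsecond.perm == { 4, 5, 6, 7 },
      each of which only moves whole 128-bit halves, plus
      dthird.perm == { 0, 4, 3, 7 }, the final vshufpd that picks one
      element of each pair.  */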
47935 dfirst.target = gen_reg_rtx (dfirst.vmode);
47936 dsecond.target = gen_reg_rtx (dsecond.vmode);
47937 dthird.op0 = dfirst.target;
47938 dthird.op1 = dsecond.target;
47939 dthird.one_operand_p = false;
47941 canonicalize_perm (&dfirst);
47942 canonicalize_perm (&dsecond);
47944 ok = expand_vec_perm_1 (&dfirst)
47945 && expand_vec_perm_1 (&dsecond)
47946 && expand_vec_perm_1 (&dthird);
47948 gcc_assert (ok);
47950 return true;
47953 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
47954 permutation with two pshufb insns and an ior. We should have already
47955 failed all two instruction sequences. */
47957 static bool
47958 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
47960 rtx rperm[2][16], vperm, l, h, op, m128;
47961 unsigned int i, nelt, eltsz;
47963 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47964 return false;
47965 gcc_assert (!d->one_operand_p);
47967 if (d->testing_p)
47968 return true;
47970 nelt = d->nelt;
47971 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47973 /* Generate two permutation masks. If the required element is within
47974 the given vector it is shuffled into the proper lane. If the required
47975 element is in the other vector, force a zero into the lane by setting
47976 bit 7 in the permutation mask. */
47977 m128 = GEN_INT (-128);
47978 for (i = 0; i < nelt; ++i)
47980 unsigned j, e = d->perm[i];
47981 unsigned which = (e >= nelt);
47982 if (e >= nelt)
47983 e -= nelt;
47985 for (j = 0; j < eltsz; ++j)
47987 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
47988 rperm[1-which][i*eltsz + j] = m128;
47992 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
47993 vperm = force_reg (V16QImode, vperm);
47995 l = gen_reg_rtx (V16QImode);
47996 op = gen_lowpart (V16QImode, d->op0);
47997 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
47999 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48000 vperm = force_reg (V16QImode, vperm);
48002 h = gen_reg_rtx (V16QImode);
48003 op = gen_lowpart (V16QImode, d->op1);
48004 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48006 op = d->target;
48007 if (d->vmode != V16QImode)
48008 op = gen_reg_rtx (V16QImode);
48009 emit_insn (gen_iorv16qi3 (op, l, h));
48010 if (op != d->target)
48011 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48013 return true;
48016 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48017 with two vpshufb insns, vpermq and vpor. We should have already failed
48018 all two or three instruction sequences. */
48020 static bool
48021 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48023 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48024 unsigned int i, nelt, eltsz;
48026 if (!TARGET_AVX2
48027 || !d->one_operand_p
48028 || (d->vmode != V32QImode && d->vmode != V16HImode))
48029 return false;
48031 if (d->testing_p)
48032 return true;
48034 nelt = d->nelt;
48035 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48037 /* Generate two permutation masks. If the required element is within
48038 the same lane, it is shuffled in. If the required element is from the
48039 other lane, force a zero by setting bit 7 in the permutation mask.
48040 The other mask has non-negative elements where an element is
48041 requested from the other lane, but moved to the other lane as well,
48042 so that the result of vpshufb can have the two V2TImode halves
48043 swapped. */
48044 m128 = GEN_INT (-128);
48045 for (i = 0; i < nelt; ++i)
48047 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48048 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48050 for (j = 0; j < eltsz; ++j)
48052 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48053 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48057 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48058 vperm = force_reg (V32QImode, vperm);
48060 h = gen_reg_rtx (V32QImode);
48061 op = gen_lowpart (V32QImode, d->op0);
48062 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48064 /* Swap the 128-bit lanes of h into hp. */
48065 hp = gen_reg_rtx (V4DImode);
48066 op = gen_lowpart (V4DImode, h);
48067 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48068 const1_rtx));
48070 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48071 vperm = force_reg (V32QImode, vperm);
48073 l = gen_reg_rtx (V32QImode);
48074 op = gen_lowpart (V32QImode, d->op0);
48075 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48077 op = d->target;
48078 if (d->vmode != V32QImode)
48079 op = gen_reg_rtx (V32QImode);
48080 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48081 if (op != d->target)
48082 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48084 return true;
48087 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48088 and extract-odd permutations of two V32QImode or V16HImode operands
48089 with two vpshufb insns, vpor and vpermq. We should have already
48090 failed all two or three instruction sequences. */
48092 static bool
48093 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48095 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48096 unsigned int i, nelt, eltsz;
48098 if (!TARGET_AVX2
48099 || d->one_operand_p
48100 || (d->vmode != V32QImode && d->vmode != V16HImode))
48101 return false;
48103 for (i = 0; i < d->nelt; ++i)
48104 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48105 return false;
48107 if (d->testing_p)
48108 return true;
48110 nelt = d->nelt;
48111 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48113 /* Generate two permutation masks. In the first permutation mask
48114 the first quarter will contain indexes for the first half
48115 of the op0, the second quarter will contain bit 7 set, third quarter
48116 will contain indexes for the second half of the op0 and the
48117 last quarter bit 7 set. In the second permutation mask
48118 the first quarter will contain bit 7 set, the second quarter
48119 indexes for the first half of the op1, the third quarter bit 7 set
48120 and last quarter indexes for the second half of the op1.
48121 I.e. the first mask e.g. for V32QImode extract even will be:
48122 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48123 (all values masked with 0xf except for -128) and second mask
48124 for extract even will be
48125 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48126 m128 = GEN_INT (-128);
48127 for (i = 0; i < nelt; ++i)
48129 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48130 unsigned which = d->perm[i] >= nelt;
48131 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48133 for (j = 0; j < eltsz; ++j)
48135 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48136 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48140 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48141 vperm = force_reg (V32QImode, vperm);
48143 l = gen_reg_rtx (V32QImode);
48144 op = gen_lowpart (V32QImode, d->op0);
48145 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48147 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48148 vperm = force_reg (V32QImode, vperm);
48150 h = gen_reg_rtx (V32QImode);
48151 op = gen_lowpart (V32QImode, d->op1);
48152 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48154 ior = gen_reg_rtx (V32QImode);
48155 emit_insn (gen_iorv32qi3 (ior, l, h));
48157 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48158 op = gen_reg_rtx (V4DImode);
48159 ior = gen_lowpart (V4DImode, ior);
48160 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48161 const1_rtx, GEN_INT (3)));
48162 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48164 return true;
48167 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48168 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48169 with two "and" and "pack" or two "shift" and "pack" insns. We should
48170 have already failed all two instruction sequences. */
48172 static bool
48173 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48175 rtx op, dop0, dop1, t;
48176 unsigned i, odd, c, s, nelt = d->nelt;
48177 bool end_perm = false;
48178 machine_mode half_mode;
48179 rtx (*gen_and) (rtx, rtx, rtx);
48180 rtx (*gen_pack) (rtx, rtx, rtx);
48181 rtx (*gen_shift) (rtx, rtx, rtx);
48183 if (d->one_operand_p)
48184 return false;
48186 switch (d->vmode)
48188 case E_V8HImode:
48189 /* Required for "pack". */
48190 if (!TARGET_SSE4_1)
48191 return false;
48192 c = 0xffff;
48193 s = 16;
48194 half_mode = V4SImode;
48195 gen_and = gen_andv4si3;
48196 gen_pack = gen_sse4_1_packusdw;
48197 gen_shift = gen_lshrv4si3;
48198 break;
48199 case E_V16QImode:
48200 /* No check as all instructions are SSE2. */
48201 c = 0xff;
48202 s = 8;
48203 half_mode = V8HImode;
48204 gen_and = gen_andv8hi3;
48205 gen_pack = gen_sse2_packuswb;
48206 gen_shift = gen_lshrv8hi3;
48207 break;
48208 case E_V16HImode:
48209 if (!TARGET_AVX2)
48210 return false;
48211 c = 0xffff;
48212 s = 16;
48213 half_mode = V8SImode;
48214 gen_and = gen_andv8si3;
48215 gen_pack = gen_avx2_packusdw;
48216 gen_shift = gen_lshrv8si3;
48217 end_perm = true;
48218 break;
48219 case E_V32QImode:
48220 if (!TARGET_AVX2)
48221 return false;
48222 c = 0xff;
48223 s = 8;
48224 half_mode = V16HImode;
48225 gen_and = gen_andv16hi3;
48226 gen_pack = gen_avx2_packuswb;
48227 gen_shift = gen_lshrv16hi3;
48228 end_perm = true;
48229 break;
48230 default:
48231 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48232 general shuffles. */
48233 return false;
48236 /* Check that permutation is even or odd. */
48237 odd = d->perm[0];
48238 if (odd > 1)
48239 return false;
48241 for (i = 1; i < nelt; ++i)
48242 if (d->perm[i] != 2 * i + odd)
48243 return false;
48245 if (d->testing_p)
48246 return true;
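   /* E.g. for V16QImode extract-even each 16-bit half_mode element is
      masked with 0x00ff and the two results are combined with packuswb;
      for extract-odd the elements are instead shifted right by 8 before
      the pack.  */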
48248 dop0 = gen_reg_rtx (half_mode);
48249 dop1 = gen_reg_rtx (half_mode);
48250 if (odd == 0)
48252 t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
48253 t = force_reg (half_mode, t);
48254 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48255 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48257 else
48259 emit_insn (gen_shift (dop0,
48260 gen_lowpart (half_mode, d->op0),
48261 GEN_INT (s)));
48262 emit_insn (gen_shift (dop1,
48263 gen_lowpart (half_mode, d->op1),
48264 GEN_INT (s)));
48266 /* For the AVX2 256-bit case we need to permute the pack result. */
48267 if (TARGET_AVX2 && end_perm)
48269 op = gen_reg_rtx (d->vmode);
48270 t = gen_reg_rtx (V4DImode);
48271 emit_insn (gen_pack (op, dop0, dop1));
48272 emit_insn (gen_avx2_permv4di_1 (t,
48273 gen_lowpart (V4DImode, op),
48274 const0_rtx,
48275 const2_rtx,
48276 const1_rtx,
48277 GEN_INT (3)));
48278 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48280 else
48281 emit_insn (gen_pack (d->target, dop0, dop1));
48283 return true;
48286 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48287 and extract-odd permutations of two V64QI operands
48288 with two "shifts", two "truncs" and one "concat" insns for "odd"
48289 and two "truncs" and one "concat" insn for "even".
48290 We should have already failed all two instruction sequences. */
48292 static bool
48293 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
48295 rtx t1, t2, t3, t4;
48296 unsigned i, odd, nelt = d->nelt;
48298 if (!TARGET_AVX512BW
48299 || d->one_operand_p
48300 || d->vmode != V64QImode)
48301 return false;
48303 /* Check that permutation is even or odd. */
48304 odd = d->perm[0];
48305 if (odd > 1)
48306 return false;
48308 for (i = 1; i < nelt; ++i)
48309 if (d->perm[i] != 2 * i + odd)
48310 return false;
48312 if (d->testing_p)
48313 return true;
48316 if (odd)
48318 t1 = gen_reg_rtx (V32HImode);
48319 t2 = gen_reg_rtx (V32HImode);
48320 emit_insn (gen_lshrv32hi3 (t1,
48321 gen_lowpart (V32HImode, d->op0),
48322 GEN_INT (8)));
48323 emit_insn (gen_lshrv32hi3 (t2,
48324 gen_lowpart (V32HImode, d->op1),
48325 GEN_INT (8)));
48327 else
48329 t1 = gen_lowpart (V32HImode, d->op0);
48330 t2 = gen_lowpart (V32HImode, d->op1);
48333 t3 = gen_reg_rtx (V32QImode);
48334 t4 = gen_reg_rtx (V32QImode);
48335 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
48336 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
48337 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
48339 return true;
48342 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48343 and extract-odd permutations. */
48345 static bool
48346 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48348 rtx t1, t2, t3, t4, t5;
48350 switch (d->vmode)
48352 case E_V4DFmode:
48353 if (d->testing_p)
48354 break;
48355 t1 = gen_reg_rtx (V4DFmode);
48356 t2 = gen_reg_rtx (V4DFmode);
48358 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48359 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48360 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48362 /* Now an unpck[lh]pd will produce the result required. */
48363 if (odd)
48364 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48365 else
48366 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48367 emit_insn (t3);
48368 break;
48370 case E_V8SFmode:
48372 int mask = odd ? 0xdd : 0x88;
48374 if (d->testing_p)
48375 break;
48376 t1 = gen_reg_rtx (V8SFmode);
48377 t2 = gen_reg_rtx (V8SFmode);
48378 t3 = gen_reg_rtx (V8SFmode);
48380 /* Shuffle within the 128-bit lanes to produce:
48381 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48382 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48383 GEN_INT (mask)));
48385 /* Shuffle the lanes around to produce:
48386 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48387 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48388 GEN_INT (0x3)));
48390 /* Shuffle within the 128-bit lanes to produce:
48391 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48392 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48394 /* Shuffle within the 128-bit lanes to produce:
48395 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48396 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48398 /* Shuffle the lanes around to produce:
48399 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48400 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48401 GEN_INT (0x20)));
48403 break;
48405 case E_V2DFmode:
48406 case E_V4SFmode:
48407 case E_V2DImode:
48408 case E_V4SImode:
48409 /* These are always directly implementable by expand_vec_perm_1. */
48410 gcc_unreachable ();
48412 case E_V8HImode:
48413 if (TARGET_SSE4_1)
48414 return expand_vec_perm_even_odd_pack (d);
48415 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48416 return expand_vec_perm_pshufb2 (d);
48417 else
48419 if (d->testing_p)
48420 break;
48421 /* We need 2*log2(N)-1 operations to achieve odd/even
48422 with interleave. */
48423 t1 = gen_reg_rtx (V8HImode);
48424 t2 = gen_reg_rtx (V8HImode);
48425 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48426 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48427 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48428 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48429 if (odd)
48430 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48431 else
48432 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48433 emit_insn (t3);
48435 break;
48437 case E_V16QImode:
48438 return expand_vec_perm_even_odd_pack (d);
48440 case E_V16HImode:
48441 case E_V32QImode:
48442 return expand_vec_perm_even_odd_pack (d);
48444 case E_V64QImode:
48445 return expand_vec_perm_even_odd_trunc (d);
48447 case E_V4DImode:
48448 if (!TARGET_AVX2)
48450 struct expand_vec_perm_d d_copy = *d;
48451 d_copy.vmode = V4DFmode;
48452 if (d->testing_p)
48453 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
48454 else
48455 d_copy.target = gen_reg_rtx (V4DFmode);
48456 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48457 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48458 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48460 if (!d->testing_p)
48461 emit_move_insn (d->target,
48462 gen_lowpart (V4DImode, d_copy.target));
48463 return true;
48465 return false;
48468 if (d->testing_p)
48469 break;
48471 t1 = gen_reg_rtx (V4DImode);
48472 t2 = gen_reg_rtx (V4DImode);
48474 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48475 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48476 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48478 /* Now a vpunpck[lh]qdq will produce the result required. */
48479 if (odd)
48480 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48481 else
48482 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48483 emit_insn (t3);
48484 break;
48486 case E_V8SImode:
48487 if (!TARGET_AVX2)
48489 struct expand_vec_perm_d d_copy = *d;
48490 d_copy.vmode = V8SFmode;
48491 if (d->testing_p)
48492 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
48493 else
48494 d_copy.target = gen_reg_rtx (V8SFmode);
48495 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48496 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48497 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48499 if (!d->testing_p)
48500 emit_move_insn (d->target,
48501 gen_lowpart (V8SImode, d_copy.target));
48502 return true;
48504 return false;
48507 if (d->testing_p)
48508 break;
48510 t1 = gen_reg_rtx (V8SImode);
48511 t2 = gen_reg_rtx (V8SImode);
48512 t3 = gen_reg_rtx (V4DImode);
48513 t4 = gen_reg_rtx (V4DImode);
48514 t5 = gen_reg_rtx (V4DImode);
48516 /* Shuffle the lanes around into
48517 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48518 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48519 gen_lowpart (V4DImode, d->op1),
48520 GEN_INT (0x20)));
48521 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48522 gen_lowpart (V4DImode, d->op1),
48523 GEN_INT (0x31)));
48525 /* Swap the 2nd and 3rd position in each lane into
48526 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48527 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48528 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48529 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48530 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48532 /* Now a vpunpck[lh]qdq will produce
48533 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48534 if (odd)
48535 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48536 gen_lowpart (V4DImode, t2));
48537 else
48538 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48539 gen_lowpart (V4DImode, t2));
48540 emit_insn (t3);
48541 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48542 break;
48544 default:
48545 gcc_unreachable ();
48548 return true;
48551 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48552 extract-even and extract-odd permutations. */
48554 static bool
48555 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48557 unsigned i, odd, nelt = d->nelt;
48559 odd = d->perm[0];
48560 if (odd != 0 && odd != 1)
48561 return false;
48563 for (i = 1; i < nelt; ++i)
48564 if (d->perm[i] != 2 * i + odd)
48565 return false;
48567 return expand_vec_perm_even_odd_1 (d, odd);
48570 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48571 permutations. We assume that expand_vec_perm_1 has already failed. */
48573 static bool
48574 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48576 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48577 machine_mode vmode = d->vmode;
48578 unsigned char perm2[4];
48579 rtx op0 = d->op0, dest;
48580 bool ok;
48582 switch (vmode)
48584 case E_V4DFmode:
48585 case E_V8SFmode:
48586 /* These are special-cased in sse.md so that we can optionally
48587 use the vbroadcast instruction. They expand to two insns
48588 if the input happens to be in a register. */
48589 gcc_unreachable ();
48591 case E_V2DFmode:
48592 case E_V2DImode:
48593 case E_V4SFmode:
48594 case E_V4SImode:
48595 /* These are always implementable using standard shuffle patterns. */
48596 gcc_unreachable ();
48598 case E_V8HImode:
48599 case E_V16QImode:
48600 /* These can be implemented via interleave. We save one insn by
48601 stopping once we have promoted to V4SImode and then use pshufd. */
48602 if (d->testing_p)
48603 return true;
48606 rtx dest;
48607 rtx (*gen) (rtx, rtx, rtx)
48608 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48609 : gen_vec_interleave_lowv8hi;
48611 if (elt >= nelt2)
48613 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48614 : gen_vec_interleave_highv8hi;
48615 elt -= nelt2;
48617 nelt2 /= 2;
48619 dest = gen_reg_rtx (vmode);
48620 emit_insn (gen (dest, op0, op0));
48621 vmode = get_mode_wider_vector (vmode);
48622 op0 = gen_lowpart (vmode, dest);
48624 while (vmode != V4SImode);
48626 memset (perm2, elt, 4);
48627 dest = gen_reg_rtx (V4SImode);
48628 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48629 gcc_assert (ok);
48630 if (!d->testing_p)
48631 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48632 return true;
48634 case E_V64QImode:
48635 case E_V32QImode:
48636 case E_V16HImode:
48637 case E_V8SImode:
48638 case E_V4DImode:
48639 /* For AVX2 broadcasts of the first element vpbroadcast* or
48640 vpermq should be used by expand_vec_perm_1. */
48641 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48642 return false;
48644 default:
48645 gcc_unreachable ();
48649 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48650 broadcast permutations. */
48652 static bool
48653 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48655 unsigned i, elt, nelt = d->nelt;
48657 if (!d->one_operand_p)
48658 return false;
48660 elt = d->perm[0];
48661 for (i = 1; i < nelt; ++i)
48662 if (d->perm[i] != elt)
48663 return false;
48665 return expand_vec_perm_broadcast_1 (d);
48668 /* Implement arbitrary permutations of two V64QImode operands
48669 with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
48670 static bool
48671 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
48673 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48674 return false;
48676 if (d->testing_p)
48677 return true;
48679 struct expand_vec_perm_d ds[2];
48680 rtx rperm[128], vperm, target0, target1;
48681 unsigned int i, nelt;
48682 machine_mode vmode;
48684 nelt = d->nelt;
48685 vmode = V64QImode;
48687 for (i = 0; i < 2; i++)
48689 ds[i] = *d;
48690 ds[i].vmode = V32HImode;
48691 ds[i].nelt = 32;
48692 ds[i].target = gen_reg_rtx (V32HImode);
48693 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48694 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48697 /* Prepare permutations such that the first one takes care of
48698 putting the even bytes into the right positions or one position
48699 higher (ds[0]) and the second one takes care of
48700 putting the odd bytes into the right positions or one position
48701 lower (ds[1]). */
48703 for (i = 0; i < nelt; i++)
48705 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
48706 if (i & 1)
48708 rperm[i] = constm1_rtx;
48709 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48711 else
48713 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48714 rperm[i + 64] = constm1_rtx;
48718 bool ok = expand_vec_perm_1 (&ds[0]);
48719 gcc_assert (ok);
48720 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
48722 ok = expand_vec_perm_1 (&ds[1]);
48723 gcc_assert (ok);
48724 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
48726 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
48727 vperm = force_reg (vmode, vperm);
48728 target0 = gen_reg_rtx (V64QImode);
48729 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
48731 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
48732 vperm = force_reg (vmode, vperm);
48733 target1 = gen_reg_rtx (V64QImode);
48734 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
48736 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
48737 return true;
48740 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
48741 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48742 all the shorter instruction sequences. */
48744 static bool
48745 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48747 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48748 unsigned int i, nelt, eltsz;
48749 bool used[4];
48751 if (!TARGET_AVX2
48752 || d->one_operand_p
48753 || (d->vmode != V32QImode && d->vmode != V16HImode))
48754 return false;
48756 if (d->testing_p)
48757 return true;
48759 nelt = d->nelt;
48760 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48762 /* Generate 4 permutation masks. If the required element is within
48763 the same lane, it is shuffled in. If the required element is from the
48764 other lane, force a zero by setting bit 7 in the permutation mask.
48765 The other mask has non-negative elements where an element is
48766 requested from the other lane, but moved to the other lane as well,
48767 so that the result of vpshufb can have the two V2TImode halves
48768 swapped. */
48769 m128 = GEN_INT (-128);
48770 for (i = 0; i < 32; ++i)
48772 rperm[0][i] = m128;
48773 rperm[1][i] = m128;
48774 rperm[2][i] = m128;
48775 rperm[3][i] = m128;
48777 used[0] = false;
48778 used[1] = false;
48779 used[2] = false;
48780 used[3] = false;
48781 for (i = 0; i < nelt; ++i)
48783 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48784 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48785 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48787 for (j = 0; j < eltsz; ++j)
48788 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48789 used[which] = true;
48792 for (i = 0; i < 2; ++i)
48794 if (!used[2 * i + 1])
48796 h[i] = NULL_RTX;
48797 continue;
48799 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48800 gen_rtvec_v (32, rperm[2 * i + 1]));
48801 vperm = force_reg (V32QImode, vperm);
48802 h[i] = gen_reg_rtx (V32QImode);
48803 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48804 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48807 /* Swap the 128-bit lanes of h[X]. */
48808 for (i = 0; i < 2; ++i)
48810 if (h[i] == NULL_RTX)
48811 continue;
48812 op = gen_reg_rtx (V4DImode);
48813 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48814 const2_rtx, GEN_INT (3), const0_rtx,
48815 const1_rtx));
48816 h[i] = gen_lowpart (V32QImode, op);
48819 for (i = 0; i < 2; ++i)
48821 if (!used[2 * i])
48823 l[i] = NULL_RTX;
48824 continue;
48826 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48827 vperm = force_reg (V32QImode, vperm);
48828 l[i] = gen_reg_rtx (V32QImode);
48829 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48830 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48833 for (i = 0; i < 2; ++i)
48835 if (h[i] && l[i])
48837 op = gen_reg_rtx (V32QImode);
48838 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48839 l[i] = op;
48841 else if (h[i])
48842 l[i] = h[i];
48845 gcc_assert (l[0] && l[1]);
48846 op = d->target;
48847 if (d->vmode != V32QImode)
48848 op = gen_reg_rtx (V32QImode);
48849 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48850 if (op != d->target)
48851 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48852 return true;
48855 /* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits
48856 taken care of, perform the expansion in D and return true on success. */
48858 static bool
48859 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48861 /* Try a single instruction expansion. */
48862 if (expand_vec_perm_1 (d))
48863 return true;
48865 /* Try sequences of two instructions. */
48867 if (expand_vec_perm_pshuflw_pshufhw (d))
48868 return true;
48870 if (expand_vec_perm_palignr (d, false))
48871 return true;
48873 if (expand_vec_perm_interleave2 (d))
48874 return true;
48876 if (expand_vec_perm_broadcast (d))
48877 return true;
48879 if (expand_vec_perm_vpermq_perm_1 (d))
48880 return true;
48882 if (expand_vec_perm_vperm2f128 (d))
48883 return true;
48885 if (expand_vec_perm_pblendv (d))
48886 return true;
48888 /* Try sequences of three instructions. */
48890 if (expand_vec_perm_even_odd_pack (d))
48891 return true;
48893 if (expand_vec_perm_2vperm2f128_vshuf (d))
48894 return true;
48896 if (expand_vec_perm_pshufb2 (d))
48897 return true;
48899 if (expand_vec_perm_interleave3 (d))
48900 return true;
48902 if (expand_vec_perm_vperm2f128_vblend (d))
48903 return true;
48905 /* Try sequences of four instructions. */
48907 if (expand_vec_perm_even_odd_trunc (d))
48908 return true;
48909 if (expand_vec_perm_vpshufb2_vpermq (d))
48910 return true;
48912 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48913 return true;
48915 if (expand_vec_perm_vpermt2_vpshub2 (d))
48916 return true;
48918 /* ??? Look for narrow permutations whose element orderings would
48919 allow the promotion to a wider mode. */
48921 /* ??? Look for sequences of interleave or a wider permute that place
48922 the data into the correct lanes for a half-vector shuffle like
48923 pshuf[lh]w or vpermilps. */
48925 /* ??? Look for sequences of interleave that produce the desired results.
48926 The combinatorics of punpck[lh] get pretty ugly... */
48928 if (expand_vec_perm_even_odd (d))
48929 return true;
48931 /* Even longer sequences. */
48932 if (expand_vec_perm_vpshufb4_vpermq2 (d))
48933 return true;
48935 /* See if we can get the same permutation in different vector integer
48936 mode. */
48937 struct expand_vec_perm_d nd;
48938 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
48940 if (!d->testing_p)
48941 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
48942 return true;
48945 return false;
48948 /* If a permutation only uses one operand, make it clear. Returns true
48949 if the permutation references both operands. */
48951 static bool
48952 canonicalize_perm (struct expand_vec_perm_d *d)
48954 int i, which, nelt = d->nelt;
48956 for (i = which = 0; i < nelt; ++i)
48957 which |= (d->perm[i] < nelt ? 1 : 2);
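   /* WHICH is now 1 if only the first operand is referenced, 2 if only
      the second one is, and 3 if both are.  */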
48959 d->one_operand_p = true;
48960 switch (which)
48962 default:
48963 gcc_unreachable();
48965 case 3:
48966 if (!rtx_equal_p (d->op0, d->op1))
48968 d->one_operand_p = false;
48969 break;
48971 /* The elements of PERM do not suggest that only the first operand
48972 is used, but both operands are identical. Allow easier matching
48973 of the permutation by folding the permutation into the single
48974 input vector. */
48975 /* FALLTHRU */
48977 case 2:
48978 for (i = 0; i < nelt; ++i)
48979 d->perm[i] &= nelt - 1;
48980 d->op0 = d->op1;
48981 break;
48983 case 1:
48984 d->op1 = d->op0;
48985 break;
48988 return (which == 3);
48991 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
48993 static bool
48994 ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
48995 rtx op1, const vec_perm_indices &sel)
48997 struct expand_vec_perm_d d;
48998 unsigned char perm[MAX_VECT_LEN];
48999 unsigned int i, nelt, which;
49000 bool two_args;
49002 d.target = target;
49003 d.op0 = op0;
49004 d.op1 = op1;
49006 d.vmode = vmode;
49007 gcc_assert (VECTOR_MODE_P (d.vmode));
49008 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49009 d.testing_p = !target;
49011 gcc_assert (sel.length () == nelt);
49012 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49014 /* Given sufficient ISA support we can just return true here
49015 for selected vector modes. */
49016 switch (d.vmode)
49018 case E_V16SFmode:
49019 case E_V16SImode:
49020 case E_V8DImode:
49021 case E_V8DFmode:
49022 if (!TARGET_AVX512F)
49023 return false;
49024 /* All implementable with a single vperm[it]2 insn. */
49025 if (d.testing_p)
49026 return true;
49027 break;
49028 case E_V32HImode:
49029 if (!TARGET_AVX512BW)
49030 return false;
49031 if (d.testing_p)
49032 /* All implementable with a single vperm[it]2 insn. */
49033 return true;
49034 break;
49035 case E_V64QImode:
49036 if (!TARGET_AVX512BW)
49037 return false;
49038 if (d.testing_p)
49039 /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
49040 return true;
49041 break;
49042 case E_V8SImode:
49043 case E_V8SFmode:
49044 case E_V4DFmode:
49045 case E_V4DImode:
49046 if (!TARGET_AVX)
49047 return false;
49048 if (d.testing_p && TARGET_AVX512VL)
49049 /* All implementable with a single vperm[it]2 insn. */
49050 return true;
49051 break;
49052 case E_V16HImode:
49053 if (!TARGET_SSE2)
49054 return false;
49055 if (d.testing_p && TARGET_AVX2)
49056 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49057 return true;
49058 break;
49059 case E_V32QImode:
49060 if (!TARGET_SSE2)
49061 return false;
49062 if (d.testing_p && TARGET_AVX2)
49063 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49064 return true;
49065 break;
49066 case E_V8HImode:
49067 case E_V16QImode:
49068 if (!TARGET_SSE2)
49069 return false;
49070 /* Fall through. */
49071 case E_V4SImode:
49072 case E_V4SFmode:
49073 if (!TARGET_SSE)
49074 return false;
49075 /* All implementable with a single vpperm insn. */
49076 if (d.testing_p && TARGET_XOP)
49077 return true;
49078 /* All implementable with 2 pshufb + 1 ior. */
49079 if (d.testing_p && TARGET_SSSE3)
49080 return true;
49081 break;
49082 case E_V2DImode:
49083 case E_V2DFmode:
49084 if (!TARGET_SSE)
49085 return false;
49086 /* All implementable with shufpd or unpck[lh]pd. */
49087 if (d.testing_p)
49088 return true;
49089 break;
49090 default:
49091 return false;
49094 for (i = which = 0; i < nelt; ++i)
49096 unsigned char e = sel[i];
49097 gcc_assert (e < 2 * nelt);
49098 d.perm[i] = e;
49099 perm[i] = e;
49100 which |= (e < nelt ? 1 : 2);
49103 if (d.testing_p)
49105 /* For all elements from the second vector, fold the elements to the first. */
49106 if (which == 2)
49107 for (i = 0; i < nelt; ++i)
49108 d.perm[i] -= nelt;
49110 /* Check whether the mask can be applied to the vector type. */
49111 d.one_operand_p = (which != 3);
49113 /* Implementable with shufps or pshufd. */
49114 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49115 return true;
49117 /* Otherwise we have to go through the motions and see if we can
49118 figure out how to generate the requested permutation. */
49119 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49120 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49121 if (!d.one_operand_p)
49122 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49124 start_sequence ();
49125 bool ret = ix86_expand_vec_perm_const_1 (&d);
49126 end_sequence ();
49128 return ret;
49131 two_args = canonicalize_perm (&d);
49133 if (ix86_expand_vec_perm_const_1 (&d))
49134 return true;
49136 /* If the selector says both arguments are needed, but the operands are the
49137 same, the above tried to expand with one_operand_p and a flattened selector.
49138 If that didn't work, retry without one_operand_p; we succeeded with that
49139 during testing. */
49140 if (two_args && d.one_operand_p)
49142 d.one_operand_p = false;
49143 memcpy (d.perm, perm, sizeof (perm));
49144 return ix86_expand_vec_perm_const_1 (&d);
49147 return false;
49150 void
49151 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49153 struct expand_vec_perm_d d;
49154 unsigned i, nelt;
49156 d.target = targ;
49157 d.op0 = op0;
49158 d.op1 = op1;
49159 d.vmode = GET_MODE (targ);
49160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49161 d.one_operand_p = false;
49162 d.testing_p = false;
49164 for (i = 0; i < nelt; ++i)
49165 d.perm[i] = i * 2 + odd;
49167 /* We'll either be able to implement the permutation directly... */
49168 if (expand_vec_perm_1 (&d))
49169 return;
49171 /* ... or we use the special-case patterns. */
49172 expand_vec_perm_even_odd_1 (&d, odd);
49175 static void
49176 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49178 struct expand_vec_perm_d d;
49179 unsigned i, nelt, base;
49180 bool ok;
49182 d.target = targ;
49183 d.op0 = op0;
49184 d.op1 = op1;
49185 d.vmode = GET_MODE (targ);
49186 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49187 d.one_operand_p = false;
49188 d.testing_p = false;
49190 base = high_p ? nelt / 2 : 0;
49191 for (i = 0; i < nelt / 2; ++i)
49193 d.perm[i * 2] = i + base;
49194 d.perm[i * 2 + 1] = i + base + nelt;
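   /* E.g. interleaving the low halves of two V4SImode operands requests
      the permutation { 0, 4, 1, 5 }; the high halves give { 2, 6, 3, 7 }.  */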
49197 /* Note that for AVX this isn't one instruction. */
49198 ok = ix86_expand_vec_perm_const_1 (&d);
49199 gcc_assert (ok);
49203 /* Expand a vector operation CODE for a V*QImode in terms of the
49204 same operation on V*HImode. */
49206 void
49207 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49209 machine_mode qimode = GET_MODE (dest);
49210 machine_mode himode;
49211 rtx (*gen_il) (rtx, rtx, rtx);
49212 rtx (*gen_ih) (rtx, rtx, rtx);
49213 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49214 struct expand_vec_perm_d d;
49215 bool ok, full_interleave;
49216 bool uns_p = false;
49217 int i;
49219 switch (qimode)
49221 case E_V16QImode:
49222 himode = V8HImode;
49223 gen_il = gen_vec_interleave_lowv16qi;
49224 gen_ih = gen_vec_interleave_highv16qi;
49225 break;
49226 case E_V32QImode:
49227 himode = V16HImode;
49228 gen_il = gen_avx2_interleave_lowv32qi;
49229 gen_ih = gen_avx2_interleave_highv32qi;
49230 break;
49231 case E_V64QImode:
49232 himode = V32HImode;
49233 gen_il = gen_avx512bw_interleave_lowv64qi;
49234 gen_ih = gen_avx512bw_interleave_highv64qi;
49235 break;
49236 default:
49237 gcc_unreachable ();
49240 op2_l = op2_h = op2;
49241 switch (code)
49243 case MULT:
49244 /* Unpack data such that we've got a source byte in each low byte of
49245 each word. We don't care what goes into the high byte of each word.
49246 Rather than trying to get zero in there, most convenient is to let
49247 it be a copy of the low byte. */
49248 op2_l = gen_reg_rtx (qimode);
49249 op2_h = gen_reg_rtx (qimode);
49250 emit_insn (gen_il (op2_l, op2, op2));
49251 emit_insn (gen_ih (op2_h, op2, op2));
49253 op1_l = gen_reg_rtx (qimode);
49254 op1_h = gen_reg_rtx (qimode);
49255 emit_insn (gen_il (op1_l, op1, op1));
49256 emit_insn (gen_ih (op1_h, op1, op1));
49257 full_interleave = qimode == V16QImode;
49258 break;
49260 case ASHIFT:
49261 case LSHIFTRT:
49262 uns_p = true;
49263 /* FALLTHRU */
49264 case ASHIFTRT:
49265 op1_l = gen_reg_rtx (himode);
49266 op1_h = gen_reg_rtx (himode);
49267 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49268 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49269 full_interleave = true;
49270 break;
49271 default:
49272 gcc_unreachable ();
49275 /* Perform the operation. */
49276 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49277 1, OPTAB_DIRECT);
49278 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49279 1, OPTAB_DIRECT);
49280 gcc_assert (res_l && res_h);
49282 /* Merge the data back into the right place. */
49283 d.target = dest;
49284 d.op0 = gen_lowpart (qimode, res_l);
49285 d.op1 = gen_lowpart (qimode, res_h);
49286 d.vmode = qimode;
49287 d.nelt = GET_MODE_NUNITS (qimode);
49288 d.one_operand_p = false;
49289 d.testing_p = false;
49291 if (full_interleave)
49293 /* For SSE2, we used a full interleave, so the desired
49294 results are in the even elements. */
49295 for (i = 0; i < d.nelt; ++i)
49296 d.perm[i] = i * 2;
49298 else
49300 /* For AVX, the interleave used above was not cross-lane. So the
49301 extraction is of the even elements, but with the second and third quarters swapped.
49302 Happily, that is even one insn shorter than even extraction.
49303 For AVX512BW we have 4 lanes. We extract evens from within a lane,
49304 always first from the first and then from the second source operand,
49305 the index bits above the low 4 bits remain the same.
49306 Thus, for d.nelt == 32 we want permutation
49307 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
49308 and for d.nelt == 64 we want permutation
49309 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
49310 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
49311 for (i = 0; i < d.nelt; ++i)
49312 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
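      /* E.g. for d.nelt == 32, i == 8 yields 0 + 32 + 0 == 32 and
         i == 16 yields 0 + 0 + 16 == 16, matching the permutation
         quoted above.  */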
49315 ok = ix86_expand_vec_perm_const_1 (&d);
49316 gcc_assert (ok);
49318 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49319 gen_rtx_fmt_ee (code, qimode, op1, op2));
49322 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49323 if op is CONST_VECTOR with all odd elements equal to their
49324 preceding element. */
49326 static bool
49327 const_vector_equal_evenodd_p (rtx op)
49329 machine_mode mode = GET_MODE (op);
49330 int i, nunits = GET_MODE_NUNITS (mode);
49331 if (GET_CODE (op) != CONST_VECTOR
49332 || nunits != CONST_VECTOR_NUNITS (op))
49333 return false;
49334 for (i = 0; i < nunits; i += 2)
49335 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49336 return false;
49337 return true;
49340 void
49341 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49342 bool uns_p, bool odd_p)
49344 machine_mode mode = GET_MODE (op1);
49345 machine_mode wmode = GET_MODE (dest);
49346 rtx x;
49347 rtx orig_op1 = op1, orig_op2 = op2;
49349 if (!nonimmediate_operand (op1, mode))
49350 op1 = force_reg (mode, op1);
49351 if (!nonimmediate_operand (op2, mode))
49352 op2 = force_reg (mode, op2);
49354 /* We only play even/odd games with vectors of SImode. */
49355 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49357 /* If we're looking for the odd results, shift those members down to
49358 the even slots. For some cpus this is faster than a PSHUFD. */
49359 if (odd_p)
49361 /* For XOP use vpmacsdqh, but only for smult, as it is only
49362 signed. */
49363 if (TARGET_XOP && mode == V4SImode && !uns_p)
49365 x = force_reg (wmode, CONST0_RTX (wmode));
49366 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49367 return;
49370 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49371 if (!const_vector_equal_evenodd_p (orig_op1))
49372 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49373 x, NULL, 1, OPTAB_DIRECT);
49374 if (!const_vector_equal_evenodd_p (orig_op2))
49375 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49376 x, NULL, 1, OPTAB_DIRECT);
49377 op1 = gen_lowpart (mode, op1);
49378 op2 = gen_lowpart (mode, op2);
49381 if (mode == V16SImode)
49383 if (uns_p)
49384 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49385 else
49386 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49388 else if (mode == V8SImode)
49390 if (uns_p)
49391 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49392 else
49393 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49395 else if (uns_p)
49396 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49397 else if (TARGET_SSE4_1)
49398 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49399 else
49401 rtx s1, s2, t0, t1, t2;
49403 /* The easiest way to implement this without PMULDQ is to go through
49404 the motions as if we are performing a full 64-bit multiply, with
49405 the exception that we need to do less shuffling of the elements. */
49407 /* Compute the sign-extension, aka highparts, of the two operands. */
49408 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49409 op1, pc_rtx, pc_rtx);
49410 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49411 op2, pc_rtx, pc_rtx);
49413 /* Multiply LO(A) * HI(B), and vice-versa. */
49414 t1 = gen_reg_rtx (wmode);
49415 t2 = gen_reg_rtx (wmode);
49416 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49417 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49419 /* Multiply LO(A) * LO(B). */
49420 t0 = gen_reg_rtx (wmode);
49421 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49423 /* Combine and shift the highparts into place. */
49424 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49425 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49426 1, OPTAB_DIRECT);
49428 /* Combine high and low parts. */
49429 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49430 return;
49432 emit_insn (x);
49435 void
49436 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49437 bool uns_p, bool high_p)
49439 machine_mode wmode = GET_MODE (dest);
49440 machine_mode mode = GET_MODE (op1);
49441 rtx t1, t2, t3, t4, mask;
49443 switch (mode)
49445 case E_V4SImode:
49446 t1 = gen_reg_rtx (mode);
49447 t2 = gen_reg_rtx (mode);
49448 if (TARGET_XOP && !uns_p)
49450 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49451 shuffle the elements once so that all elements are in the right
49452 place for immediate use: { A C B D }. */
49453 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49454 const1_rtx, GEN_INT (3)));
49455 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49456 const1_rtx, GEN_INT (3)));
49458 else
49460 /* Put the elements into place for the multiply. */
49461 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49462 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49463 high_p = false;
49465 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49466 break;
49468 case E_V8SImode:
49469 /* Shuffle the elements between the lanes. After this we
49470 have { A B E F | C D G H } for each operand. */
49471 t1 = gen_reg_rtx (V4DImode);
49472 t2 = gen_reg_rtx (V4DImode);
49473 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49474 const0_rtx, const2_rtx,
49475 const1_rtx, GEN_INT (3)));
49476 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49477 const0_rtx, const2_rtx,
49478 const1_rtx, GEN_INT (3)));
49480 /* Shuffle the elements within the lanes. After this we
49481 have { A A B B | C C D D } or { E E F F | G G H H }. */
49482 t3 = gen_reg_rtx (V8SImode);
49483 t4 = gen_reg_rtx (V8SImode);
49484 mask = GEN_INT (high_p
49485 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49486 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49487 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49488 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49490 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49491 break;
49493 case E_V8HImode:
49494 case E_V16HImode:
49495 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49496 uns_p, OPTAB_DIRECT);
49497 t2 = expand_binop (mode,
49498 uns_p ? umul_highpart_optab : smul_highpart_optab,
49499 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49500 gcc_assert (t1 && t2);
49502 t3 = gen_reg_rtx (mode);
49503 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49504 emit_move_insn (dest, gen_lowpart (wmode, t3));
49505 break;
49507 case E_V16QImode:
49508 case E_V32QImode:
49509 case E_V32HImode:
49510 case E_V16SImode:
49511 case E_V64QImode:
49512 t1 = gen_reg_rtx (wmode);
49513 t2 = gen_reg_rtx (wmode);
49514 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49515 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49517 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49518 break;
49520 default:
49521 gcc_unreachable ();
49525 void
49526 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49528 rtx res_1, res_2, res_3, res_4;
49530 res_1 = gen_reg_rtx (V4SImode);
49531 res_2 = gen_reg_rtx (V4SImode);
49532 res_3 = gen_reg_rtx (V2DImode);
49533 res_4 = gen_reg_rtx (V2DImode);
49534 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49535 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49537 /* Move the results in element 2 down to element 1; we don't care
49538 what goes in elements 2 and 3. Then we can merge the parts
49539 back together with an interleave.
49541 Note that two other sequences were tried:
49542 (1) Use interleaves at the start instead of psrldq, which allows
49543 us to use a single shufps to merge things back at the end.
49544 (2) Use shufps here to combine the two vectors, then pshufd to
49545 put the elements in the correct order.
49546 In both cases the cost of the reformatting stall was too high
49547 and the overall sequence slower. */
49549 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49550 const0_rtx, const2_rtx,
49551 const0_rtx, const0_rtx));
49552 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49553 const0_rtx, const2_rtx,
49554 const0_rtx, const0_rtx));
49555 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49557 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49560 void
49561 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49563 machine_mode mode = GET_MODE (op0);
49564 rtx t1, t2, t3, t4, t5, t6;
49566 if (TARGET_AVX512DQ && mode == V8DImode)
49567 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49568 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49569 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49570 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49571 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49572 else if (TARGET_XOP && mode == V2DImode)
49574 /* op1: A,B,C,D, op2: E,F,G,H */
49575 op1 = gen_lowpart (V4SImode, op1);
49576 op2 = gen_lowpart (V4SImode, op2);
49578 t1 = gen_reg_rtx (V4SImode);
49579 t2 = gen_reg_rtx (V4SImode);
49580 t3 = gen_reg_rtx (V2DImode);
49581 t4 = gen_reg_rtx (V2DImode);
49583 /* t1: B,A,D,C */
49584 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49585 GEN_INT (1),
49586 GEN_INT (0),
49587 GEN_INT (3),
49588 GEN_INT (2)));
49590 /* t2: (B*E),(A*F),(D*G),(C*H) */
49591 emit_insn (gen_mulv4si3 (t2, t1, op2));
49593 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49594 emit_insn (gen_xop_phadddq (t3, t2));
49596 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49597 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49599 /* Multiply lower parts and add all */
49600 t5 = gen_reg_rtx (V2DImode);
49601 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49602 gen_lowpart (V4SImode, op1),
49603 gen_lowpart (V4SImode, op2)));
49604 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49607 else
49609 machine_mode nmode;
49610 rtx (*umul) (rtx, rtx, rtx);
49612 if (mode == V2DImode)
49614 umul = gen_vec_widen_umult_even_v4si;
49615 nmode = V4SImode;
49617 else if (mode == V4DImode)
49619 umul = gen_vec_widen_umult_even_v8si;
49620 nmode = V8SImode;
49622 else if (mode == V8DImode)
49624 umul = gen_vec_widen_umult_even_v16si;
49625 nmode = V16SImode;
49627 else
49628 gcc_unreachable ();
49631 /* Multiply low parts. */
49632 t1 = gen_reg_rtx (mode);
49633 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49635 /* Shift input vectors right 32 bits so we can multiply high parts. */
49636 t6 = GEN_INT (32);
49637 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49638 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49640 /* Multiply high parts by low parts. */
49641 t4 = gen_reg_rtx (mode);
49642 t5 = gen_reg_rtx (mode);
49643 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49644 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49646 /* Combine and shift the highparts back. */
49647 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49648 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49650 /* Combine high and low parts. */
49651 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49654 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49655 gen_rtx_MULT (mode, op1, op2));
49658 /* Return 1 if control transfer instruction INSN
49659 should be encoded with bnd prefix.
49660 If insn is NULL then return 1 when control
49661 transfer instructions should be prefixed with
49662 bnd by default for current function. */
49664 bool
49665 ix86_bnd_prefixed_insn_p (rtx insn)
49667 /* For call insns check special flag. */
49668 if (insn && CALL_P (insn))
49670 rtx call = get_call_rtx_from (insn);
49671 if (call)
49672 return CALL_EXPR_WITH_BOUNDS_P (call);
49675 /* All other insns are prefixed only if function is instrumented. */
49676 return chkp_function_instrumented_p (current_function_decl);
49679 /* Return 1 if control transfer instruction INSN
49680 should be encoded with notrack prefix. */
49682 static bool
49683 ix86_notrack_prefixed_insn_p (rtx insn)
49685 if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
49686 return false;
49688 if (CALL_P (insn))
49690 rtx call = get_call_rtx_from (insn);
49691 gcc_assert (call != NULL_RTX);
49692 rtx addr = XEXP (call, 0);
49694 /* Do not emit 'notrack' if it's not an indirect call. */
49695 if (MEM_P (addr)
49696 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49697 return false;
49698 else
49699 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
49702 if (JUMP_P (insn) && !flag_cet_switch)
49704 rtx target = JUMP_LABEL (insn);
49705 if (target == NULL_RTX || ANY_RETURN_P (target))
49706 return false;
49708 /* Check the jump is a switch table. */
49709 rtx_insn *label = as_a<rtx_insn *> (target);
49710 rtx_insn *table = next_insn (label);
49711 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
49712 return false;
49713 else
49714 return true;
49716 return false;
49719 /* Calculate integer abs() using only SSE2 instructions. */
49721 void
49722 ix86_expand_sse2_abs (rtx target, rtx input)
49724 machine_mode mode = GET_MODE (target);
49725 rtx tmp0, tmp1, x;
49727 switch (mode)
49729 /* For 32-bit signed integer X, the best way to calculate the absolute
49730 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
49731 case E_V4SImode:
49732 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49733 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
49734 NULL, 0, OPTAB_DIRECT);
49735 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49736 NULL, 0, OPTAB_DIRECT);
49737 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49738 target, 0, OPTAB_DIRECT);
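      /* For instance, X == -5 gives tmp0 == -1, tmp1 == (-5 ^ -1) == 4
         and 4 - (-1) == 5.  */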
49739 break;
49741 /* For 16-bit signed integer X, the best way to calculate the absolute
49742 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49743 case E_V8HImode:
49744 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49746 x = expand_simple_binop (mode, SMAX, tmp0, input,
49747 target, 0, OPTAB_DIRECT);
49748 break;
49750 /* For 8-bit signed integer X, the best way to calculate the absolute
49751 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49752 as SSE2 provides the PMINUB insn. */
49753 case E_V16QImode:
49754 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49756 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49757 target, 0, OPTAB_DIRECT);
49758 break;
49760 default:
49761 gcc_unreachable ();
49764 if (x != target)
49765 emit_move_insn (target, x);
49768 /* Expand an extract from a vector register through pextr insn.
49769 Return true if successful. */
49771 bool
49772 ix86_expand_pextr (rtx *operands)
49774 rtx dst = operands[0];
49775 rtx src = operands[1];
49777 unsigned int size = INTVAL (operands[2]);
49778 unsigned int pos = INTVAL (operands[3]);
49780 if (SUBREG_P (dst))
49782 /* Reject non-lowpart subregs. */
49783 if (SUBREG_BYTE (dst) > 0)
49784 return false;
49785 dst = SUBREG_REG (dst);
49788 if (SUBREG_P (src))
49790 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
49791 src = SUBREG_REG (src);
49794 switch (GET_MODE (src))
49796 case E_V16QImode:
49797 case E_V8HImode:
49798 case E_V4SImode:
49799 case E_V2DImode:
49800 case E_V1TImode:
49801 case E_TImode:
49803 machine_mode srcmode, dstmode;
49804 rtx d, pat;
49806 if (!int_mode_for_size (size, 0).exists (&dstmode))
49807 return false;
49809 switch (dstmode)
49811 case E_QImode:
49812 if (!TARGET_SSE4_1)
49813 return false;
49814 srcmode = V16QImode;
49815 break;
49817 case E_HImode:
49818 if (!TARGET_SSE2)
49819 return false;
49820 srcmode = V8HImode;
49821 break;
49823 case E_SImode:
49824 if (!TARGET_SSE4_1)
49825 return false;
49826 srcmode = V4SImode;
49827 break;
49829 case E_DImode:
49830 gcc_assert (TARGET_64BIT);
49831 if (!TARGET_SSE4_1)
49832 return false;
49833 srcmode = V2DImode;
49834 break;
49836 default:
49837 return false;
49840 /* Reject extractions from misaligned positions. */
49841 if (pos & (size-1))
49842 return false;
49844 if (GET_MODE (dst) == dstmode)
49845 d = dst;
49846 else
49847 d = gen_reg_rtx (dstmode);
49849 /* Construct insn pattern. */
49850 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
49851 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
49853 /* Let the rtl optimizers know about the zero extension performed. */
49854 if (dstmode == QImode || dstmode == HImode)
49856 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
49857 d = gen_lowpart (SImode, d);
49860 emit_insn (gen_rtx_SET (d, pat));
49862 if (d != dst)
49863 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49864 return true;
49867 default:
49868 return false;
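/* An illustrative note on the 'pos & (size - 1)' test above: for a
   power-of-two SIZE it is equivalent to 'pos % size != 0', so only
   element-aligned bit positions are accepted.  A stand-alone sketch
   (not used by the expander):  */

static int
aligned_pos_sketch (unsigned int pos, unsigned int size)
{
  /* SIZE is assumed to be a power of two (8, 16, 32 or 64 here).  */
  return (pos & (size - 1)) == 0;  /* pos 48, size 16 -> 1; pos 40 -> 0.  */
}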
49872 /* Expand an insert into a vector register through pinsr insn.
49873 Return true if successful. */
49875 bool
49876 ix86_expand_pinsr (rtx *operands)
49878 rtx dst = operands[0];
49879 rtx src = operands[3];
49881 unsigned int size = INTVAL (operands[1]);
49882 unsigned int pos = INTVAL (operands[2]);
49884 if (SUBREG_P (dst))
49886 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49887 dst = SUBREG_REG (dst);
49890 switch (GET_MODE (dst))
49892 case E_V16QImode:
49893 case E_V8HImode:
49894 case E_V4SImode:
49895 case E_V2DImode:
49896 case E_V1TImode:
49897 case E_TImode:
49899 machine_mode srcmode, dstmode;
49900 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49901 rtx d;
49903 if (!int_mode_for_size (size, 0).exists (&srcmode))
49904 return false;
49906 switch (srcmode)
49908 case E_QImode:
49909 if (!TARGET_SSE4_1)
49910 return false;
49911 dstmode = V16QImode;
49912 pinsr = gen_sse4_1_pinsrb;
49913 break;
49915 case E_HImode:
49916 if (!TARGET_SSE2)
49917 return false;
49918 dstmode = V8HImode;
49919 pinsr = gen_sse2_pinsrw;
49920 break;
49922 case E_SImode:
49923 if (!TARGET_SSE4_1)
49924 return false;
49925 dstmode = V4SImode;
49926 pinsr = gen_sse4_1_pinsrd;
49927 break;
49929 case E_DImode:
49930 gcc_assert (TARGET_64BIT);
49931 if (!TARGET_SSE4_1)
49932 return false;
49933 dstmode = V2DImode;
49934 pinsr = gen_sse4_1_pinsrq;
49935 break;
49937 default:
49938 return false;
49941 /* Reject insertions to misaligned positions. */
49942 if (pos & (size-1))
49943 return false;
49945 if (SUBREG_P (src))
49947 unsigned int srcpos = SUBREG_BYTE (src);
49949 if (srcpos > 0)
49951 rtx extr_ops[4];
49953 extr_ops[0] = gen_reg_rtx (srcmode);
49954 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
49955 extr_ops[2] = GEN_INT (size);
49956 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
49958 if (!ix86_expand_pextr (extr_ops))
49959 return false;
49961 src = extr_ops[0];
49963 else
49964 src = gen_lowpart (srcmode, SUBREG_REG (src));
49967 if (GET_MODE (dst) == dstmode)
49968 d = dst;
49969 else
49970 d = gen_reg_rtx (dstmode);
49972 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
49973 gen_lowpart (srcmode, src),
49974 GEN_INT (1 << (pos / size))));
49975 if (d != dst)
49976 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49977 return true;
49980 default:
49981 return false;
49985 /* This function returns the calling-ABI-specific va_list type node.
49986 It returns the FNDECL-specific va_list type. */
49988 static tree
49989 ix86_fn_abi_va_list (tree fndecl)
49991 if (!TARGET_64BIT)
49992 return va_list_type_node;
49993 gcc_assert (fndecl != NULL_TREE);
49995 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49996 return ms_va_list_type_node;
49997 else
49998 return sysv_va_list_type_node;
50001 /* Returns the canonical va_list type specified by TYPE. If there
50002 is no valid TYPE provided, it returns NULL_TREE. */
50004 static tree
50005 ix86_canonical_va_list_type (tree type)
50007 if (TARGET_64BIT)
50009 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
50010 return ms_va_list_type_node;
50012 if ((TREE_CODE (type) == ARRAY_TYPE
50013 && integer_zerop (array_type_nelts (type)))
50014 || POINTER_TYPE_P (type))
50016 tree elem_type = TREE_TYPE (type);
50017 if (TREE_CODE (elem_type) == RECORD_TYPE
50018 && lookup_attribute ("sysv_abi va_list",
50019 TYPE_ATTRIBUTES (elem_type)))
50020 return sysv_va_list_type_node;
50023 return NULL_TREE;
50026 return std_canonical_va_list_type (type);
50029 /* Iterate through the target-specific builtin types for va_list.
50030 IDX denotes the iterator, *PTREE is set to the result type of
50031 the va_list builtin, and *PNAME to its internal name.
50032 Returns zero if there is no element for this index, otherwise
50033 IDX should be increased upon the next call.
50034 Note, do not iterate a base builtin's name like __builtin_va_list.
50035 Used from c_common_nodes_and_builtins. */
50037 static int
50038 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50040 if (TARGET_64BIT)
50042 switch (idx)
50044 default:
50045 break;
50047 case 0:
50048 *ptree = ms_va_list_type_node;
50049 *pname = "__builtin_ms_va_list";
50050 return 1;
50052 case 1:
50053 *ptree = sysv_va_list_type_node;
50054 *pname = "__builtin_sysv_va_list";
50055 return 1;
50059 return 0;
50062 #undef TARGET_SCHED_DISPATCH
50063 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
50064 #undef TARGET_SCHED_DISPATCH_DO
50065 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
50066 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50067 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50068 #undef TARGET_SCHED_REORDER
50069 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
50070 #undef TARGET_SCHED_ADJUST_PRIORITY
50071 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50072 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50073 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50074 ix86_dependencies_evaluation_hook
50077 /* Implementation of reassociation_width target hook used by
50078 reassoc phase to identify parallelism level in reassociated
50079 tree. The statement's tree_code is passed in OP. The argument's type
50080 is passed in MODE. */
50082 static int
50083 ix86_reassociation_width (unsigned int op, machine_mode mode)
50085 int width = 1;
50086 /* Vector part. */
50087 if (VECTOR_MODE_P (mode))
50089 int div = 1;
50090 if (INTEGRAL_MODE_P (mode))
50091 width = ix86_cost->reassoc_vec_int;
50092 else if (FLOAT_MODE_P (mode))
50093 width = ix86_cost->reassoc_vec_fp;
50095 if (width == 1)
50096 return 1;
50098 /* Integer vector instructions execute in the FP unit
50099 and can execute 3 additions and one multiplication per cycle. */
50100 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
50101 && op != PLUS && op != MINUS)
50102 return 1;
50104 /* Account for targets that split wide vectors into multiple parts. */
50105 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
50106 div = GET_MODE_BITSIZE (mode) / 128;
50107 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
50108 div = GET_MODE_BITSIZE (mode) / 64;
50109 width = (width + div - 1) / div;
50111 /* Scalar part. */
50112 else if (INTEGRAL_MODE_P (mode))
50113 width = ix86_cost->reassoc_int;
50114 else if (FLOAT_MODE_P (mode))
50115 width = ix86_cost->reassoc_fp;
50117 /* Avoid using too many registers in 32bit mode. */
50118 if (!TARGET_64BIT && width > 2)
50119 width = 2;
50120 return width;
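/* A small sketch of the width adjustment above for targets that split
   wide vectors, using plain integers (illustration only): with a tuned
   width of 3 and a 256-bit integer mode on an AVX128-optimal CPU,
   div == 256 / 128 == 2 and the effective width becomes 2.  */

static int
split_width_sketch (int width, int mode_bits, int part_bits)
{
  int div = mode_bits / part_bits;  /* Number of parts the vector splits into.  */
  return (width + div - 1) / div;   /* Round up: (3 + 2 - 1) / 2 == 2.  */
}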
50123 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50124 place emms and femms instructions. */
50126 static machine_mode
50127 ix86_preferred_simd_mode (scalar_mode mode)
50129 if (!TARGET_SSE)
50130 return word_mode;
50132 switch (mode)
50134 case E_QImode:
50135 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50136 return V64QImode;
50137 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50138 return V32QImode;
50139 else
50140 return V16QImode;
50142 case E_HImode:
50143 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50144 return V32HImode;
50145 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50146 return V16HImode;
50147 else
50148 return V8HImode;
50150 case E_SImode:
50151 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50152 return V16SImode;
50153 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50154 return V8SImode;
50155 else
50156 return V4SImode;
50158 case E_DImode:
50159 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50160 return V8DImode;
50161 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50162 return V4DImode;
50163 else
50164 return V2DImode;
50166 case E_SFmode:
50167 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50168 return V16SFmode;
50169 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50170 return V8SFmode;
50171 else
50172 return V4SFmode;
50174 case E_DFmode:
50175 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50176 return V8DFmode;
50177 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50178 return V4DFmode;
50179 else if (TARGET_SSE2)
50180 return V2DFmode;
50181 /* FALLTHRU */
50183 default:
50184 return word_mode;
50188 /* All CPUs prefer to avoid cross-lane operations, so perform reductions
50189 of the upper against the lower halves down to SSE register size. */
50191 static machine_mode
50192 ix86_split_reduction (machine_mode mode)
50194 /* Reduce lowpart against highpart until we reach SSE reg width to
50195 avoid cross-lane operations. */
50196 switch (mode)
50198 case E_V8DImode:
50199 case E_V4DImode:
50200 return V2DImode;
50201 case E_V16SImode:
50202 case E_V8SImode:
50203 return V4SImode;
50204 case E_V32HImode:
50205 case E_V16HImode:
50206 return V8HImode;
50207 case E_V64QImode:
50208 case E_V32QImode:
50209 return V16QImode;
50210 case E_V16SFmode:
50211 case E_V8SFmode:
50212 return V4SFmode;
50213 case E_V8DFmode:
50214 case E_V4DFmode:
50215 return V2DFmode;
50216 default:
50217 return mode;
50221 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50222 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50223 256bit and 128bit vectors. */
50225 static void
50226 ix86_autovectorize_vector_sizes (vector_sizes *sizes)
50228 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50230 sizes->safe_push (64);
50231 sizes->safe_push (32);
50232 sizes->safe_push (16);
50234 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50236 sizes->safe_push (32);
50237 sizes->safe_push (16);
50241 /* Implementation of targetm.vectorize.get_mask_mode. */
50243 static opt_machine_mode
50244 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
50246 unsigned elem_size = vector_size / nunits;
50248 /* Scalar mask case. */
50249 if ((TARGET_AVX512F && vector_size == 64)
50250 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
50252 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
50253 return smallest_int_mode_for_size (nunits);
50256 scalar_int_mode elem_mode
50257 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
50259 gcc_assert (elem_size * nunits == vector_size);
50261 return mode_for_vector (elem_mode, nunits);
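/* A worked illustration of the arithmetic above with plain integers
   (the AVX512BW/VL checks are ignored in this sketch): for a 64-byte
   V16SFmode vector, nunits == 16 and elem_size == 64 / 16 == 4, so the
   scalar k-mask needs one bit per element, i.e. a 16-bit integer mode;
   smaller elements without AVX512BW fall back to a full-width vector
   mask instead.  */

static unsigned int
mask_nbits_sketch (unsigned int vector_size, unsigned int nunits)
{
  unsigned int elem_size = vector_size / nunits;  /* 64 / 16 == 4 bytes.  */

  if (elem_size == 4 || elem_size == 8)
    return nunits;                  /* Scalar k-mask: one bit per element.  */
  return elem_size * 8 * nunits;    /* Vector mask: as wide as the data.  */
}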
50266 /* Return class of registers which could be used for pseudo of MODE
50267 and of class RCLASS for spilling instead of memory. Return NO_REGS
50268 if it is not possible or non-profitable. */
50270 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
50272 static reg_class_t
50273 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50275 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
50276 && TARGET_SSE2
50277 && TARGET_INTER_UNIT_MOVES_TO_VEC
50278 && TARGET_INTER_UNIT_MOVES_FROM_VEC
50279 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50280 && INTEGER_CLASS_P (rclass))
50281 return ALL_SSE_REGS;
50282 return NO_REGS;
50285 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
50286 but returns a lower bound. */
50288 static unsigned int
50289 ix86_max_noce_ifcvt_seq_cost (edge e)
50291 bool predictable_p = predictable_edge_p (e);
50293 enum compiler_param param
50294 = (predictable_p
50295 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
50296 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
50298 /* If we have a parameter set, use that, otherwise take a guess using
50299 BRANCH_COST. */
50300 if (global_options_set.x_param_values[param])
50301 return PARAM_VALUE (param);
50302 else
50303 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
50306 /* Return true if SEQ is a good candidate as a replacement for the
50307 if-convertible sequence described in IF_INFO. */
50309 static bool
50310 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
50312 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
50314 int cmov_cnt = 0;
50315 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
50316 Maybe we should allow even more conditional moves as long as they
50317 are used far enough not to stall the CPU, or also consider
50318 IF_INFO->TEST_BB succ edge probabilities. */
50319 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
50321 rtx set = single_set (insn);
50322 if (!set)
50323 continue;
50324 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
50325 continue;
50326 rtx src = SET_SRC (set);
50327 machine_mode mode = GET_MODE (src);
50328 if (GET_MODE_CLASS (mode) != MODE_INT
50329 && GET_MODE_CLASS (mode) != MODE_FLOAT)
50330 continue;
50331 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
50332 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
50333 continue;
50334 /* insn is CMOV or FCMOV. */
50335 if (++cmov_cnt > 1)
50336 return false;
50339 return default_noce_conversion_profitable_p (seq, if_info);
50342 /* Implement targetm.vectorize.init_cost. */
50344 static void *
50345 ix86_init_cost (struct loop *)
50347 unsigned *cost = XNEWVEC (unsigned, 3);
50348 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50349 return cost;
50352 /* Implement targetm.vectorize.add_stmt_cost. */
50354 static unsigned
50355 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50356 struct _stmt_vec_info *stmt_info, int misalign,
50357 enum vect_cost_model_location where)
50359 unsigned *cost = (unsigned *) data;
50360 unsigned retval = 0;
50362 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50363 int stmt_cost = - 1;
50365 if ((kind == vector_stmt || kind == scalar_stmt)
50366 && stmt_info
50367 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
50369 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
50370 bool fp = false;
50371 machine_mode mode = TImode;
50373 if (vectype != NULL)
50375 fp = FLOAT_TYPE_P (vectype);
50376 mode = TYPE_MODE (vectype);
50378 /*machine_mode inner_mode = mode;
50379 if (VECTOR_MODE_P (mode))
50380 inner_mode = GET_MODE_INNER (mode);*/
50382 switch (subcode)
50384 case PLUS_EXPR:
50385 case POINTER_PLUS_EXPR:
50386 case MINUS_EXPR:
50387 if (kind == scalar_stmt)
50389 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50390 stmt_cost = ix86_cost->addss;
50391 else if (X87_FLOAT_MODE_P (mode))
50392 stmt_cost = ix86_cost->fadd;
50393 else
50394 stmt_cost = ix86_cost->add;
50396 else
50397 stmt_cost = ix86_vec_cost (mode,
50398 fp ? ix86_cost->addss
50399 : ix86_cost->sse_op,
50400 true);
50401 break;
50403 case MULT_EXPR:
50404 case WIDEN_MULT_EXPR:
50405 case MULT_HIGHPART_EXPR:
50406 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
50407 break;
50408 case FMA_EXPR:
50409 stmt_cost = ix86_vec_cost (mode,
50410 mode == SFmode ? ix86_cost->fmass
50411 : ix86_cost->fmasd,
50412 true);
50413 break;
50414 case NEGATE_EXPR:
50415 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50416 stmt_cost = ix86_cost->sse_op;
50417 else if (X87_FLOAT_MODE_P (mode))
50418 stmt_cost = ix86_cost->fchs;
50419 else if (VECTOR_MODE_P (mode))
50420 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50421 else
50422 stmt_cost = ix86_cost->add;
50423 break;
50424 case TRUNC_DIV_EXPR:
50425 case CEIL_DIV_EXPR:
50426 case FLOOR_DIV_EXPR:
50427 case ROUND_DIV_EXPR:
50428 case TRUNC_MOD_EXPR:
50429 case CEIL_MOD_EXPR:
50430 case FLOOR_MOD_EXPR:
50431 case RDIV_EXPR:
50432 case ROUND_MOD_EXPR:
50433 case EXACT_DIV_EXPR:
50434 stmt_cost = ix86_division_cost (ix86_cost, mode);
50435 break;
50437 case RSHIFT_EXPR:
50438 case LSHIFT_EXPR:
50439 case LROTATE_EXPR:
50440 case RROTATE_EXPR:
50442 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
50443 stmt_cost = ix86_shift_rotate_cost
50444 (ix86_cost, mode,
50445 TREE_CODE (op2) == INTEGER_CST,
50446 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
50447 true, false, false, NULL, NULL);
50449 break;
50450 case NOP_EXPR:
50451 stmt_cost = 0;
50452 break;
50454 case BIT_IOR_EXPR:
50455 case ABS_EXPR:
50456 case MIN_EXPR:
50457 case MAX_EXPR:
50458 case BIT_XOR_EXPR:
50459 case BIT_AND_EXPR:
50460 case BIT_NOT_EXPR:
50461 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50462 stmt_cost = ix86_cost->sse_op;
50463 else if (VECTOR_MODE_P (mode))
50464 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50465 else
50466 stmt_cost = ix86_cost->add;
50467 break;
50468 default:
50469 break;
50472 /* If we do elementwise loads into a vector then we are bound by
50473 latency and execution resources for the many scalar loads
50474 (AGU and load ports). Try to account for this by scaling the
50475 construction cost by the number of elements involved. */
50476 if (kind == vec_construct
50477 && stmt_info
50478 && stmt_info->type == load_vec_info_type
50479 && stmt_info->memory_access_type == VMAT_ELEMENTWISE)
50481 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50482 stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype);
50484 if (stmt_cost == -1)
50485 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50487 /* Penalize DFmode vector operations for Bonnell. */
50488 if (TARGET_BONNELL && kind == vector_stmt
50489 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
50490 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
50492 /* Statements in an inner loop relative to the loop being
50493 vectorized are weighted more heavily. The value here is
50494 arbitrary and could potentially be improved with analysis. */
50495 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50496 count *= 50; /* FIXME. */
50498 retval = (unsigned) (count * stmt_cost);
50500 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
50501 for Silvermont, as it has an out-of-order integer pipeline and can execute
50502 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
50503 if ((TARGET_SILVERMONT || TARGET_INTEL)
50504 && stmt_info && stmt_info->stmt)
50506 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50507 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50508 retval = (retval * 17) / 10;
50511 cost[where] += retval;
50513 return retval;
50516 /* Implement targetm.vectorize.finish_cost. */
50518 static void
50519 ix86_finish_cost (void *data, unsigned *prologue_cost,
50520 unsigned *body_cost, unsigned *epilogue_cost)
50522 unsigned *cost = (unsigned *) data;
50523 *prologue_cost = cost[vect_prologue];
50524 *body_cost = cost[vect_body];
50525 *epilogue_cost = cost[vect_epilogue];
50528 /* Implement targetm.vectorize.destroy_cost_data. */
50530 static void
50531 ix86_destroy_cost_data (void *data)
50533 free (data);
50536 /* Validate target specific memory model bits in VAL. */
50538 static unsigned HOST_WIDE_INT
50539 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50541 enum memmodel model = memmodel_from_int (val);
50542 bool strong;
50544 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50545 |MEMMODEL_MASK)
50546 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50548 warning (OPT_Winvalid_memory_model,
50549 "unknown architecture specific memory model");
50550 return MEMMODEL_SEQ_CST;
50552 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
50553 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
50555 warning (OPT_Winvalid_memory_model,
50556 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50557 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50559 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
50561 warning (OPT_Winvalid_memory_model,
50562 "HLE_RELEASE not used with RELEASE or stronger memory model");
50563 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50565 return val;
50568 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
50569 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50570 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
50571 or number of vecsize_mangle variants that should be emitted. */
50573 static int
50574 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50575 struct cgraph_simd_clone *clonei,
50576 tree base_type, int num)
50578 int ret = 1;
50580 if (clonei->simdlen
50581 && (clonei->simdlen < 2
50582 || clonei->simdlen > 1024
50583 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
50585 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50586 "unsupported simdlen %d", clonei->simdlen);
50587 return 0;
50590 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
50591 if (TREE_CODE (ret_type) != VOID_TYPE)
50592 switch (TYPE_MODE (ret_type))
50594 case E_QImode:
50595 case E_HImode:
50596 case E_SImode:
50597 case E_DImode:
50598 case E_SFmode:
50599 case E_DFmode:
50600 /* case E_SCmode: */
50601 /* case E_DCmode: */
50602 break;
50603 default:
50604 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50605 "unsupported return type %qT for simd", ret_type);
50606 return 0;
50609 tree t;
50610 int i;
50612 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
50613 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
50614 switch (TYPE_MODE (TREE_TYPE (t)))
50616 case E_QImode:
50617 case E_HImode:
50618 case E_SImode:
50619 case E_DImode:
50620 case E_SFmode:
50621 case E_DFmode:
50622 /* case E_SCmode: */
50623 /* case E_DCmode: */
50624 break;
50625 default:
50626 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50627 "unsupported argument type %qT for simd", TREE_TYPE (t));
50628 return 0;
50631 if (!TREE_PUBLIC (node->decl))
50633 /* If the function isn't exported, we can pick up just one ISA
50634 for the clones. */
50635 if (TARGET_AVX512F)
50636 clonei->vecsize_mangle = 'e';
50637 else if (TARGET_AVX2)
50638 clonei->vecsize_mangle = 'd';
50639 else if (TARGET_AVX)
50640 clonei->vecsize_mangle = 'c';
50641 else
50642 clonei->vecsize_mangle = 'b';
50643 ret = 1;
50645 else
50647 clonei->vecsize_mangle = "bcde"[num];
50648 ret = 4;
50650 clonei->mask_mode = VOIDmode;
50651 switch (clonei->vecsize_mangle)
50653 case 'b':
50654 clonei->vecsize_int = 128;
50655 clonei->vecsize_float = 128;
50656 break;
50657 case 'c':
50658 clonei->vecsize_int = 128;
50659 clonei->vecsize_float = 256;
50660 break;
50661 case 'd':
50662 clonei->vecsize_int = 256;
50663 clonei->vecsize_float = 256;
50664 break;
50665 case 'e':
50666 clonei->vecsize_int = 512;
50667 clonei->vecsize_float = 512;
50668 if (TYPE_MODE (base_type) == QImode)
50669 clonei->mask_mode = DImode;
50670 else
50671 clonei->mask_mode = SImode;
50672 break;
50674 if (clonei->simdlen == 0)
50676 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
50677 clonei->simdlen = clonei->vecsize_int;
50678 else
50679 clonei->simdlen = clonei->vecsize_float;
50680 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
50682 else if (clonei->simdlen > 16)
50684 /* For compatibility with ICC, use the same upper bounds
50685 for simdlen. In particular, for CTYPE below, use the return type,
50686 unless the function returns void, in which case use the characteristic
50687 type. If it is possible for the given SIMDLEN to pass a CTYPE value
50688 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
50689 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
50690 emit the corresponding clone. */
50691 tree ctype = ret_type;
50692 if (TREE_CODE (ret_type) == VOID_TYPE)
50693 ctype = base_type;
50694 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
50695 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
50696 cnt /= clonei->vecsize_int;
50697 else
50698 cnt /= clonei->vecsize_float;
50699 if (cnt > (TARGET_64BIT ? 16 : 8))
50701 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50702 "unsupported simdlen %d", clonei->simdlen);
50703 return 0;
50706 return ret;
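/* A worked example of the default simdlen computation above, with
   plain integers standing in for the mode sizes (hypothetical helper,
   not part of the hook): the 'e' (AVX512) variant with a float
   characteristic type gets 512 / 32 == 16 lanes, and the 'b' (SSE2)
   variant with a double type gets 128 / 64 == 2.  */

static int
default_simdlen_sketch (int vecsize_bits, int base_type_bits)
{
  /* One clone lane per BASE_TYPE_BITS-wide element of the vector.  */
  return vecsize_bits / base_type_bits;
}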
50709 /* Add target attribute to SIMD clone NODE if needed. */
50711 static void
50712 ix86_simd_clone_adjust (struct cgraph_node *node)
50714 const char *str = NULL;
50715 gcc_assert (node->decl == cfun->decl);
50716 switch (node->simdclone->vecsize_mangle)
50718 case 'b':
50719 if (!TARGET_SSE2)
50720 str = "sse2";
50721 break;
50722 case 'c':
50723 if (!TARGET_AVX)
50724 str = "avx";
50725 break;
50726 case 'd':
50727 if (!TARGET_AVX2)
50728 str = "avx2";
50729 break;
50730 case 'e':
50731 if (!TARGET_AVX512F)
50732 str = "avx512f";
50733 break;
50734 default:
50735 gcc_unreachable ();
50737 if (str == NULL)
50738 return;
50739 push_cfun (NULL);
50740 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
50741 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
50742 gcc_assert (ok);
50743 pop_cfun ();
50744 ix86_reset_previous_fndecl ();
50745 ix86_set_current_function (node->decl);
50748 /* If SIMD clone NODE can't be used in a vectorized loop
50749 in the current function, return -1, otherwise return the badness of using it
50750 (0 if it is the most desirable from the vecsize_mangle point of view, 1 if
50751 slightly less desirable, etc.). */
50753 static int
50754 ix86_simd_clone_usable (struct cgraph_node *node)
50756 switch (node->simdclone->vecsize_mangle)
50758 case 'b':
50759 if (!TARGET_SSE2)
50760 return -1;
50761 if (!TARGET_AVX)
50762 return 0;
50763 return TARGET_AVX2 ? 2 : 1;
50764 case 'c':
50765 if (!TARGET_AVX)
50766 return -1;
50767 return TARGET_AVX2 ? 1 : 0;
50768 case 'd':
50769 if (!TARGET_AVX2)
50770 return -1;
50771 return 0;
50772 case 'e':
50773 if (!TARGET_AVX512F)
50774 return -1;
50775 return 0;
50776 default:
50777 gcc_unreachable ();
50781 /* This function adjusts the unroll factor based on
50782 the hardware capabilities. For example, bdver3 has
50783 a loop buffer which makes unrolling of smaller
50784 loops less important. This function decides the
50785 unroll factor using the number of memory references
50786 (the value 32 is used) as a heuristic. */
50788 static unsigned
50789 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
50791 basic_block *bbs;
50792 rtx_insn *insn;
50793 unsigned i;
50794 unsigned mem_count = 0;
50796 if (!TARGET_ADJUST_UNROLL)
50797 return nunroll;
50799 /* Count the number of memory references within the loop body.
50800 This value determines the unrolling factor for bdver3 and bdver4
50801 architectures. */
50802 subrtx_iterator::array_type array;
50803 bbs = get_loop_body (loop);
50804 for (i = 0; i < loop->num_nodes; i++)
50805 FOR_BB_INSNS (bbs[i], insn)
50806 if (NONDEBUG_INSN_P (insn))
50807 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
50808 if (const_rtx x = *iter)
50809 if (MEM_P (x))
50811 machine_mode mode = GET_MODE (x);
50812 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
50813 if (n_words > 4)
50814 mem_count += 2;
50815 else
50816 mem_count += 1;
50818 free (bbs);
50820 if (mem_count && mem_count <= 32)
50821 return MIN (nunroll, 32 / mem_count);
50823 return nunroll;
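/* A sketch of the memory-reference heuristic above with concrete
   numbers (illustration only): a loop body with 5 narrow memory
   references yields MIN (nunroll, 32 / 5) == MIN (nunroll, 6), while a
   body with more than 32 references leaves NUNROLL unchanged.  */

static unsigned
unroll_cap_sketch (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return nunroll < 32 / mem_count ? nunroll : 32 / mem_count;  /* MIN.  */
  return nunroll;
}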
50827 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
50829 static bool
50830 ix86_float_exceptions_rounding_supported_p (void)
50832 /* For x87 floating point with standard excess precision handling,
50833 there is no adddf3 pattern (since x87 floating point only has
50834 XFmode operations) so the default hook implementation gets this
50835 wrong. */
50836 return TARGET_80387 || TARGET_SSE_MATH;
50839 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
50841 static void
50842 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
50844 if (!TARGET_80387 && !TARGET_SSE_MATH)
50845 return;
50846 tree exceptions_var = create_tmp_var_raw (integer_type_node);
50847 if (TARGET_80387)
50849 tree fenv_index_type = build_index_type (size_int (6));
50850 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
50851 tree fenv_var = create_tmp_var_raw (fenv_type);
50852 TREE_ADDRESSABLE (fenv_var) = 1;
50853 tree fenv_ptr = build_pointer_type (fenv_type);
50854 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
50855 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
50856 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
50857 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
50858 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
50859 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
50860 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
50861 tree hold_fnclex = build_call_expr (fnclex, 0);
50862 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
50863 NULL_TREE, NULL_TREE);
50864 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
50865 hold_fnclex);
50866 *clear = build_call_expr (fnclex, 0);
50867 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
50868 tree fnstsw_call = build_call_expr (fnstsw, 0);
50869 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
50870 sw_var, fnstsw_call);
50871 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
50872 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
50873 exceptions_var, exceptions_x87);
50874 *update = build2 (COMPOUND_EXPR, integer_type_node,
50875 sw_mod, update_mod);
50876 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
50877 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
50879 if (TARGET_SSE_MATH)
50881 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
50882 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
50883 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
50884 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
50885 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
50886 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
50887 mxcsr_orig_var, stmxcsr_hold_call);
50888 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
50889 mxcsr_orig_var,
50890 build_int_cst (unsigned_type_node, 0x1f80));
50891 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
50892 build_int_cst (unsigned_type_node, 0xffffffc0));
50893 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
50894 mxcsr_mod_var, hold_mod_val);
50895 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50896 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
50897 hold_assign_orig, hold_assign_mod);
50898 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
50899 ldmxcsr_hold_call);
50900 if (*hold)
50901 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
50902 else
50903 *hold = hold_all;
50904 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50905 if (*clear)
50906 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
50907 ldmxcsr_clear_call);
50908 else
50909 *clear = ldmxcsr_clear_call;
50910 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
50911 tree exceptions_sse = fold_convert (integer_type_node,
50912 stxmcsr_update_call);
50913 if (*update)
50915 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
50916 exceptions_var, exceptions_sse);
50917 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
50918 exceptions_var, exceptions_mod);
50919 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
50920 exceptions_assign);
50922 else
50923 *update = build2 (MODIFY_EXPR, integer_type_node,
50924 exceptions_var, exceptions_sse);
50925 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
50926 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50927 ldmxcsr_update_call);
50929 tree atomic_feraiseexcept
50930 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
50931 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
50932 1, exceptions_var);
50933 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
50934 atomic_feraiseexcept_call);
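/* An illustrative expansion of the MXCSR manipulation in the SSE
   "hold" sequence above (sketch only): OR-ing in 0x1f80 sets the six
   exception mask bits (bits 7-12) and AND-ing with 0xffffffc0 clears
   the six sticky exception flag bits (bits 0-5) before the modified
   word is reloaded with ldmxcsr.  */

static unsigned int
mxcsr_hold_sketch (unsigned int mxcsr)
{
  unsigned int masked = mxcsr | 0x1f80;   /* Mask all SSE exceptions.  */
  /* e.g. 0x1f81 (default control bits + inexact flag) -> 0x1f80.  */
  return masked & 0xffffffc0;             /* Clear the sticky flag bits.  */
}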
50937 /* Return mode to be used for bounds or VOIDmode
50938 if bounds are not supported. */
50940 static machine_mode
50941 ix86_mpx_bound_mode ()
50943 /* Do not support pointer checker if MPX
50944 is not enabled. */
50945 if (!TARGET_MPX)
50947 if (flag_check_pointer_bounds)
50948 warning (0, "Pointer Checker requires MPX support on this target."
50949 " Use -mmpx options to enable MPX.");
50950 return VOIDmode;
50953 return BNDmode;
50956 /* Return constant used to statically initialize constant bounds.
50958 This function is used to create special bound values. For now
50959 only INIT bounds and NONE bounds are expected. More special
50960 values may be added later. */
50962 static tree
50963 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
50965 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
50966 : build_zero_cst (pointer_sized_int_node);
50967 tree high = ub ? build_zero_cst (pointer_sized_int_node)
50968 : build_minus_one_cst (pointer_sized_int_node);
50970 /* This function is supposed to be used to create INIT and
50971 NONE bounds only. */
50972 gcc_assert ((lb == 0 && ub == -1)
50973 || (lb == -1 && ub == 0));
50975 return build_complex (NULL, low, high);
50978 /* Generate a list of statements STMTS to initialize pointer bounds
50979 variable VAR with bounds LB and UB. Return the number of generated
50980 statements. */
50982 static int
50983 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
50985 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
50986 tree lhs, modify, var_p;
50988 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
50989 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
50991 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
50992 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
50993 append_to_statement_list (modify, stmts);
50995 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
50996 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
50997 TYPE_SIZE_UNIT (pointer_sized_int_node)));
50998 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
50999 append_to_statement_list (modify, stmts);
51001 return 2;
51004 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51005 /* For i386, a common symbol is local only for non-PIE binaries. For
51006 x86-64, a common symbol is local for non-PIE binaries or when the linker
51007 supports copy relocations in PIE binaries. */
51009 static bool
51010 ix86_binds_local_p (const_tree exp)
51012 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51013 (!flag_pic
51014 || (TARGET_64BIT
51015 && HAVE_LD_PIE_COPYRELOC != 0)));
51017 #endif
51019 /* If MEM is in the form of [base+offset], extract the two parts
51020 of the address into BASE and OFFSET, otherwise return false. */
51022 static bool
51023 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51025 rtx addr;
51027 gcc_assert (MEM_P (mem));
51029 addr = XEXP (mem, 0);
51031 if (GET_CODE (addr) == CONST)
51032 addr = XEXP (addr, 0);
51034 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51036 *base = addr;
51037 *offset = const0_rtx;
51038 return true;
51041 if (GET_CODE (addr) == PLUS
51042 && (REG_P (XEXP (addr, 0))
51043 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51044 && CONST_INT_P (XEXP (addr, 1)))
51046 *base = XEXP (addr, 0);
51047 *offset = XEXP (addr, 1);
51048 return true;
51051 return false;
51054 /* Given OPERANDS of consecutive load/store, check if we can merge
51055 them into a move-multiple. LOAD is true if they are load instructions.
51056 MODE is the mode of the memory operands. */
51058 bool
51059 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51060 machine_mode mode)
51062 HOST_WIDE_INT offval_1, offval_2, msize;
51063 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51065 if (load)
51067 mem_1 = operands[1];
51068 mem_2 = operands[3];
51069 reg_1 = operands[0];
51070 reg_2 = operands[2];
51072 else
51074 mem_1 = operands[0];
51075 mem_2 = operands[2];
51076 reg_1 = operands[1];
51077 reg_2 = operands[3];
51080 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51082 if (REGNO (reg_1) != REGNO (reg_2))
51083 return false;
51085 /* Check if the addresses are in the form of [base+offset]. */
51086 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51087 return false;
51088 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51089 return false;
51091 /* Check if the bases are the same. */
51092 if (!rtx_equal_p (base_1, base_2))
51093 return false;
51095 offval_1 = INTVAL (offset_1);
51096 offval_2 = INTVAL (offset_2);
51097 msize = GET_MODE_SIZE (mode);
51098 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51099 if (offval_1 + msize != offval_2)
51100 return false;
51102 return true;
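/* A worked instance of the adjacency test above (illustration, not
   target code): for DImode, msize == 8, so mem_1 at [base+8] and mem_2
   at [base+16] pass because 8 + 8 == 16, while the reversed order
   fails -- only the lower-addressed memory may come first.  */

static int
adjacent_mems_sketch (long long off1, long long off2, long long msize)
{
  return off1 + msize == off2;  /* 8 + 8 == 16 -> true; 16 + 8 == 8 -> false.  */
}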
51105 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
51107 static bool
51108 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
51109 optimization_type opt_type)
51111 switch (op)
51113 case asin_optab:
51114 case acos_optab:
51115 case log1p_optab:
51116 case exp_optab:
51117 case exp10_optab:
51118 case exp2_optab:
51119 case expm1_optab:
51120 case ldexp_optab:
51121 case scalb_optab:
51122 case round_optab:
51123 return opt_type == OPTIMIZE_FOR_SPEED;
51125 case rint_optab:
51126 if (SSE_FLOAT_MODE_P (mode1)
51127 && TARGET_SSE_MATH
51128 && !flag_trapping_math
51129 && !TARGET_SSE4_1)
51130 return opt_type == OPTIMIZE_FOR_SPEED;
51131 return true;
51133 case floor_optab:
51134 case ceil_optab:
51135 case btrunc_optab:
51136 if (SSE_FLOAT_MODE_P (mode1)
51137 && TARGET_SSE_MATH
51138 && !flag_trapping_math
51139 && TARGET_SSE4_1)
51140 return true;
51141 return opt_type == OPTIMIZE_FOR_SPEED;
51143 case rsqrt_optab:
51144 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
51146 default:
51147 return true;
51151 /* Address space support.
51153 This is not "far pointers" in the 16-bit sense, but an easy way
51154 to use %fs and %gs segment prefixes. Therefore:
51156 (a) All address spaces have the same modes,
51157 (b) All address spaces have the same address forms,
51158 (c) While %fs and %gs are technically subsets of the generic
51159 address space, they are probably not subsets of each other.
51160 (d) Since we have no access to the segment base register values
51161 without resorting to a system call, we cannot convert a
51162 non-default address space to a default address space.
51163 Therefore we do not claim %fs or %gs are subsets of generic.
51165 Therefore we can (mostly) use the default hooks. */
51167 /* All use of segmentation is assumed to make address 0 valid. */
51169 static bool
51170 ix86_addr_space_zero_address_valid (addr_space_t as)
51172 return as != ADDR_SPACE_GENERIC;
51175 static void
51176 ix86_init_libfuncs (void)
51178 if (TARGET_64BIT)
51180 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
51181 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
51183 else
51185 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
51186 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
51189 #if TARGET_MACHO
51190 darwin_rename_builtins ();
51191 #endif
51194 /* Generate call to __divmoddi4. */
51196 static void
51197 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
51198 rtx op0, rtx op1,
51199 rtx *quot_p, rtx *rem_p)
51201 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
51203 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
51204 mode,
51205 op0, GET_MODE (op0),
51206 op1, GET_MODE (op1),
51207 XEXP (rem, 0), Pmode);
51208 *quot_p = quot;
51209 *rem_p = rem;
51212 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
51213 FPU, assume that the fpcw is set to extended precision; when using
51214 only SSE, rounding is correct; when using both SSE and the FPU,
51215 the rounding precision is indeterminate, since either may be chosen
51216 apparently at random. */
51218 static enum flt_eval_method
51219 ix86_excess_precision (enum excess_precision_type type)
51221 switch (type)
51223 case EXCESS_PRECISION_TYPE_FAST:
51224 /* The fastest type to promote to will always be the native type,
51225 whether that occurs with implicit excess precision or
51226 otherwise. */
51227 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51228 case EXCESS_PRECISION_TYPE_STANDARD:
51229 case EXCESS_PRECISION_TYPE_IMPLICIT:
51230 /* Otherwise, the excess precision we want when we are
51231 in a standards compliant mode, and the implicit precision we
51232 provide would be identical were it not for the unpredictable
51233 cases. */
51234 if (!TARGET_80387)
51235 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51236 else if (!TARGET_MIX_SSE_I387)
51238 if (!TARGET_SSE_MATH)
51239 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
51240 else if (TARGET_SSE2)
51241 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51244 /* If we are in standards compliant mode, but we know we will
51245 calculate in unpredictable precision, return
51246 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
51247 excess precision if the target can't guarantee it will honor
51248 it. */
51249 return (type == EXCESS_PRECISION_TYPE_STANDARD
51250 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
51251 : FLT_EVAL_METHOD_UNPREDICTABLE);
51252 default:
51253 gcc_unreachable ();
51256 return FLT_EVAL_METHOD_UNPREDICTABLE;
51259 /* Implement PUSH_ROUNDING. On the 386, we have a pushw instruction that
51260 decrements the stack pointer by exactly 2 no matter what the position was; there is no pushb.
51262 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
51263 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
51264 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
51266 poly_int64
51267 ix86_push_rounding (poly_int64 bytes)
51269 return ROUND_UP (bytes, UNITS_PER_WORD);
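/* An illustrative expansion of the rounding above with plain integers
   (sketch only, not the actual ROUND_UP macro): with UNITS_PER_WORD of
   4 a 1-byte push reserves 4 bytes, and with UNITS_PER_WORD of 8 a
   6-byte value reserves 8, keeping every adjustment a multiple of the
   CIE data alignment factor.  */

static long long
push_round_sketch (long long bytes, long long word)
{
  /* WORD is a power of two; adding WORD - 1 and masking the low bits
     rounds BYTES up to the next multiple of WORD.  */
  return (bytes + word - 1) & ~(word - 1);  /* (1, 4) -> 4; (6, 8) -> 8.  */
}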
51272 /* Target-specific selftests. */
51274 #if CHECKING_P
51276 namespace selftest {
51278 /* Verify that hard regs are dumped as expected (in compact mode). */
51280 static void
51281 ix86_test_dumping_hard_regs ()
51283 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
51284 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
51287 /* Test dumping an insn with repeated references to the same SCRATCH,
51288 to verify the rtx_reuse code. */
51290 static void
51291 ix86_test_dumping_memory_blockage ()
51293 set_new_first_and_last_insn (NULL, NULL);
51295 rtx pat = gen_memory_blockage ();
51296 rtx_reuse_manager r;
51297 r.preprocess (pat);
51299 /* Verify that the repeated references to the SCRATCH use
51300 reuse IDs. The first should be prefixed with a reuse ID,
51301 and the second should be dumped as a "reuse_rtx" of that ID.
51302 The expected string assumes Pmode == DImode. */
51303 if (Pmode == DImode)
51304 ASSERT_RTL_DUMP_EQ_WITH_REUSE
51305 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
51306 " (unspec:BLK [\n"
51307 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
51308 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
51311 /* Verify loading an RTL dump; specifically a dump of copying
51312 a param on x86_64 from a hard reg into the frame.
51313 This test is target-specific since the dump contains target-specific
51314 hard reg names. */
51316 static void
51317 ix86_test_loading_dump_fragment_1 ()
51319 rtl_dump_test t (SELFTEST_LOCATION,
51320 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
51322 rtx_insn *insn = get_insn_by_uid (1);
51324 /* The block structure and indentation here is purely for
51325 readability; it mirrors the structure of the rtx. */
51326 tree mem_expr;
51328 rtx pat = PATTERN (insn);
51329 ASSERT_EQ (SET, GET_CODE (pat));
51331 rtx dest = SET_DEST (pat);
51332 ASSERT_EQ (MEM, GET_CODE (dest));
51333 /* Verify the "/c" was parsed. */
51334 ASSERT_TRUE (RTX_FLAG (dest, call));
51335 ASSERT_EQ (SImode, GET_MODE (dest));
51337 rtx addr = XEXP (dest, 0);
51338 ASSERT_EQ (PLUS, GET_CODE (addr));
51339 ASSERT_EQ (DImode, GET_MODE (addr));
51341 rtx lhs = XEXP (addr, 0);
51342 /* Verify that the "frame" REG was consolidated. */
51343 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
51346 rtx rhs = XEXP (addr, 1);
51347 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
51348 ASSERT_EQ (-4, INTVAL (rhs));
51351 /* Verify the "[1 i+0 S4 A32]" was parsed. */
51352 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
51353 /* "i" should have been handled by synthesizing a global int
51354 variable named "i". */
51355 mem_expr = MEM_EXPR (dest);
51356 ASSERT_NE (mem_expr, NULL);
51357 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
51358 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
51359 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
51360 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
51361 /* "+0". */
51362 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
51363 ASSERT_EQ (0, MEM_OFFSET (dest));
51364 /* "S4". */
51365 ASSERT_EQ (4, MEM_SIZE (dest));
51366 /* "A32. */
51367 ASSERT_EQ (32, MEM_ALIGN (dest));
51370 rtx src = SET_SRC (pat);
51371 ASSERT_EQ (REG, GET_CODE (src));
51372 ASSERT_EQ (SImode, GET_MODE (src));
51373 ASSERT_EQ (5, REGNO (src));
51374 tree reg_expr = REG_EXPR (src);
51375 /* "i" here should point to the same var as for the MEM_EXPR. */
51376 ASSERT_EQ (reg_expr, mem_expr);
51381 /* Verify that the RTL loader copes with a call_insn dump.
51382 This test is target-specific since the dump contains a target-specific
51383 hard reg name. */
51385 static void
51386 ix86_test_loading_call_insn ()
51388 /* The test dump includes register "xmm0", which requires TARGET_SSE
51389 to exist. */
51390 if (!TARGET_SSE)
51391 return;
51393 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
51395 rtx_insn *insn = get_insns ();
51396 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
51398 /* "/j". */
51399 ASSERT_TRUE (RTX_FLAG (insn, jump));
51401 rtx pat = PATTERN (insn);
51402 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
51404 /* Verify REG_NOTES. */
51406 /* "(expr_list:REG_CALL_DECL". */
51407 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
51408 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
51409 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
51411 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
51412 rtx_expr_list *note1 = note0->next ();
51413 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
51415 ASSERT_EQ (NULL, note1->next ());
51418 /* Verify CALL_INSN_FUNCTION_USAGE. */
51420 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
51421 rtx_expr_list *usage
51422 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
51423 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
51424 ASSERT_EQ (DFmode, GET_MODE (usage));
51425 ASSERT_EQ (USE, GET_CODE (usage->element ()));
51426 ASSERT_EQ (NULL, usage->next ());
51430 /* Verify that the RTL loader copes with a dump from print_rtx_function.
51431 This test is target-specific since the dump contains target-specific
51432 hard reg names. */
51434 static void
51435 ix86_test_loading_full_dump ()
51437 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
51439 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51441 rtx_insn *insn_1 = get_insn_by_uid (1);
51442 ASSERT_EQ (NOTE, GET_CODE (insn_1));
51444 rtx_insn *insn_7 = get_insn_by_uid (7);
51445 ASSERT_EQ (INSN, GET_CODE (insn_7));
51446 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
51448 rtx_insn *insn_15 = get_insn_by_uid (15);
51449 ASSERT_EQ (INSN, GET_CODE (insn_15));
51450 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
51452 /* Verify crtl->return_rtx. */
51453 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
51454 ASSERT_EQ (0, REGNO (crtl->return_rtx));
51455 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
51458 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
51459 In particular, verify that it correctly loads the 2nd operand.
51460 This test is target-specific since these are machine-specific
51461 operands (and enums). */
51463 static void
51464 ix86_test_loading_unspec ()
51466 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
51468 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51470 ASSERT_TRUE (cfun);
51472 /* Test of an UNSPEC. */
51473 rtx_insn *insn = get_insns ();
51474 ASSERT_EQ (INSN, GET_CODE (insn));
51475 rtx set = single_set (insn);
51476 ASSERT_NE (NULL, set);
51477 rtx dst = SET_DEST (set);
51478 ASSERT_EQ (MEM, GET_CODE (dst));
51479 rtx src = SET_SRC (set);
51480 ASSERT_EQ (UNSPEC, GET_CODE (src));
51481 ASSERT_EQ (BLKmode, GET_MODE (src));
51482 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
51484 rtx v0 = XVECEXP (src, 0, 0);
51486 /* Verify that the two uses of the first SCRATCH have pointer
51487 equality. */
51488 rtx scratch_a = XEXP (dst, 0);
51489 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
51491 rtx scratch_b = XEXP (v0, 0);
51492 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
51494 ASSERT_EQ (scratch_a, scratch_b);
51496 /* Verify that the two mems are thus treated as equal. */
51497 ASSERT_TRUE (rtx_equal_p (dst, v0));
51499 /* Verify that the insn is recognized. */
51500 ASSERT_NE (-1, recog_memoized (insn));
51502 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
51503 insn = NEXT_INSN (insn);
51504 ASSERT_EQ (INSN, GET_CODE (insn));
51506 set = single_set (insn);
51507 ASSERT_NE (NULL, set);
51509 src = SET_SRC (set);
51510 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
51511 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
51514 /* Run all target-specific selftests. */
51516 static void
51517 ix86_run_selftests (void)
51519 ix86_test_dumping_hard_regs ();
51520 ix86_test_dumping_memory_blockage ();
51522 /* Various tests of loading RTL dumps, here because they contain
51523 ix86-isms (e.g. names of hard regs). */
51524 ix86_test_loading_dump_fragment_1 ();
51525 ix86_test_loading_call_insn ();
51526 ix86_test_loading_full_dump ();
51527 ix86_test_loading_unspec ();
51530 } // namespace selftest
51532 #endif /* CHECKING_P */
51534 /* Initialize the GCC target structure. */
51535 #undef TARGET_RETURN_IN_MEMORY
51536 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51538 #undef TARGET_LEGITIMIZE_ADDRESS
51539 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51541 #undef TARGET_ATTRIBUTE_TABLE
51542 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51543 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51544 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51545 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51546 # undef TARGET_MERGE_DECL_ATTRIBUTES
51547 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51548 #endif
51550 #undef TARGET_COMP_TYPE_ATTRIBUTES
51551 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51553 #undef TARGET_INIT_BUILTINS
51554 #define TARGET_INIT_BUILTINS ix86_init_builtins
51555 #undef TARGET_BUILTIN_DECL
51556 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51557 #undef TARGET_EXPAND_BUILTIN
51558 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51560 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51561 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51562 ix86_builtin_vectorized_function
51564 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51565 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51567 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
51568 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
51570 #undef TARGET_BUILTIN_RECIPROCAL
51571 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51573 #undef TARGET_ASM_FUNCTION_EPILOGUE
51574 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51576 #undef TARGET_ENCODE_SECTION_INFO
51577 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51578 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51579 #else
51580 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51581 #endif
51583 #undef TARGET_ASM_OPEN_PAREN
51584 #define TARGET_ASM_OPEN_PAREN ""
51585 #undef TARGET_ASM_CLOSE_PAREN
51586 #define TARGET_ASM_CLOSE_PAREN ""
51588 #undef TARGET_ASM_BYTE_OP
51589 #define TARGET_ASM_BYTE_OP ASM_BYTE
51591 #undef TARGET_ASM_ALIGNED_HI_OP
51592 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51593 #undef TARGET_ASM_ALIGNED_SI_OP
51594 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51595 #ifdef ASM_QUAD
51596 #undef TARGET_ASM_ALIGNED_DI_OP
51597 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51598 #endif
51600 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51601 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51603 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51604 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51606 #undef TARGET_ASM_UNALIGNED_HI_OP
51607 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51608 #undef TARGET_ASM_UNALIGNED_SI_OP
51609 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51610 #undef TARGET_ASM_UNALIGNED_DI_OP
51611 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51613 #undef TARGET_PRINT_OPERAND
51614 #define TARGET_PRINT_OPERAND ix86_print_operand
51615 #undef TARGET_PRINT_OPERAND_ADDRESS
51616 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51617 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51618 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51619 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51620 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51622 #undef TARGET_SCHED_INIT_GLOBAL
51623 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51624 #undef TARGET_SCHED_ADJUST_COST
51625 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51626 #undef TARGET_SCHED_ISSUE_RATE
51627 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51628 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51629 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51630 ia32_multipass_dfa_lookahead
51631 #undef TARGET_SCHED_MACRO_FUSION_P
51632 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51633 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51634 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51636 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51637 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51639 #undef TARGET_MEMMODEL_CHECK
51640 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51642 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51643 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51645 #ifdef HAVE_AS_TLS
51646 #undef TARGET_HAVE_TLS
51647 #define TARGET_HAVE_TLS true
51648 #endif
51649 #undef TARGET_CANNOT_FORCE_CONST_MEM
51650 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51651 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51652 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51654 #undef TARGET_DELEGITIMIZE_ADDRESS
51655 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51657 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
51658 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
51660 #undef TARGET_MS_BITFIELD_LAYOUT_P
51661 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51663 #if TARGET_MACHO
51664 #undef TARGET_BINDS_LOCAL_P
51665 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51666 #else
51667 #undef TARGET_BINDS_LOCAL_P
51668 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51669 #endif
51670 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51671 #undef TARGET_BINDS_LOCAL_P
51672 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51673 #endif
51675 #undef TARGET_ASM_OUTPUT_MI_THUNK
51676 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51677 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51678 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51680 #undef TARGET_ASM_FILE_START
51681 #define TARGET_ASM_FILE_START x86_file_start
51683 #undef TARGET_OPTION_OVERRIDE
51684 #define TARGET_OPTION_OVERRIDE ix86_option_override
51686 #undef TARGET_REGISTER_MOVE_COST
51687 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51688 #undef TARGET_MEMORY_MOVE_COST
51689 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51690 #undef TARGET_RTX_COSTS
51691 #define TARGET_RTX_COSTS ix86_rtx_costs
51692 #undef TARGET_ADDRESS_COST
51693 #define TARGET_ADDRESS_COST ix86_address_cost
51695 #undef TARGET_FLAGS_REGNUM
51696 #define TARGET_FLAGS_REGNUM FLAGS_REG
51697 #undef TARGET_FIXED_CONDITION_CODE_REGS
51698 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51699 #undef TARGET_CC_MODES_COMPATIBLE
51700 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51702 #undef TARGET_MACHINE_DEPENDENT_REORG
51703 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51705 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51706 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51708 #undef TARGET_BUILD_BUILTIN_VA_LIST
51709 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51711 #undef TARGET_FOLD_BUILTIN
51712 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51714 #undef TARGET_GIMPLE_FOLD_BUILTIN
51715 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
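/* Function multiversioning hooks: comparing the priority of "target"
   attribute versions and generating the dispatcher that selects a
   version at run time (via IFUNC on x86).  */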
51717 #undef TARGET_COMPARE_VERSION_PRIORITY
51718 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51720 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51721 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51722 ix86_generate_version_dispatcher_body
51724 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51725 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51726 ix86_get_function_versions_dispatcher
51728 #undef TARGET_ENUM_VA_LIST_P
51729 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51731 #undef TARGET_FN_ABI_VA_LIST
51732 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51734 #undef TARGET_CANONICAL_VA_LIST_TYPE
51735 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51737 #undef TARGET_EXPAND_BUILTIN_VA_START
51738 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51740 #undef TARGET_MD_ASM_ADJUST
51741 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
51743 #undef TARGET_C_EXCESS_PRECISION
51744 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
51745 #undef TARGET_PROMOTE_PROTOTYPES
51746 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51747 #undef TARGET_SETUP_INCOMING_VARARGS
51748 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51749 #undef TARGET_MUST_PASS_IN_STACK
51750 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51751 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
51752 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
51753 #undef TARGET_FUNCTION_ARG_ADVANCE
51754 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51755 #undef TARGET_FUNCTION_ARG
51756 #define TARGET_FUNCTION_ARG ix86_function_arg
51757 #undef TARGET_INIT_PIC_REG
51758 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51759 #undef TARGET_USE_PSEUDO_PIC_REG
51760 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51761 #undef TARGET_FUNCTION_ARG_BOUNDARY
51762 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51763 #undef TARGET_PASS_BY_REFERENCE
51764 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51765 #undef TARGET_INTERNAL_ARG_POINTER
51766 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51767 #undef TARGET_UPDATE_STACK_BOUNDARY
51768 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51769 #undef TARGET_GET_DRAP_RTX
51770 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51771 #undef TARGET_STRICT_ARGUMENT_NAMING
51772 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51773 #undef TARGET_STATIC_CHAIN
51774 #define TARGET_STATIC_CHAIN ix86_static_chain
51775 #undef TARGET_TRAMPOLINE_INIT
51776 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51777 #undef TARGET_RETURN_POPS_ARGS
51778 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51780 #undef TARGET_WARN_FUNC_RETURN
51781 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
51783 #undef TARGET_LEGITIMATE_COMBINED_INSN
51784 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51786 #undef TARGET_ASAN_SHADOW_OFFSET
51787 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51789 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51790 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51792 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51793 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51795 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51796 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51798 #undef TARGET_C_MODE_FOR_SUFFIX
51799 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51801 #ifdef HAVE_AS_TLS
51802 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51803 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51804 #endif
51806 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51807 #undef TARGET_INSERT_ATTRIBUTES
51808 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51809 #endif
51811 #undef TARGET_MANGLE_TYPE
51812 #define TARGET_MANGLE_TYPE ix86_mangle_type
51814 #undef TARGET_STACK_PROTECT_GUARD
51815 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
51817 #if !TARGET_MACHO
51818 #undef TARGET_STACK_PROTECT_FAIL
51819 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51820 #endif
51822 #undef TARGET_FUNCTION_VALUE
51823 #define TARGET_FUNCTION_VALUE ix86_function_value
51825 #undef TARGET_FUNCTION_VALUE_REGNO_P
51826 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51828 #undef TARGET_PROMOTE_FUNCTION_MODE
51829 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51831 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
51832 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
51834 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51835 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51837 #undef TARGET_INSTANTIATE_DECLS
51838 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51840 #undef TARGET_SECONDARY_RELOAD
51841 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51842 #undef TARGET_SECONDARY_MEMORY_NEEDED
51843 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
51844 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
51845 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
51847 #undef TARGET_CLASS_MAX_NREGS
51848 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51850 #undef TARGET_PREFERRED_RELOAD_CLASS
51851 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51852 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51853 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51854 #undef TARGET_CLASS_LIKELY_SPILLED_P
51855 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
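/* Vectorizer hooks: cost-model callbacks and the preferred SIMD modes
   and vector sizes used by auto-vectorization.  */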
51857 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51858 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51859 ix86_builtin_vectorization_cost
51860 #undef TARGET_VECTORIZE_VEC_PERM_CONST
51861 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
51862 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51863 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51864 ix86_preferred_simd_mode
51865 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
51866 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
51867 ix86_split_reduction
51868 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51869 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51870 ix86_autovectorize_vector_sizes
51871 #undef TARGET_VECTORIZE_GET_MASK_MODE
51872 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
51873 #undef TARGET_VECTORIZE_INIT_COST
51874 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51875 #undef TARGET_VECTORIZE_ADD_STMT_COST
51876 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51877 #undef TARGET_VECTORIZE_FINISH_COST
51878 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51879 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51880 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
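/* Function-specific option handling: switching target state between
   functions and saving, restoring, printing and validating the
   "target" attribute/pragma settings.  */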
51882 #undef TARGET_SET_CURRENT_FUNCTION
51883 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51885 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51886 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51888 #undef TARGET_OPTION_SAVE
51889 #define TARGET_OPTION_SAVE ix86_function_specific_save
51891 #undef TARGET_OPTION_RESTORE
51892 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51894 #undef TARGET_OPTION_POST_STREAM_IN
51895 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
51897 #undef TARGET_OPTION_PRINT
51898 #define TARGET_OPTION_PRINT ix86_function_specific_print
51900 #undef TARGET_OPTION_FUNCTION_VERSIONS
51901 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
51903 #undef TARGET_CAN_INLINE_P
51904 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51906 #undef TARGET_LEGITIMATE_ADDRESS_P
51907 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51909 #undef TARGET_REGISTER_PRIORITY
51910 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51912 #undef TARGET_REGISTER_USAGE_LEVELING_P
51913 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51915 #undef TARGET_LEGITIMATE_CONSTANT_P
51916 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51918 #undef TARGET_COMPUTE_FRAME_LAYOUT
51919 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
51921 #undef TARGET_FRAME_POINTER_REQUIRED
51922 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51924 #undef TARGET_CAN_ELIMINATE
51925 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51927 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51928 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51930 #undef TARGET_ASM_CODE_END
51931 #define TARGET_ASM_CODE_END ix86_code_end
51933 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51934 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51936 #undef TARGET_CANONICALIZE_COMPARISON
51937 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
51939 #undef TARGET_LOOP_UNROLL_ADJUST
51940 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51942 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
51943 #undef TARGET_SPILL_CLASS
51944 #define TARGET_SPILL_CLASS ix86_spill_class
51946 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51947 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51948 ix86_simd_clone_compute_vecsize_and_simdlen
51950 #undef TARGET_SIMD_CLONE_ADJUST
51951 #define TARGET_SIMD_CLONE_ADJUST \
51952 ix86_simd_clone_adjust
51954 #undef TARGET_SIMD_CLONE_USABLE
51955 #define TARGET_SIMD_CLONE_USABLE \
51956 ix86_simd_clone_usable
51958 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51959 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51960 ix86_float_exceptions_rounding_supported_p
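/* Mode-switching hooks, driving the optimize_mode_switching pass; on
   x86 this is used, among other things, for vzeroupper insertion and
   the x87 control-word modes.  */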
51962 #undef TARGET_MODE_EMIT
51963 #define TARGET_MODE_EMIT ix86_emit_mode_set
51965 #undef TARGET_MODE_NEEDED
51966 #define TARGET_MODE_NEEDED ix86_mode_needed
51968 #undef TARGET_MODE_AFTER
51969 #define TARGET_MODE_AFTER ix86_mode_after
51971 #undef TARGET_MODE_ENTRY
51972 #define TARGET_MODE_ENTRY ix86_mode_entry
51974 #undef TARGET_MODE_EXIT
51975 #define TARGET_MODE_EXIT ix86_mode_exit
51977 #undef TARGET_MODE_PRIORITY
51978 #define TARGET_MODE_PRIORITY ix86_mode_priority
51980 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51981 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
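/* Pointer Bounds Checker (MPX) hooks used by the chkp instrumentation.  */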
51983 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51984 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51986 #undef TARGET_STORE_BOUNDS_FOR_ARG
51987 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51989 #undef TARGET_LOAD_RETURNED_BOUNDS
51990 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51992 #undef TARGET_STORE_RETURNED_BOUNDS
51993 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51995 #undef TARGET_CHKP_BOUND_MODE
51996 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51998 #undef TARGET_BUILTIN_CHKP_FUNCTION
51999 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52001 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52002 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52004 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52005 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52007 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52008 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52010 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52011 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52013 #undef TARGET_OFFLOAD_OPTIONS
52014 #define TARGET_OFFLOAD_OPTIONS \
52015 ix86_offload_options
52017 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52018 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52020 #undef TARGET_OPTAB_SUPPORTED_P
52021 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
52023 #undef TARGET_HARD_REGNO_SCRATCH_OK
52024 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
52026 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
52027 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
52029 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
52030 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
52032 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
52033 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
52035 #undef TARGET_INIT_LIBFUNCS
52036 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
52038 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
52039 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
52041 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
52042 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
52044 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
52045 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
52047 #undef TARGET_HARD_REGNO_NREGS
52048 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
52049 #undef TARGET_HARD_REGNO_MODE_OK
52050 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
52052 #undef TARGET_MODES_TIEABLE_P
52053 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
52055 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
52056 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
52057 ix86_hard_regno_call_part_clobbered
52059 #undef TARGET_CAN_CHANGE_MODE_CLASS
52060 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
52062 #undef TARGET_STATIC_RTX_ALIGNMENT
52063 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
52064 #undef TARGET_CONSTANT_ALIGNMENT
52065 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
52067 #undef TARGET_EMPTY_RECORD_P
52068 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
52070 #undef TARGET_WARN_PARAMETER_PASSING_ABI
52071 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
52073 #if CHECKING_P
52074 #undef TARGET_RUN_TARGET_SELFTESTS
52075 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
52076 #endif /* #if CHECKING_P */
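/* The structure below is the i386 instance of the target hook vector.
   TARGET_INITIALIZER, defined in target-def.h, expands to an aggregate
   initializer built from the TARGET_* macros above; hooks this file
   does not override keep their defaults.  The target-independent parts
   of the compiler call the back end through this vector, so,
   illustratively, a call such as targetm.sched.issue_rate () resolves
   to ix86_issue_rate because of the TARGET_SCHED_ISSUE_RATE definition
   above.  */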
52078 struct gcc_target targetm = TARGET_INITIALIZER;
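/* gt-i386.h is generated by gengtype and provides the garbage
   collector root tables for GTY-marked statics in this file.  */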
52080 #include "gt-i386.h"