gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "memmodel.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "cfgloop.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "expmed.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic.h"
40 #include "cfgbuild.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "attribs.h"
44 #include "calls.h"
45 #include "stor-layout.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "reload.h"
57 #include "gimplify.h"
58 #include "dwarf2.h"
59 #include "tm-constrs.h"
60 #include "params.h"
61 #include "cselib.h"
62 #include "sched-int.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
71 #include "builtins.h"
72 #include "rtl-iter.h"
73 #include "tree-iterator.h"
74 #include "tree-chkp.h"
75 #include "rtl-chkp.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "regrename.h"
79 #include "dojump.h"
80 #include "fold-const-call.h"
81 #include "tree-vrp.h"
82 #include "tree-ssanames.h"
83 #include "selftest.h"
84 #include "selftest-rtl.h"
85 #include "print-rtl.h"
86 #include "intl.h"
87 #include "ifcvt.h"
88 #include "symbol-summary.h"
89 #include "ipa-prop.h"
90 #include "ipa-fnsummary.h"
92 /* This file should be included last. */
93 #include "target-def.h"
95 #include "x86-tune-costs.h"
97 static rtx legitimize_dllimport_symbol (rtx, bool);
98 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
99 static rtx legitimize_pe_coff_symbol (rtx, bool);
100 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
101 static bool ix86_save_reg (unsigned int, bool, bool);
102 static bool ix86_function_naked (const_tree);
103 static bool ix86_notrack_prefixed_insn_p (rtx);
105 #ifndef CHECK_STACK_LIMIT
106 #define CHECK_STACK_LIMIT (-1)
107 #endif
109 /* Return index of given mode in mult and division cost tables. */
110 #define MODE_INDEX(mode) \
111 ((mode) == QImode ? 0 \
112 : (mode) == HImode ? 1 \
113 : (mode) == SImode ? 2 \
114 : (mode) == DImode ? 3 \
115 : 4)
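/* For example, the SImode entry of the multiply-initialization cost table
   is typically read as ix86_cost->mult_init[MODE_INDEX (SImode)], i.e.
   index 2; any mode wider than DImode falls into the catch-all index 4.  */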
118 /* Set by -mtune. */
119 const struct processor_costs *ix86_tune_cost = NULL;
121 /* Set by -mtune or -Os. */
122 const struct processor_costs *ix86_cost = NULL;
124 /* Processor feature/optimization bitmasks. */
125 #define m_386 (1U<<PROCESSOR_I386)
126 #define m_486 (1U<<PROCESSOR_I486)
127 #define m_PENT (1U<<PROCESSOR_PENTIUM)
128 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
129 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
130 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
131 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
132 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
133 #define m_CORE2 (1U<<PROCESSOR_CORE2)
134 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
135 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
136 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
137 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
138 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
139 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
140 #define m_KNL (1U<<PROCESSOR_KNL)
141 #define m_KNM (1U<<PROCESSOR_KNM)
142 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
143 #define m_INTEL (1U<<PROCESSOR_INTEL)
145 #define m_GEODE (1U<<PROCESSOR_GEODE)
146 #define m_K6 (1U<<PROCESSOR_K6)
147 #define m_K6_GEODE (m_K6 | m_GEODE)
148 #define m_K8 (1U<<PROCESSOR_K8)
149 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
150 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
151 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
152 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
153 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
154 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
155 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
156 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
157 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
158 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
159 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
160 #define m_BTVER (m_BTVER1 | m_BTVER2)
161 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
162 | m_ZNVER1)
164 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
166 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
167 #undef DEF_TUNE
168 #define DEF_TUNE(tune, name, selector) name,
169 #include "x86-tune.def"
170 #undef DEF_TUNE
173 /* Feature tests against the various tunings. */
174 unsigned char ix86_tune_features[X86_TUNE_LAST];
176 /* Feature tests against the various tunings used to create ix86_tune_features
177 based on the processor mask. */
178 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
179 #undef DEF_TUNE
180 #define DEF_TUNE(tune, name, selector) selector,
181 #include "x86-tune.def"
182 #undef DEF_TUNE
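/* Each DEF_TUNE entry in x86-tune.def pairs an X86_TUNE_* flag with the
   string used by -mtune-ctrl= and the mask of processors it applies to,
   roughly along the lines of (illustrative, not an exact entry):

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
               m_PENT | m_PPRO | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC)

   so the "name" expansion above collects the strings and the "selector"
   expansion collects the processor masks.  */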
185 /* Feature tests against the various architecture variations. */
186 unsigned char ix86_arch_features[X86_ARCH_LAST];
188 /* Feature tests against the various architecture variations, used to create
189 ix86_arch_features based on the processor mask. */
190 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
191 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
192 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
194 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
195 ~m_386,
197 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
198 ~(m_386 | m_486),
200 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
201 ~m_386,
203 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
204 ~m_386,
207 /* In case the average insn count for single function invocation is
208 lower than this constant, emit fast (but longer) prologue and
209 epilogue code. */
210 #define FAST_PROLOGUE_INSN_COUNT 20
212 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
213 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
214 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
215 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
217 /* Array of the smallest class containing reg number REGNO, indexed by
218 REGNO. Used by REGNO_REG_CLASS in i386.h. */
220 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
222 /* ax, dx, cx, bx */
223 AREG, DREG, CREG, BREG,
224 /* si, di, bp, sp */
225 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
226 /* FP registers */
227 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
228 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
229 /* arg pointer */
230 NON_Q_REGS,
231 /* flags, fpsr, fpcr, frame */
232 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
233 /* SSE registers */
234 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
235 SSE_REGS, SSE_REGS,
236 /* MMX registers */
237 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
238 MMX_REGS, MMX_REGS,
239 /* REX registers */
240 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
242 /* SSE REX registers */
243 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244 SSE_REGS, SSE_REGS,
245 /* AVX-512 SSE registers */
246 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
247 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
248 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
249 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
250 /* Mask registers. */
251 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
252 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
253 /* MPX bound registers */
254 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
257 /* The "default" register map used in 32bit mode. */
259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
268 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
269 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
270 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
271 101, 102, 103, 104, /* bound registers */
274 /* The "default" register map used in 64bit mode. */
276 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
278 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
279 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
280 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
281 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
282 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
283 8,9,10,11,12,13,14,15, /* extended integer registers */
284 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
285 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
286 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
287 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
288 126, 127, 128, 129, /* bound registers */
291 /* Define the register numbers to be used in Dwarf debugging information.
292 The SVR4 reference port C compiler uses the following register numbers
293 in its Dwarf output code:
294 0 for %eax (gcc regno = 0)
295 1 for %ecx (gcc regno = 2)
296 2 for %edx (gcc regno = 1)
297 3 for %ebx (gcc regno = 3)
298 4 for %esp (gcc regno = 7)
299 5 for %ebp (gcc regno = 6)
300 6 for %esi (gcc regno = 4)
301 7 for %edi (gcc regno = 5)
302 The following three DWARF register numbers are never generated by
303 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
304 believed these numbers to have these meanings.
305 8 for %eip (no gcc equivalent)
306 9 for %eflags (gcc regno = 17)
307 10 for %trapno (no gcc equivalent)
308 It is not at all clear how we should number the FP stack registers
309 for the x86 architecture. If the version of SDB on x86/svr4 were
310 a bit less brain dead with respect to floating-point then we would
311 have a precedent to follow with respect to DWARF register numbers
312 for x86 FP registers, but the SDB on x86/svr4 was so completely
313 broken with respect to FP registers that it is hardly worth thinking
314 of it as something to strive for compatibility with.
315 The version of x86/svr4 SDB I had does (partially)
316 seem to believe that DWARF register number 11 is associated with
317 the x86 register %st(0), but that's about all. Higher DWARF
318 register numbers don't seem to be associated with anything in
319 particular, and even for DWARF regno 11, SDB only seemed to under-
320 stand that it should say that a variable lives in %st(0) (when
321 asked via an `=' command) if we said it was in DWARF regno 11,
322 but SDB still printed garbage when asked for the value of the
323 variable in question (via a `/' command).
324 (Also note that the labels SDB printed for various FP stack regs
325 when doing an `x' command were all wrong.)
326 Note that these problems generally don't affect the native SVR4
327 C compiler because it doesn't allow the use of -O with -g and
328 because when it is *not* optimizing, it allocates a memory
329 location for each floating-point variable, and the memory
330 location is what gets described in the DWARF AT_location
331 attribute for the variable in question.
332 Regardless of the severe mental illness of the x86/svr4 SDB, we
333 do something sensible here and we use the following DWARF
334 register numbers. Note that these are all stack-top-relative
335 numbers.
336 11 for %st(0) (gcc regno = 8)
337 12 for %st(1) (gcc regno = 9)
338 13 for %st(2) (gcc regno = 10)
339 14 for %st(3) (gcc regno = 11)
340 15 for %st(4) (gcc regno = 12)
341 16 for %st(5) (gcc regno = 13)
342 17 for %st(6) (gcc regno = 14)
343 18 for %st(7) (gcc regno = 15)
345 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
347 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
348 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
349 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
350 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
351 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
352 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
353 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
354 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
355 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
356 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
357 101, 102, 103, 104, /* bound registers */
360 /* Define parameter passing and return registers. */
362 static int const x86_64_int_parameter_registers[6] =
364 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
367 static int const x86_64_ms_abi_int_parameter_registers[4] =
369 CX_REG, DX_REG, R8_REG, R9_REG
372 static int const x86_64_int_return_registers[4] =
374 AX_REG, DX_REG, DI_REG, SI_REG
377 /* Additional registers that are clobbered by SYSV calls. */
379 #define NUM_X86_64_MS_CLOBBERED_REGS 12
380 static int const x86_64_ms_sysv_extra_clobbered_registers
381 [NUM_X86_64_MS_CLOBBERED_REGS] =
383 SI_REG, DI_REG,
384 XMM6_REG, XMM7_REG,
385 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
386 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
389 enum xlogue_stub {
390 XLOGUE_STUB_SAVE,
391 XLOGUE_STUB_RESTORE,
392 XLOGUE_STUB_RESTORE_TAIL,
393 XLOGUE_STUB_SAVE_HFP,
394 XLOGUE_STUB_RESTORE_HFP,
395 XLOGUE_STUB_RESTORE_HFP_TAIL,
397 XLOGUE_STUB_COUNT
400 enum xlogue_stub_sets {
401 XLOGUE_SET_ALIGNED,
402 XLOGUE_SET_ALIGNED_PLUS_8,
403 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
404 XLOGUE_SET_HFP_ALIGNED_PLUS_8,
406 XLOGUE_SET_COUNT
409 /* Register save/restore layout used by out-of-line stubs. */
410 class xlogue_layout {
411 public:
412 struct reginfo
414 unsigned regno;
415 HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or
416 rsi) to where each register is stored. */
419 unsigned get_nregs () const {return m_nregs;}
420 HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;}
422 const reginfo &get_reginfo (unsigned reg) const
424 gcc_assert (reg < m_nregs);
425 return m_regs[reg];
428 static const char *get_stub_name (enum xlogue_stub stub,
429 unsigned n_extra_args);
431 /* Returns an rtx for the stub's symbol based upon
432 1.) the specified stub (save, restore or restore_ret) and
433 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
434 3.) whether or not stack alignment is being performed. */
435 static rtx get_stub_rtx (enum xlogue_stub stub);
437 /* Returns the amount of stack space (including padding) that the stub
438 needs to store registers based upon data in the machine_function. */
439 HOST_WIDE_INT get_stack_space_used () const
441 const struct machine_function *m = cfun->machine;
442 unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
444 gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
445 return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
448 /* Returns the offset for the base pointer used by the stub. */
449 HOST_WIDE_INT get_stub_ptr_offset () const
451 return STUB_INDEX_OFFSET + m_stack_align_off_in;
454 static const struct xlogue_layout &get_instance ();
455 static unsigned count_stub_managed_regs ();
456 static bool is_stub_managed_reg (unsigned regno, unsigned count);
458 static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
459 static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
460 static const unsigned MAX_REGS = 18;
461 static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
462 static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
463 static const unsigned STUB_NAME_MAX_LEN = 20;
464 static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
465 static const unsigned REG_ORDER[MAX_REGS];
466 static const unsigned REG_ORDER_REALIGN[MAX_REGS];
468 private:
469 xlogue_layout ();
470 xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
471 xlogue_layout (const xlogue_layout &);
473 /* True if hard frame pointer is used. */
474 bool m_hfp;
476 /* Max number of registers this layout manages. */
477 unsigned m_nregs;
479 /* Incoming offset from 16-byte alignment. */
480 HOST_WIDE_INT m_stack_align_off_in;
482 /* Register order and offsets. */
483 struct reginfo m_regs[MAX_REGS];
485 /* Lazy-inited cache of symbol names for stubs. */
486 static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
487 [STUB_NAME_MAX_LEN];
489 static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
492 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
493 "savms64",
494 "resms64",
495 "resms64x",
496 "savms64f",
497 "resms64f",
498 "resms64fx"
501 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
502 /* The below offset values are where each register is stored for the layout
503 relative to incoming stack pointer. The value of each m_regs[].offset will
504 be relative to the incoming base pointer (rax or rsi) used by the stub.
506 s_instances: 0 1 2 3
507 Offset: realigned or aligned + 8
508 Register aligned aligned + 8 aligned w/HFP w/HFP */
509 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
510 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
511 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
512 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
513 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
514 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
515 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
516 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
517 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
518 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
519 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
520 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
521 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
522 BP_REG, /* 0xc0 0xc8 N/A N/A */
523 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
524 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
525 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
526 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
529 /* Instantiate static const values. */
530 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
531 const unsigned xlogue_layout::MIN_REGS;
532 const unsigned xlogue_layout::MAX_REGS;
533 const unsigned xlogue_layout::MAX_EXTRA_REGS;
534 const unsigned xlogue_layout::VARIANT_COUNT;
535 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
537 /* Initialize xlogue_layout::s_stub_names to zero. */
538 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
539 [STUB_NAME_MAX_LEN];
541 /* Instantiates all xlogue_layout instances. */
542 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
543 xlogue_layout (0, false),
544 xlogue_layout (8, false),
545 xlogue_layout (0, true),
546 xlogue_layout (8, true)
549 /* Return an appropriate const instance of xlogue_layout based upon values
550 in cfun->machine and crtl. */
551 const struct xlogue_layout &
552 xlogue_layout::get_instance ()
554 enum xlogue_stub_sets stub_set;
555 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
557 if (stack_realign_fp)
558 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
559 else if (frame_pointer_needed)
560 stub_set = aligned_plus_8
561 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
562 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
563 else
564 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
566 return s_instances[stub_set];
569 /* Determine how many clobbered registers can be saved by the stub.
570 Returns the count of registers the stub will save and restore. */
571 unsigned
572 xlogue_layout::count_stub_managed_regs ()
574 bool hfp = frame_pointer_needed || stack_realign_fp;
575 unsigned i, count;
576 unsigned regno;
578 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
580 regno = REG_ORDER[i];
581 if (regno == BP_REG && hfp)
582 continue;
583 if (!ix86_save_reg (regno, false, false))
584 break;
585 ++count;
587 return count;
590 /* Determine if register REGNO is a stub managed register given the
591 total COUNT of stub managed registers. */
592 bool
593 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
595 bool hfp = frame_pointer_needed || stack_realign_fp;
596 unsigned i;
598 for (i = 0; i < count; ++i)
600 gcc_assert (i < MAX_REGS);
601 if (REG_ORDER[i] == BP_REG && hfp)
602 ++count;
603 else if (REG_ORDER[i] == regno)
604 return true;
606 return false;
609 /* Constructor for xlogue_layout. */
610 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
611 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
612 m_stack_align_off_in (stack_align_off_in)
614 HOST_WIDE_INT offset = stack_align_off_in;
615 unsigned i, j;
617 for (i = j = 0; i < MAX_REGS; ++i)
619 unsigned regno = REG_ORDER[i];
621 if (regno == BP_REG && hfp)
622 continue;
623 if (SSE_REGNO_P (regno))
625 offset += 16;
626 /* Verify that SSE regs are always aligned. */
627 gcc_assert (!((stack_align_off_in + offset) & 15));
629 else
630 offset += 8;
632 m_regs[j].regno = regno;
633 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
635 gcc_assert (j == m_nregs);
638 const char *
639 xlogue_layout::get_stub_name (enum xlogue_stub stub,
640 unsigned n_extra_regs)
642 const int have_avx = TARGET_AVX;
643 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
645 /* Lazy init */
646 if (!*name)
648 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
649 (have_avx ? "avx" : "sse"),
650 STUB_BASE_NAMES[stub],
651 MIN_REGS + n_extra_regs);
652 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
655 return name;
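/* With the tables above this yields names such as "__sse_savms64_12" or
   "__avx_resms64f_17": the prefix records whether AVX moves are used, the
   middle part is the stub base name, and the trailing count is MIN_REGS
   plus the number of extra registers the stub handles.  */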
658 /* Return rtx of a symbol ref for the entry point (based upon
659 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
661 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
663 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
664 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
665 gcc_assert (stub < XLOGUE_STUB_COUNT);
666 gcc_assert (crtl->stack_realign_finalized);
668 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
671 /* Define the structure for the machine field in struct function. */
673 struct GTY(()) stack_local_entry {
674 unsigned short mode;
675 unsigned short n;
676 rtx rtl;
677 struct stack_local_entry *next;
680 /* Which cpu are we scheduling for. */
681 enum attr_cpu ix86_schedule;
683 /* Which cpu are we optimizing for. */
684 enum processor_type ix86_tune;
686 /* Which instruction set architecture to use. */
687 enum processor_type ix86_arch;
689 /* True if processor has SSE prefetch instruction. */
690 unsigned char x86_prefetch_sse;
692 /* -mstackrealign option */
693 static const char ix86_force_align_arg_pointer_string[]
694 = "force_align_arg_pointer";
696 static rtx (*ix86_gen_leave) (void);
697 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
698 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
699 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
700 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
701 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
702 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
703 static rtx (*ix86_gen_clzero) (rtx);
704 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
705 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
706 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
708 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
709 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
711 /* Preferred alignment for stack boundary in bits. */
712 unsigned int ix86_preferred_stack_boundary;
714 /* Alignment for incoming stack boundary in bits specified at
715 command line. */
716 static unsigned int ix86_user_incoming_stack_boundary;
718 /* Default alignment for incoming stack boundary in bits. */
719 static unsigned int ix86_default_incoming_stack_boundary;
721 /* Alignment for incoming stack boundary in bits. */
722 unsigned int ix86_incoming_stack_boundary;
724 /* Calling abi specific va_list type nodes. */
725 static GTY(()) tree sysv_va_list_type_node;
726 static GTY(()) tree ms_va_list_type_node;
728 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
729 char internal_label_prefix[16];
730 int internal_label_prefix_len;
732 /* Fence to use after loop using movnt. */
733 tree x86_mfence;
735 /* Register class used for passing given 64bit part of the argument.
736 These represent classes as documented by the PS ABI, with the exception
737 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
738 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
740 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
741 whenever possible (upper half does contain padding). */
742 enum x86_64_reg_class
744 X86_64_NO_CLASS,
745 X86_64_INTEGER_CLASS,
746 X86_64_INTEGERSI_CLASS,
747 X86_64_SSE_CLASS,
748 X86_64_SSESF_CLASS,
749 X86_64_SSEDF_CLASS,
750 X86_64_SSEUP_CLASS,
751 X86_64_X87_CLASS,
752 X86_64_X87UP_CLASS,
753 X86_64_COMPLEX_X87_CLASS,
754 X86_64_MEMORY_CLASS
757 #define MAX_CLASSES 8
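/* As a rough illustration of the classification scheme: on x86-64 a
   16-byte struct { double d; long l; } is split into two eightbytes
   classified X86_64_SSE_CLASS and X86_64_INTEGER_CLASS, so it is passed
   partly in an SSE register and partly in a general-purpose register,
   while most larger aggregates collapse to X86_64_MEMORY_CLASS and are
   passed on the stack.  */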
759 /* Table of constants used by fldpi, fldln2, etc.... */
760 static REAL_VALUE_TYPE ext_80387_constants_table [5];
761 static bool ext_80387_constants_init;
764 static struct machine_function * ix86_init_machine_status (void);
765 static rtx ix86_function_value (const_tree, const_tree, bool);
766 static bool ix86_function_value_regno_p (const unsigned int);
767 static unsigned int ix86_function_arg_boundary (machine_mode,
768 const_tree);
769 static rtx ix86_static_chain (const_tree, bool);
770 static int ix86_function_regparm (const_tree, const_tree);
771 static void ix86_compute_frame_layout (void);
772 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
773 rtx, rtx, int);
774 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
775 static tree ix86_canonical_va_list_type (tree);
776 static void predict_jump (int);
777 static unsigned int split_stack_prologue_scratch_regno (void);
778 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
780 enum ix86_function_specific_strings
782 IX86_FUNCTION_SPECIFIC_ARCH,
783 IX86_FUNCTION_SPECIFIC_TUNE,
784 IX86_FUNCTION_SPECIFIC_MAX
787 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
788 const char *, const char *, enum fpmath_unit,
789 bool);
790 static void ix86_function_specific_save (struct cl_target_option *,
791 struct gcc_options *opts);
792 static void ix86_function_specific_restore (struct gcc_options *opts,
793 struct cl_target_option *);
794 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
795 static void ix86_function_specific_print (FILE *, int,
796 struct cl_target_option *);
797 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
798 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
799 struct gcc_options *,
800 struct gcc_options *,
801 struct gcc_options *);
802 static bool ix86_can_inline_p (tree, tree);
803 static void ix86_set_current_function (tree);
804 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
806 static enum calling_abi ix86_function_abi (const_tree);
809 #ifndef SUBTARGET32_DEFAULT_CPU
810 #define SUBTARGET32_DEFAULT_CPU "i386"
811 #endif
813 /* Whether -mtune= or -march= were specified */
814 static int ix86_tune_defaulted;
815 static int ix86_arch_specified;
817 /* Vectorization library interface and handlers. */
818 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
820 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
821 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
823 /* Processor target table, indexed by processor number */
824 struct ptt
826 const char *const name; /* processor name */
827 const struct processor_costs *cost; /* Processor costs */
828 const int align_loop; /* Default alignments. */
829 const int align_loop_max_skip;
830 const int align_jump;
831 const int align_jump_max_skip;
832 const int align_func;
835 /* This table must be in sync with enum processor_type in i386.h. */
836 static const struct ptt processor_target_table[PROCESSOR_max] =
838 {"generic", &generic_cost, 16, 10, 16, 10, 16},
839 {"i386", &i386_cost, 4, 3, 4, 3, 4},
840 {"i486", &i486_cost, 16, 15, 16, 15, 16},
841 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
842 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
843 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
844 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
845 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
846 {"core2", &core_cost, 16, 10, 16, 10, 16},
847 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
848 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
849 {"haswell", &core_cost, 16, 10, 16, 10, 16},
850 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
851 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
852 {"knl", &slm_cost, 16, 15, 16, 7, 16},
853 {"knm", &slm_cost, 16, 15, 16, 7, 16},
854 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
855 {"intel", &intel_cost, 16, 15, 16, 7, 16},
856 {"geode", &geode_cost, 0, 0, 0, 0, 0},
857 {"k6", &k6_cost, 32, 7, 32, 7, 32},
858 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
859 {"k8", &k8_cost, 16, 7, 16, 7, 16},
860 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
861 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
862 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
863 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
864 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
865 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
866 {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
867 {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
870 static unsigned int
871 rest_of_handle_insert_vzeroupper (void)
873 int i;
875 /* vzeroupper instructions are inserted immediately after reload to
876 account for possible spills from 256bit registers. The pass
877 reuses mode switching infrastructure by re-running mode insertion
878 pass, so disable entities that have already been processed. */
879 for (i = 0; i < MAX_386_ENTITIES; i++)
880 ix86_optimize_mode_switching[i] = 0;
882 ix86_optimize_mode_switching[AVX_U128] = 1;
884 /* Call optimize_mode_switching. */
885 g->get_passes ()->execute_pass_mode_switching ();
886 return 0;
889 /* Return 1 if INSN uses or defines a hard register.
890 Hard register uses in a memory address are ignored.
891 Clobbers and flags definitions are ignored. */
893 static bool
894 has_non_address_hard_reg (rtx_insn *insn)
896 df_ref ref;
897 FOR_EACH_INSN_DEF (ref, insn)
898 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
899 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
900 && DF_REF_REGNO (ref) != FLAGS_REG)
901 return true;
903 FOR_EACH_INSN_USE (ref, insn)
904 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
905 return true;
907 return false;
910 /* Check if comparison INSN may be transformed
911 into vector comparison. Currently we transform
912 zero checks only which look like:
914 (set (reg:CCZ 17 flags)
915 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
916 (subreg:SI (reg:DI x) 0))
917 (const_int 0 [0]))) */
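/* Such a pattern typically originates from a 64-bit equality test against
   zero compiled for a 32-bit target, e.g.

     long long x;
     if (x == 0) ...

   which is expanded as an IOR of the two SImode halves of X compared
   against zero.  */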
919 static bool
920 convertible_comparison_p (rtx_insn *insn)
922 if (!TARGET_SSE4_1)
923 return false;
925 rtx def_set = single_set (insn);
927 gcc_assert (def_set);
929 rtx src = SET_SRC (def_set);
930 rtx dst = SET_DEST (def_set);
932 gcc_assert (GET_CODE (src) == COMPARE);
934 if (GET_CODE (dst) != REG
935 || REGNO (dst) != FLAGS_REG
936 || GET_MODE (dst) != CCZmode)
937 return false;
939 rtx op1 = XEXP (src, 0);
940 rtx op2 = XEXP (src, 1);
942 if (op2 != CONST0_RTX (GET_MODE (op2)))
943 return false;
945 if (GET_CODE (op1) != IOR)
946 return false;
948 op2 = XEXP (op1, 1);
949 op1 = XEXP (op1, 0);
951 if (!SUBREG_P (op1)
952 || !SUBREG_P (op2)
953 || GET_MODE (op1) != SImode
954 || GET_MODE (op2) != SImode
955 || ((SUBREG_BYTE (op1) != 0
956 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
957 && (SUBREG_BYTE (op2) != 0
958 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
959 return false;
961 op1 = SUBREG_REG (op1);
962 op2 = SUBREG_REG (op2);
964 if (op1 != op2
965 || !REG_P (op1)
966 || GET_MODE (op1) != DImode)
967 return false;
969 return true;
972 /* The DImode version of scalar_to_vector_candidate_p. */
974 static bool
975 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
977 rtx def_set = single_set (insn);
979 if (!def_set)
980 return false;
982 if (has_non_address_hard_reg (insn))
983 return false;
985 rtx src = SET_SRC (def_set);
986 rtx dst = SET_DEST (def_set);
988 if (GET_CODE (src) == COMPARE)
989 return convertible_comparison_p (insn);
991 /* We are interested in DImode promotion only. */
992 if ((GET_MODE (src) != DImode
993 && !CONST_INT_P (src))
994 || GET_MODE (dst) != DImode)
995 return false;
997 if (!REG_P (dst) && !MEM_P (dst))
998 return false;
1000 switch (GET_CODE (src))
1002 case ASHIFTRT:
1003 if (!TARGET_AVX512VL)
1004 return false;
1005 /* FALLTHRU */
1007 case ASHIFT:
1008 case LSHIFTRT:
1009 if (!REG_P (XEXP (src, 1))
1010 && (!SUBREG_P (XEXP (src, 1))
1011 || SUBREG_BYTE (XEXP (src, 1)) != 0
1012 || !REG_P (SUBREG_REG (XEXP (src, 1))))
1013 && (!CONST_INT_P (XEXP (src, 1))
1014 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1015 return false;
1017 if (GET_MODE (XEXP (src, 1)) != QImode
1018 && !CONST_INT_P (XEXP (src, 1)))
1019 return false;
1020 break;
1022 case PLUS:
1023 case MINUS:
1024 case IOR:
1025 case XOR:
1026 case AND:
1027 if (!REG_P (XEXP (src, 1))
1028 && !MEM_P (XEXP (src, 1))
1029 && !CONST_INT_P (XEXP (src, 1)))
1030 return false;
1032 if (GET_MODE (XEXP (src, 1)) != DImode
1033 && !CONST_INT_P (XEXP (src, 1)))
1034 return false;
1035 break;
1037 case NEG:
1038 case NOT:
1039 break;
1041 case REG:
1042 return true;
1044 case MEM:
1045 case CONST_INT:
1046 return REG_P (dst);
1048 default:
1049 return false;
1052 if (!REG_P (XEXP (src, 0))
1053 && !MEM_P (XEXP (src, 0))
1054 && !CONST_INT_P (XEXP (src, 0))
1055 /* Check for andnot case. */
1056 && (GET_CODE (src) != AND
1057 || GET_CODE (XEXP (src, 0)) != NOT
1058 || !REG_P (XEXP (XEXP (src, 0), 0))))
1059 return false;
1061 if (GET_MODE (XEXP (src, 0)) != DImode
1062 && !CONST_INT_P (XEXP (src, 0)))
1063 return false;
1065 return true;
1068 /* The TImode version of scalar_to_vector_candidate_p. */
1070 static bool
1071 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1073 rtx def_set = single_set (insn);
1075 if (!def_set)
1076 return false;
1078 if (has_non_address_hard_reg (insn))
1079 return false;
1081 rtx src = SET_SRC (def_set);
1082 rtx dst = SET_DEST (def_set);
1084 /* Only TImode load and store are allowed. */
1085 if (GET_MODE (dst) != TImode)
1086 return false;
1088 if (MEM_P (dst))
1090 /* Check for a store. The memory must be aligned, or unaligned stores
1091 must be optimal. Only support a store from a register, a standard SSE
1092 constant, or a CONST_WIDE_INT generated from a piecewise store.
1094 ??? Verify performance impact before enabling CONST_INT for
1095 __int128 store. */
1096 if (misaligned_operand (dst, TImode)
1097 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1098 return false;
1100 switch (GET_CODE (src))
1102 default:
1103 return false;
1105 case REG:
1106 case CONST_WIDE_INT:
1107 return true;
1109 case CONST_INT:
1110 return standard_sse_constant_p (src, TImode);
1113 else if (MEM_P (src))
1115 /* Check for a load. The memory must be aligned, or unaligned loads
1116 must be optimal. */
1117 return (REG_P (dst)
1118 && (!misaligned_operand (src, TImode)
1119 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1122 return false;
1125 /* Return 1 if INSN may be converted into a vector
1126 instruction. */
1128 static bool
1129 scalar_to_vector_candidate_p (rtx_insn *insn)
1131 if (TARGET_64BIT)
1132 return timode_scalar_to_vector_candidate_p (insn);
1133 else
1134 return dimode_scalar_to_vector_candidate_p (insn);
1137 /* The DImode version of remove_non_convertible_regs. */
1139 static void
1140 dimode_remove_non_convertible_regs (bitmap candidates)
1142 bitmap_iterator bi;
1143 unsigned id;
1144 bitmap regs = BITMAP_ALLOC (NULL);
1146 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1148 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1149 rtx reg = SET_DEST (def_set);
1151 if (!REG_P (reg)
1152 || bitmap_bit_p (regs, REGNO (reg))
1153 || HARD_REGISTER_P (reg))
1154 continue;
1156 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1157 def;
1158 def = DF_REF_NEXT_REG (def))
1160 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1162 if (dump_file)
1163 fprintf (dump_file,
1164 "r%d has non convertible definition in insn %d\n",
1165 REGNO (reg), DF_REF_INSN_UID (def));
1167 bitmap_set_bit (regs, REGNO (reg));
1168 break;
1173 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1175 for (df_ref def = DF_REG_DEF_CHAIN (id);
1176 def;
1177 def = DF_REF_NEXT_REG (def))
1178 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1180 if (dump_file)
1181 fprintf (dump_file, "Removing insn %d from candidates list\n",
1182 DF_REF_INSN_UID (def));
1184 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1188 BITMAP_FREE (regs);
1191 /* For a register REGNO, scan instructions for its defs and uses.
1192 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1194 static void
1195 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1196 unsigned int regno)
1198 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1199 def;
1200 def = DF_REF_NEXT_REG (def))
1202 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1204 if (dump_file)
1205 fprintf (dump_file,
1206 "r%d has non convertible def in insn %d\n",
1207 regno, DF_REF_INSN_UID (def));
1209 bitmap_set_bit (regs, regno);
1210 break;
1214 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1215 ref;
1216 ref = DF_REF_NEXT_REG (ref))
1218 /* Debug instructions are skipped. */
1219 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1220 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1222 if (dump_file)
1223 fprintf (dump_file,
1224 "r%d has non convertible use in insn %d\n",
1225 regno, DF_REF_INSN_UID (ref));
1227 bitmap_set_bit (regs, regno);
1228 break;
1233 /* The TImode version of remove_non_convertible_regs. */
1235 static void
1236 timode_remove_non_convertible_regs (bitmap candidates)
1238 bitmap_iterator bi;
1239 unsigned id;
1240 bitmap regs = BITMAP_ALLOC (NULL);
1242 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1244 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1245 rtx dest = SET_DEST (def_set);
1246 rtx src = SET_SRC (def_set);
1248 if ((!REG_P (dest)
1249 || bitmap_bit_p (regs, REGNO (dest))
1250 || HARD_REGISTER_P (dest))
1251 && (!REG_P (src)
1252 || bitmap_bit_p (regs, REGNO (src))
1253 || HARD_REGISTER_P (src)))
1254 continue;
1256 if (REG_P (dest))
1257 timode_check_non_convertible_regs (candidates, regs,
1258 REGNO (dest));
1260 if (REG_P (src))
1261 timode_check_non_convertible_regs (candidates, regs,
1262 REGNO (src));
1265 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1267 for (df_ref def = DF_REG_DEF_CHAIN (id);
1268 def;
1269 def = DF_REF_NEXT_REG (def))
1270 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1272 if (dump_file)
1273 fprintf (dump_file, "Removing insn %d from candidates list\n",
1274 DF_REF_INSN_UID (def));
1276 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1279 for (df_ref ref = DF_REG_USE_CHAIN (id);
1280 ref;
1281 ref = DF_REF_NEXT_REG (ref))
1282 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1284 if (dump_file)
1285 fprintf (dump_file, "Removing insn %d from candidates list\n",
1286 DF_REF_INSN_UID (ref));
1288 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1292 BITMAP_FREE (regs);
1295 /* For a given bitmap of insn UIDs scan all instructions and
1296 remove an insn from CANDIDATES in case it has both convertible
1297 and non-convertible definitions.
1299 All insns in a bitmap are conversion candidates according to
1300 scalar_to_vector_candidate_p. Currently it implies all insns
1301 are single_set. */
1303 static void
1304 remove_non_convertible_regs (bitmap candidates)
1306 if (TARGET_64BIT)
1307 timode_remove_non_convertible_regs (candidates);
1308 else
1309 dimode_remove_non_convertible_regs (candidates);
1312 class scalar_chain
1314 public:
1315 scalar_chain ();
1316 virtual ~scalar_chain ();
1318 static unsigned max_id;
1320 /* ID of a chain. */
1321 unsigned int chain_id;
1322 /* A queue of instructions to be included into a chain. */
1323 bitmap queue;
1324 /* Instructions included into a chain. */
1325 bitmap insns;
1326 /* All registers defined by a chain. */
1327 bitmap defs;
1328 /* Registers used in both vector and scalar modes. */
1329 bitmap defs_conv;
1331 void build (bitmap candidates, unsigned insn_uid);
1332 virtual int compute_convert_gain () = 0;
1333 int convert ();
1335 protected:
1336 void add_to_queue (unsigned insn_uid);
1337 void emit_conversion_insns (rtx insns, rtx_insn *pos);
1339 private:
1340 void add_insn (bitmap candidates, unsigned insn_uid);
1341 void analyze_register_chain (bitmap candidates, df_ref ref);
1342 virtual void mark_dual_mode_def (df_ref def) = 0;
1343 virtual void convert_insn (rtx_insn *insn) = 0;
1344 virtual void convert_registers () = 0;
1347 class dimode_scalar_chain : public scalar_chain
1349 public:
1350 int compute_convert_gain ();
1351 private:
1352 void mark_dual_mode_def (df_ref def);
1353 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1354 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1355 void convert_insn (rtx_insn *insn);
1356 void convert_op (rtx *op, rtx_insn *insn);
1357 void convert_reg (unsigned regno);
1358 void make_vector_copies (unsigned regno);
1359 void convert_registers ();
1360 int vector_const_cost (rtx exp);
1363 class timode_scalar_chain : public scalar_chain
1365 public:
1366 /* Conversion from TImode to V1TImode is always faster. */
1367 int compute_convert_gain () { return 1; }
1369 private:
1370 void mark_dual_mode_def (df_ref def);
1371 void fix_debug_reg_uses (rtx reg);
1372 void convert_insn (rtx_insn *insn);
1373 /* We don't convert registers to a different size. */
1374 void convert_registers () {}
1377 unsigned scalar_chain::max_id = 0;
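/* The pass driver later in this file (convert_scalars_to_vector) roughly
   builds one chain per remaining candidate insn, calls
   compute_convert_gain () to decide whether the vector form is
   profitable, and only invokes convert () on chains with a positive
   gain; unprofitable chains are simply discarded.  */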
1379 /* Initialize new chain. */
1381 scalar_chain::scalar_chain ()
1383 chain_id = ++max_id;
1385 if (dump_file)
1386 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1388 bitmap_obstack_initialize (NULL);
1389 insns = BITMAP_ALLOC (NULL);
1390 defs = BITMAP_ALLOC (NULL);
1391 defs_conv = BITMAP_ALLOC (NULL);
1392 queue = NULL;
1395 /* Free chain's data. */
1397 scalar_chain::~scalar_chain ()
1399 BITMAP_FREE (insns);
1400 BITMAP_FREE (defs);
1401 BITMAP_FREE (defs_conv);
1402 bitmap_obstack_release (NULL);
1405 /* Add instruction into chains' queue. */
1407 void
1408 scalar_chain::add_to_queue (unsigned insn_uid)
1410 if (bitmap_bit_p (insns, insn_uid)
1411 || bitmap_bit_p (queue, insn_uid))
1412 return;
1414 if (dump_file)
1415 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
1416 insn_uid, chain_id);
1417 bitmap_set_bit (queue, insn_uid);
1420 /* For DImode conversion, mark register defined by DEF as requiring
1421 conversion. */
1423 void
1424 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1426 gcc_assert (DF_REF_REG_DEF_P (def));
1428 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1429 return;
1431 if (dump_file)
1432 fprintf (dump_file,
1433 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1434 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1436 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1439 /* For TImode conversion, it is unused. */
1441 void
1442 timode_scalar_chain::mark_dual_mode_def (df_ref)
1444 gcc_unreachable ();
1447 /* Check REF's chain to add new insns into a queue
1448 and find registers requiring conversion. */
1450 void
1451 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1453 df_link *chain;
1455 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1456 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1457 add_to_queue (DF_REF_INSN_UID (ref));
1459 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1461 unsigned uid = DF_REF_INSN_UID (chain->ref);
1463 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1464 continue;
1466 if (!DF_REF_REG_MEM_P (chain->ref))
1468 if (bitmap_bit_p (insns, uid))
1469 continue;
1471 if (bitmap_bit_p (candidates, uid))
1473 add_to_queue (uid);
1474 continue;
1478 if (DF_REF_REG_DEF_P (chain->ref))
1480 if (dump_file)
1481 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
1482 DF_REF_REGNO (chain->ref), uid);
1483 mark_dual_mode_def (chain->ref);
1485 else
1487 if (dump_file)
1488 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
1489 DF_REF_REGNO (chain->ref), uid);
1490 mark_dual_mode_def (ref);
1495 /* Add instruction into a chain. */
1497 void
1498 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1500 if (bitmap_bit_p (insns, insn_uid))
1501 return;
1503 if (dump_file)
1504 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
1506 bitmap_set_bit (insns, insn_uid);
1508 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1509 rtx def_set = single_set (insn);
1510 if (def_set && REG_P (SET_DEST (def_set))
1511 && !HARD_REGISTER_P (SET_DEST (def_set)))
1512 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1514 df_ref ref;
1515 df_ref def;
1516 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1517 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1518 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1519 def;
1520 def = DF_REF_NEXT_REG (def))
1521 analyze_register_chain (candidates, def);
1522 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1523 if (!DF_REF_REG_MEM_P (ref))
1524 analyze_register_chain (candidates, ref);
1527 /* Build new chain starting from insn INSN_UID recursively
1528 adding all dependent uses and definitions. */
1530 void
1531 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1533 queue = BITMAP_ALLOC (NULL);
1534 bitmap_set_bit (queue, insn_uid);
1536 if (dump_file)
1537 fprintf (dump_file, "Building chain #%d...\n", chain_id);
1539 while (!bitmap_empty_p (queue))
1541 insn_uid = bitmap_first_set_bit (queue);
1542 bitmap_clear_bit (queue, insn_uid);
1543 bitmap_clear_bit (candidates, insn_uid);
1544 add_insn (candidates, insn_uid);
1547 if (dump_file)
1549 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1550 fprintf (dump_file, " insns: ");
1551 dump_bitmap (dump_file, insns);
1552 if (!bitmap_empty_p (defs_conv))
1554 bitmap_iterator bi;
1555 unsigned id;
1556 const char *comma = "";
1557 fprintf (dump_file, " defs to convert: ");
1558 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1560 fprintf (dump_file, "%sr%d", comma, id);
1561 comma = ", ";
1563 fprintf (dump_file, "\n");
1567 BITMAP_FREE (queue);
1570 /* Return the cost of building a vector constant
1571 instead of using a scalar one. */
1574 dimode_scalar_chain::vector_const_cost (rtx exp)
1576 gcc_assert (CONST_INT_P (exp));
1578 if (standard_sse_constant_p (exp, V2DImode))
1579 return COSTS_N_INSNS (1);
1580 return ix86_cost->sse_load[1];
1583 /* Compute a gain for chain conversion. */
1586 dimode_scalar_chain::compute_convert_gain ()
1588 bitmap_iterator bi;
1589 unsigned insn_uid;
1590 int gain = 0;
1591 int cost = 0;
1593 if (dump_file)
1594 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1596 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1598 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1599 rtx def_set = single_set (insn);
1600 rtx src = SET_SRC (def_set);
1601 rtx dst = SET_DEST (def_set);
1603 if (REG_P (src) && REG_P (dst))
1604 gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1605 else if (REG_P (src) && MEM_P (dst))
1606 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1607 else if (MEM_P (src) && REG_P (dst))
1608 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1609 else if (GET_CODE (src) == ASHIFT
1610 || GET_CODE (src) == ASHIFTRT
1611 || GET_CODE (src) == LSHIFTRT)
1613 if (CONST_INT_P (XEXP (src, 0)))
1614 gain -= vector_const_cost (XEXP (src, 0));
1615 if (CONST_INT_P (XEXP (src, 1)))
1617 gain += ix86_cost->shift_const;
1618 if (INTVAL (XEXP (src, 1)) >= 32)
1619 gain -= COSTS_N_INSNS (1);
1621 else
1622 /* Additional gain for omitting two CMOVs. */
1623 gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1625 else if (GET_CODE (src) == PLUS
1626 || GET_CODE (src) == MINUS
1627 || GET_CODE (src) == IOR
1628 || GET_CODE (src) == XOR
1629 || GET_CODE (src) == AND)
1631 gain += ix86_cost->add;
1632 /* Additional gain for andnot for targets without BMI. */
1633 if (GET_CODE (XEXP (src, 0)) == NOT
1634 && !TARGET_BMI)
1635 gain += 2 * ix86_cost->add;
1637 if (CONST_INT_P (XEXP (src, 0)))
1638 gain -= vector_const_cost (XEXP (src, 0));
1639 if (CONST_INT_P (XEXP (src, 1)))
1640 gain -= vector_const_cost (XEXP (src, 1));
1642 else if (GET_CODE (src) == NEG
1643 || GET_CODE (src) == NOT)
1644 gain += ix86_cost->add - COSTS_N_INSNS (1);
1645 else if (GET_CODE (src) == COMPARE)
1647 /* Assume comparison cost is the same. */
1649 else if (CONST_INT_P (src))
1651 if (REG_P (dst))
1652 gain += COSTS_N_INSNS (2);
1653 else if (MEM_P (dst))
1654 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1655 gain -= vector_const_cost (src);
1657 else
1658 gcc_unreachable ();
1661 if (dump_file)
1662 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
1664 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1665 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1667 if (dump_file)
1668 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
1670 gain -= cost;
1672 if (dump_file)
1673 fprintf (dump_file, " Total gain: %d\n", gain);
1675 return gain;
1678 /* Replace REG in X with a V2DI subreg of NEW_REG. */
1681 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1683 if (x == reg)
1684 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1686 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1687 int i, j;
1688 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1690 if (fmt[i] == 'e')
1691 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1692 else if (fmt[i] == 'E')
1693 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1694 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1695 reg, new_reg);
1698 return x;
1701 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
1703 void
1704 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1705 rtx reg, rtx new_reg)
1707 replace_with_subreg (single_set (insn), reg, new_reg);
1710 /* Insert generated conversion instruction sequence INSNS
1711 after instruction AFTER. A new BB may be required in case the
1712 instruction has an EH region attached. */
1714 void
1715 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1717 if (!control_flow_insn_p (after))
1719 emit_insn_after (insns, after);
1720 return;
1723 basic_block bb = BLOCK_FOR_INSN (after);
1724 edge e = find_fallthru_edge (bb->succs);
1725 gcc_assert (e);
1727 basic_block new_bb = split_edge (e);
1728 emit_insn_after (insns, BB_HEAD (new_bb));
1731 /* Make vector copies for all definitions of register REGNO
1732 and replace its uses in the chain. */
1734 void
1735 dimode_scalar_chain::make_vector_copies (unsigned regno)
1737 rtx reg = regno_reg_rtx[regno];
1738 rtx vreg = gen_reg_rtx (DImode);
1739 bool count_reg = false;
1740 df_ref ref;
1742 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1743 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1745 df_ref use;
1747 /* Detect the count register of a shift instruction. */
1748 for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1749 if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1751 rtx_insn *insn = DF_REF_INSN (use);
1752 rtx def_set = single_set (insn);
1754 gcc_assert (def_set);
1756 rtx src = SET_SRC (def_set);
1758 if ((GET_CODE (src) == ASHIFT
1759 || GET_CODE (src) == ASHIFTRT
1760 || GET_CODE (src) == LSHIFTRT)
1761 && !CONST_INT_P (XEXP (src, 1))
1762 && reg_or_subregno (XEXP (src, 1)) == regno)
1763 count_reg = true;
1766 start_sequence ();
1767 if (count_reg)
1769 rtx qreg = gen_lowpart (QImode, reg);
1770 rtx tmp = gen_reg_rtx (SImode);
1772 if (TARGET_ZERO_EXTEND_WITH_AND
1773 && optimize_function_for_speed_p (cfun))
1775 emit_move_insn (tmp, const0_rtx);
1776 emit_insn (gen_movstrictqi
1777 (gen_lowpart (QImode, tmp), qreg));
1779 else
1780 emit_insn (gen_rtx_SET
1781 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1783 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1785 rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1786 emit_move_insn (slot, tmp);
1787 tmp = copy_rtx (slot);
1790 emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1792 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1794 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1795 emit_move_insn (adjust_address (tmp, SImode, 0),
1796 gen_rtx_SUBREG (SImode, reg, 0));
1797 emit_move_insn (adjust_address (tmp, SImode, 4),
1798 gen_rtx_SUBREG (SImode, reg, 4));
1799 emit_move_insn (vreg, tmp);
1801 else if (TARGET_SSE4_1)
1803 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1804 CONST0_RTX (V4SImode),
1805 gen_rtx_SUBREG (SImode, reg, 0)));
1806 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1807 gen_rtx_SUBREG (V4SImode, vreg, 0),
1808 gen_rtx_SUBREG (SImode, reg, 4),
1809 GEN_INT (2)));
1811 else
1813 rtx tmp = gen_reg_rtx (DImode);
1814 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1815 CONST0_RTX (V4SImode),
1816 gen_rtx_SUBREG (SImode, reg, 0)));
1817 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1818 CONST0_RTX (V4SImode),
1819 gen_rtx_SUBREG (SImode, reg, 4)));
1820 emit_insn (gen_vec_interleave_lowv4si
1821 (gen_rtx_SUBREG (V4SImode, vreg, 0),
1822 gen_rtx_SUBREG (V4SImode, vreg, 0),
1823 gen_rtx_SUBREG (V4SImode, tmp, 0)));
1825 rtx_insn *seq = get_insns ();
1826 end_sequence ();
1827 rtx_insn *insn = DF_REF_INSN (ref);
1828 emit_conversion_insns (seq, insn);
1830 if (dump_file)
1831 fprintf (dump_file,
1832 " Copied r%d to a vector register r%d for insn %d\n",
1833 regno, REGNO (vreg), INSN_UID (insn));
1836 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1837 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1839 rtx_insn *insn = DF_REF_INSN (ref);
1840 if (count_reg)
1842 rtx def_set = single_set (insn);
1843 gcc_assert (def_set);
1845 rtx src = SET_SRC (def_set);
1847 if ((GET_CODE (src) == ASHIFT
1848 || GET_CODE (src) == ASHIFTRT
1849 || GET_CODE (src) == LSHIFTRT)
1850 && !CONST_INT_P (XEXP (src, 1))
1851 && reg_or_subregno (XEXP (src, 1)) == regno)
1852 XEXP (src, 1) = vreg;
1854 else
1855 replace_with_subreg_in_insn (insn, reg, vreg);
1857 if (dump_file)
1858 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
1859 regno, REGNO (vreg), INSN_UID (insn));
1863 /* Convert all definitions of register REGNO
1864 and fix its uses. Scalar copies may be created
1865 in case the register is used in a non-convertible insn. */
1867 void
1868 dimode_scalar_chain::convert_reg (unsigned regno)
1870 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1871 rtx reg = regno_reg_rtx[regno];
1872 rtx scopy = NULL_RTX;
1873 df_ref ref;
1874 bitmap conv;
1876 conv = BITMAP_ALLOC (NULL);
1877 bitmap_copy (conv, insns);
1879 if (scalar_copy)
1880 scopy = gen_reg_rtx (DImode);
1882 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1884 rtx_insn *insn = DF_REF_INSN (ref);
1885 rtx def_set = single_set (insn);
1886 rtx src = SET_SRC (def_set);
1887 rtx reg = DF_REF_REG (ref);
1889 if (!MEM_P (src))
1891 replace_with_subreg_in_insn (insn, reg, reg);
1892 bitmap_clear_bit (conv, INSN_UID (insn));
1895 if (scalar_copy)
1897 start_sequence ();
1898 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1900 rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1901 emit_move_insn (tmp, reg);
1902 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1903 adjust_address (tmp, SImode, 0));
1904 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1905 adjust_address (tmp, SImode, 4));
1907 else if (TARGET_SSE4_1)
1909 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1910 emit_insn
1911 (gen_rtx_SET
1912 (gen_rtx_SUBREG (SImode, scopy, 0),
1913 gen_rtx_VEC_SELECT (SImode,
1914 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1916 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1917 emit_insn
1918 (gen_rtx_SET
1919 (gen_rtx_SUBREG (SImode, scopy, 4),
1920 gen_rtx_VEC_SELECT (SImode,
1921 gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1923 else
1925 rtx vcopy = gen_reg_rtx (V2DImode);
1926 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1927 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1928 gen_rtx_SUBREG (SImode, vcopy, 0));
1929 emit_move_insn (vcopy,
1930 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1931 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1932 gen_rtx_SUBREG (SImode, vcopy, 0));
1934 rtx_insn *seq = get_insns ();
1935 end_sequence ();
1936 emit_conversion_insns (seq, insn);
1938 if (dump_file)
1939 fprintf (dump_file,
1940 " Copied r%d to a scalar register r%d for insn %d\n",
1941 regno, REGNO (scopy), INSN_UID (insn));
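/* Rewrite the remaining uses of REGNO.  Uses inside the chain are
   converted to operate on the vector register (a variable shift count
   is first zero-extended from its low 8 bits into a vector register);
   non-debug uses outside the chain are redirected to the scalar copy
   SCOPY.  */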
1945 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1946 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1948 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1950 rtx_insn *insn = DF_REF_INSN (ref);
1952 rtx def_set = single_set (insn);
1953 gcc_assert (def_set);
1955 rtx src = SET_SRC (def_set);
1956 rtx dst = SET_DEST (def_set);
1958 if ((GET_CODE (src) == ASHIFT
1959 || GET_CODE (src) == ASHIFTRT
1960 || GET_CODE (src) == LSHIFTRT)
1961 && !CONST_INT_P (XEXP (src, 1))
1962 && reg_or_subregno (XEXP (src, 1)) == regno)
1964 rtx tmp2 = gen_reg_rtx (V2DImode);
1966 start_sequence ();
1968 if (TARGET_SSE4_1)
1969 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1970 (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1971 else
1973 rtx vec_cst
1974 = gen_rtx_CONST_VECTOR (V2DImode,
1975 gen_rtvec (2, GEN_INT (0xff),
1976 const0_rtx));
1977 vec_cst
1978 = validize_mem (force_const_mem (V2DImode, vec_cst));
1980 emit_insn (gen_rtx_SET
1981 (tmp2,
1982 gen_rtx_AND (V2DImode,
1983 gen_rtx_SUBREG (V2DImode, reg, 0),
1984 vec_cst)));
1986 rtx_insn *seq = get_insns ();
1987 end_sequence ();
1989 emit_insn_before (seq, insn);
1991 XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
1993 else if (!MEM_P (dst) || !REG_P (src))
1994 replace_with_subreg_in_insn (insn, reg, reg);
1996 bitmap_clear_bit (conv, INSN_UID (insn));
1999 /* Skip debug insns and uninitialized uses. */
2000 else if (DF_REF_CHAIN (ref)
2001 && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2003 gcc_assert (scopy);
2004 replace_rtx (DF_REF_INSN (ref), reg, scopy);
2005 df_insn_rescan (DF_REF_INSN (ref));
2008 BITMAP_FREE (conv);
2011 /* Convert operand OP in INSN. We only need to handle
2012 memory operands and uninitialized registers here.
2013 All other register uses are converted during
2014 register conversion. */
2016 void
2017 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2019 *op = copy_rtx_if_shared (*op);
2021 if (GET_CODE (*op) == NOT)
2023 convert_op (&XEXP (*op, 0), insn);
2024 PUT_MODE (*op, V2DImode);
2026 else if (MEM_P (*op))
2028 rtx tmp = gen_reg_rtx (DImode);
2030 emit_insn_before (gen_move_insn (tmp, *op), insn);
2031 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2033 if (dump_file)
2034 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
2035 INSN_UID (insn), REGNO (tmp));
2037 else if (REG_P (*op))
2039 /* We may not have converted this register use if
2040 the register has no definition. Otherwise it
2041 should have been converted in convert_reg. */
2042 df_ref ref;
2043 FOR_EACH_INSN_USE (ref, insn)
2044 if (DF_REF_REGNO (ref) == REGNO (*op))
2046 gcc_assert (!DF_REF_CHAIN (ref));
2047 break;
2049 *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2051 else if (CONST_INT_P (*op))
2053 rtx vec_cst;
2054 rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2056 /* Prefer all ones vector in case of -1. */
2057 if (constm1_operand (*op, GET_MODE (*op)))
2058 vec_cst = CONSTM1_RTX (V2DImode);
2059 else
2060 vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2061 gen_rtvec (2, *op, const0_rtx));
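/* Constants other than all-zeros and all-ones are not standard SSE
   constants and cannot be generated with a single pxor/pcmpeq, so
   force them into the constant pool.  */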
2063 if (!standard_sse_constant_p (vec_cst, V2DImode))
2065 start_sequence ();
2066 vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2067 rtx_insn *seq = get_insns ();
2068 end_sequence ();
2069 emit_insn_before (seq, insn);
2072 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2073 *op = tmp;
2075 else
2077 gcc_assert (SUBREG_P (*op));
2078 gcc_assert (GET_MODE (*op) == V2DImode);
2082 /* Convert INSN to vector mode. */
2084 void
2085 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2087 rtx def_set = single_set (insn);
2088 rtx src = SET_SRC (def_set);
2089 rtx dst = SET_DEST (def_set);
2090 rtx subreg;
2092 if (MEM_P (dst) && !REG_P (src))
2094 /* The converted operation cannot store directly to memory,
2095 so a temporary register is required. */
2096 rtx tmp = gen_reg_rtx (DImode);
2097 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2098 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
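/* The original memory destination is now written from TMP by the move
   passed to emit_conversion_insns above; the converted insn itself
   writes TMP through a V2DImode subreg.  */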
2101 switch (GET_CODE (src))
2103 case ASHIFT:
2104 case ASHIFTRT:
2105 case LSHIFTRT:
2106 convert_op (&XEXP (src, 0), insn);
2107 PUT_MODE (src, V2DImode);
2108 break;
2110 case PLUS:
2111 case MINUS:
2112 case IOR:
2113 case XOR:
2114 case AND:
2115 convert_op (&XEXP (src, 0), insn);
2116 convert_op (&XEXP (src, 1), insn);
2117 PUT_MODE (src, V2DImode);
2118 break;
2120 case NEG:
2121 src = XEXP (src, 0);
2122 convert_op (&src, insn);
2123 subreg = gen_reg_rtx (V2DImode);
2124 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2125 src = gen_rtx_MINUS (V2DImode, subreg, src);
2126 break;
2128 case NOT:
2129 src = XEXP (src, 0);
2130 convert_op (&src, insn);
2131 subreg = gen_reg_rtx (V2DImode);
2132 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2133 src = gen_rtx_XOR (V2DImode, src, subreg);
2134 break;
2136 case MEM:
2137 if (!REG_P (dst))
2138 convert_op (&src, insn);
2139 break;
2141 case REG:
2142 if (!MEM_P (dst))
2143 convert_op (&src, insn);
2144 break;
2146 case SUBREG:
2147 gcc_assert (GET_MODE (src) == V2DImode);
2148 break;
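/* A convertible comparison is implemented with ptest: the 64-bit value
   is duplicated into both halves of an XMM register with punpcklqdq
   and then tested, setting the flags register directly.  */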
2150 case COMPARE:
2151 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2153 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2154 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2156 if (REG_P (src))
2157 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2158 else
2159 subreg = copy_rtx_if_shared (src);
2160 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2161 copy_rtx_if_shared (subreg),
2162 copy_rtx_if_shared (subreg)),
2163 insn);
2164 dst = gen_rtx_REG (CCmode, FLAGS_REG);
2165 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2166 copy_rtx_if_shared (src)),
2167 UNSPEC_PTEST);
2168 break;
2170 case CONST_INT:
2171 convert_op (&src, insn);
2172 break;
2174 default:
2175 gcc_unreachable ();
2178 SET_SRC (def_set) = src;
2179 SET_DEST (def_set) = dst;
2181 /* Drop possible dead definitions. */
2182 PATTERN (insn) = def_set;
2184 INSN_CODE (insn) = -1;
2185 recog_memoized (insn);
2186 df_insn_rescan (insn);
2189 /* Fix uses of converted REG in debug insns. */
2191 void
2192 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2194 if (!flag_var_tracking)
2195 return;
2197 df_ref ref, next;
2198 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2200 rtx_insn *insn = DF_REF_INSN (ref);
2201 /* Make sure the next ref is for a different instruction,
2202 so that we're not affected by the rescan. */
2203 next = DF_REF_NEXT_REG (ref);
2204 while (next && DF_REF_INSN (next) == insn)
2205 next = DF_REF_NEXT_REG (next);
2207 if (DEBUG_INSN_P (insn))
2209 /* It may be a debug insn with a TImode variable in
2210 a register. */
2211 bool changed = false;
2212 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2214 rtx *loc = DF_REF_LOC (ref);
2215 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2217 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2218 changed = true;
2221 if (changed)
2222 df_insn_rescan (insn);
2227 /* Convert INSN from TImode to V1TImode. */
2229 void
2230 timode_scalar_chain::convert_insn (rtx_insn *insn)
2232 rtx def_set = single_set (insn);
2233 rtx src = SET_SRC (def_set);
2234 rtx dst = SET_DEST (def_set);
2236 switch (GET_CODE (dst))
2238 case REG:
2240 rtx tmp = find_reg_equal_equiv_note (insn);
2241 if (tmp)
2242 PUT_MODE (XEXP (tmp, 0), V1TImode);
2243 PUT_MODE (dst, V1TImode);
2244 fix_debug_reg_uses (dst);
2246 break;
2247 case MEM:
2248 PUT_MODE (dst, V1TImode);
2249 break;
2251 default:
2252 gcc_unreachable ();
2255 switch (GET_CODE (src))
2257 case REG:
2258 PUT_MODE (src, V1TImode);
2259 /* Call fix_debug_reg_uses only if SRC is never defined. */
2260 if (!DF_REG_DEF_CHAIN (REGNO (src)))
2261 fix_debug_reg_uses (src);
2262 break;
2264 case MEM:
2265 PUT_MODE (src, V1TImode);
2266 break;
2268 case CONST_WIDE_INT:
2269 if (NONDEBUG_INSN_P (insn))
2271 /* Since there is no instruction to store a 128-bit constant,
2272 a temporary register is required. */
2273 rtx tmp = gen_reg_rtx (V1TImode);
2274 start_sequence ();
2275 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2276 src = validize_mem (force_const_mem (V1TImode, src));
2277 rtx_insn *seq = get_insns ();
2278 end_sequence ();
2279 if (seq)
2280 emit_insn_before (seq, insn);
2281 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2282 dst = tmp;
2284 break;
2286 case CONST_INT:
2287 switch (standard_sse_constant_p (src, TImode))
2289 case 1:
2290 src = CONST0_RTX (GET_MODE (dst));
2291 break;
2292 case 2:
2293 src = CONSTM1_RTX (GET_MODE (dst));
2294 break;
2295 default:
2296 gcc_unreachable ();
2298 if (NONDEBUG_INSN_P (insn))
2300 rtx tmp = gen_reg_rtx (V1TImode);
2301 /* Since there is no instruction to store a standard SSE
2302 constant, a temporary register is required. */
2303 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2304 dst = tmp;
2306 break;
2308 default:
2309 gcc_unreachable ();
2312 SET_SRC (def_set) = src;
2313 SET_DEST (def_set) = dst;
2315 /* Drop possible dead definitions. */
2316 PATTERN (insn) = def_set;
2318 INSN_CODE (insn) = -1;
2319 recog_memoized (insn);
2320 df_insn_rescan (insn);
2323 void
2324 dimode_scalar_chain::convert_registers ()
2326 bitmap_iterator bi;
2327 unsigned id;
2329 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2330 convert_reg (id);
2332 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2333 make_vector_copies (id);
2336 /* Convert the whole chain, creating the required register
2337 conversions and copies. */
2340 scalar_chain::convert ()
2342 bitmap_iterator bi;
2343 unsigned id;
2344 int converted_insns = 0;
2346 if (!dbg_cnt (stv_conversion))
2347 return 0;
2349 if (dump_file)
2350 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2352 convert_registers ();
2354 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2356 convert_insn (DF_INSN_UID_GET (id)->insn);
2357 converted_insns++;
2360 return converted_insns;
2363 /* Main STV pass function. Find and convert scalar
2364 instructions into vector mode when profitable. */
2366 static unsigned int
2367 convert_scalars_to_vector ()
2369 basic_block bb;
2370 bitmap candidates;
2371 int converted_insns = 0;
2373 bitmap_obstack_initialize (NULL);
2374 candidates = BITMAP_ALLOC (NULL);
2376 calculate_dominance_info (CDI_DOMINATORS);
2377 df_set_flags (DF_DEFER_INSN_RESCAN);
2378 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2379 df_md_add_problem ();
2380 df_analyze ();
2382 /* Find all instructions we want to convert into vector mode. */
2383 if (dump_file)
2384 fprintf (dump_file, "Searching for mode conversion candidates...\n");
2386 FOR_EACH_BB_FN (bb, cfun)
2388 rtx_insn *insn;
2389 FOR_BB_INSNS (bb, insn)
2390 if (scalar_to_vector_candidate_p (insn))
2392 if (dump_file)
2393 fprintf (dump_file, " insn %d is marked as a candidate\n",
2394 INSN_UID (insn));
2396 bitmap_set_bit (candidates, INSN_UID (insn));
2400 remove_non_convertible_regs (candidates);
2402 if (bitmap_empty_p (candidates))
2403 if (dump_file)
2404 fprintf (dump_file, "There are no candidates for optimization.\n");
2406 while (!bitmap_empty_p (candidates))
2408 unsigned uid = bitmap_first_set_bit (candidates);
2409 scalar_chain *chain;
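/* On 64-bit targets chains of TImode operations are converted to
   V1TImode; on 32-bit targets chains of DImode operations are
   converted to V2DImode.  */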
2411 if (TARGET_64BIT)
2412 chain = new timode_scalar_chain;
2413 else
2414 chain = new dimode_scalar_chain;
2416 /* Find the instruction chain we want to convert to vector mode.
2417 Check all uses and definitions to estimate all required
2418 conversions. */
2419 chain->build (candidates, uid);
2421 if (chain->compute_convert_gain () > 0)
2422 converted_insns += chain->convert ();
2423 else
2424 if (dump_file)
2425 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2426 chain->chain_id);
2428 delete chain;
2431 if (dump_file)
2432 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2434 BITMAP_FREE (candidates);
2435 bitmap_obstack_release (NULL);
2436 df_process_deferred_rescans ();
2438 /* Conversion means we may have 128-bit register spills/fills
2439 which require an aligned stack. */
2440 if (converted_insns)
2442 if (crtl->stack_alignment_needed < 128)
2443 crtl->stack_alignment_needed = 128;
2444 if (crtl->stack_alignment_estimated < 128)
2445 crtl->stack_alignment_estimated = 128;
2446 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
2447 if (TARGET_64BIT)
2448 for (tree parm = DECL_ARGUMENTS (current_function_decl);
2449 parm; parm = DECL_CHAIN (parm))
2451 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2452 continue;
2453 if (DECL_RTL_SET_P (parm)
2454 && GET_MODE (DECL_RTL (parm)) == V1TImode)
2456 rtx r = DECL_RTL (parm);
2457 if (REG_P (r))
2458 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2460 if (DECL_INCOMING_RTL (parm)
2461 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2463 rtx r = DECL_INCOMING_RTL (parm);
2464 if (REG_P (r))
2465 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2470 return 0;
2473 namespace {
2475 const pass_data pass_data_insert_vzeroupper =
2477 RTL_PASS, /* type */
2478 "vzeroupper", /* name */
2479 OPTGROUP_NONE, /* optinfo_flags */
2480 TV_MACH_DEP, /* tv_id */
2481 0, /* properties_required */
2482 0, /* properties_provided */
2483 0, /* properties_destroyed */
2484 0, /* todo_flags_start */
2485 TODO_df_finish, /* todo_flags_finish */
2488 class pass_insert_vzeroupper : public rtl_opt_pass
2490 public:
2491 pass_insert_vzeroupper(gcc::context *ctxt)
2492 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2495 /* opt_pass methods: */
2496 virtual bool gate (function *)
2498 return TARGET_AVX && !TARGET_AVX512F
2499 && TARGET_VZEROUPPER && flag_expensive_optimizations
2500 && !optimize_size;
2503 virtual unsigned int execute (function *)
2505 return rest_of_handle_insert_vzeroupper ();
2508 }; // class pass_insert_vzeroupper
2510 const pass_data pass_data_stv =
2512 RTL_PASS, /* type */
2513 "stv", /* name */
2514 OPTGROUP_NONE, /* optinfo_flags */
2515 TV_MACH_DEP, /* tv_id */
2516 0, /* properties_required */
2517 0, /* properties_provided */
2518 0, /* properties_destroyed */
2519 0, /* todo_flags_start */
2520 TODO_df_finish, /* todo_flags_finish */
2523 class pass_stv : public rtl_opt_pass
2525 public:
2526 pass_stv (gcc::context *ctxt)
2527 : rtl_opt_pass (pass_data_stv, ctxt),
2528 timode_p (false)
2531 /* opt_pass methods: */
2532 virtual bool gate (function *)
2534 return (timode_p == !!TARGET_64BIT
2535 && TARGET_STV && TARGET_SSE2 && optimize > 1);
2538 virtual unsigned int execute (function *)
2540 return convert_scalars_to_vector ();
2543 opt_pass *clone ()
2545 return new pass_stv (m_ctxt);
2548 void set_pass_param (unsigned int n, bool param)
2550 gcc_assert (n == 0);
2551 timode_p = param;
2554 private:
2555 bool timode_p;
2556 }; // class pass_stv
2558 } // anon namespace
2560 rtl_opt_pass *
2561 make_pass_insert_vzeroupper (gcc::context *ctxt)
2563 return new pass_insert_vzeroupper (ctxt);
2566 rtl_opt_pass *
2567 make_pass_stv (gcc::context *ctxt)
2569 return new pass_stv (ctxt);
2572 /* Inserting ENDBRANCH instructions. */
2574 static unsigned int
2575 rest_of_insert_endbranch (void)
2577 timevar_push (TV_MACH_DEP);
2579 rtx cet_eb;
2580 rtx_insn *insn;
2581 basic_block bb;
2583 /* Currently emit an ENDBRANCH if this is a tracked function, i.e.
2584 'nocf_check' is absent among the function attributes. Later an
2585 optimization will be introduced to analyze whether the address of a
2586 static function is taken. A static function whose address is not
2587 taken will get a nocf_check attribute; this reduces the number of ENDBRANCH insns. */
2589 if (!lookup_attribute ("nocf_check",
2590 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2591 && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2593 cet_eb = gen_nop_endbr ();
2595 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2596 insn = BB_HEAD (bb);
2597 emit_insn_before (cet_eb, insn);
2600 bb = 0;
2601 FOR_EACH_BB_FN (bb, cfun)
2603 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2604 insn = NEXT_INSN (insn))
2606 if (INSN_P (insn) && GET_CODE (insn) == CALL_INSN)
2608 rtx_insn *next_insn = insn;
2610 while ((next_insn != BB_END (bb))
2611 && (DEBUG_INSN_P (NEXT_INSN (next_insn))
2612 || NOTE_P (NEXT_INSN (next_insn))
2613 || BARRIER_P (NEXT_INSN (next_insn))))
2614 next_insn = NEXT_INSN (next_insn);
2616 /* Generate ENDBRANCH after a CALL that can return more than
2617 once, such as setjmp-like functions. */
2618 if (find_reg_note (insn, REG_SETJMP, NULL) != NULL)
2620 cet_eb = gen_nop_endbr ();
2621 emit_insn_after (cet_eb, next_insn);
2623 continue;
2626 if (INSN_P (insn) && JUMP_P (insn) && flag_cet_switch)
2628 rtx target = JUMP_LABEL (insn);
2629 if (target == NULL_RTX || ANY_RETURN_P (target))
2630 continue;
2632 /* Check whether the jump is a switch-table jump. */
2633 rtx_insn *label = as_a<rtx_insn *> (target);
2634 rtx_insn *table = next_insn (label);
2635 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2636 continue;
2638 /* For the indirect jump, find all the places it jumps to and insert
2639 an ENDBRANCH there. This is done under a special flag that
2640 controls ENDBRANCH generation for switch statements. */
2641 edge_iterator ei;
2642 edge e;
2643 basic_block dest_blk;
2645 FOR_EACH_EDGE (e, ei, bb->succs)
2647 rtx_insn *insn;
2649 dest_blk = e->dest;
2650 insn = BB_HEAD (dest_blk);
2651 gcc_assert (LABEL_P (insn));
2652 cet_eb = gen_nop_endbr ();
2653 emit_insn_after (cet_eb, insn);
2655 continue;
2658 if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2659 || (NOTE_P (insn)
2660 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2661 /* TODO. Check /s bit also. */
2663 cet_eb = gen_nop_endbr ();
2664 emit_insn_after (cet_eb, insn);
2665 continue;
2670 timevar_pop (TV_MACH_DEP);
2671 return 0;
2674 namespace {
2676 const pass_data pass_data_insert_endbranch =
2678 RTL_PASS, /* type. */
2679 "cet", /* name. */
2680 OPTGROUP_NONE, /* optinfo_flags. */
2681 TV_MACH_DEP, /* tv_id. */
2682 0, /* properties_required. */
2683 0, /* properties_provided. */
2684 0, /* properties_destroyed. */
2685 0, /* todo_flags_start. */
2686 0, /* todo_flags_finish. */
2689 class pass_insert_endbranch : public rtl_opt_pass
2691 public:
2692 pass_insert_endbranch (gcc::context *ctxt)
2693 : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2696 /* opt_pass methods: */
2697 virtual bool gate (function *)
2699 return ((flag_cf_protection & CF_BRANCH) && TARGET_IBT);
2702 virtual unsigned int execute (function *)
2704 return rest_of_insert_endbranch ();
2707 }; // class pass_insert_endbranch
2709 } // anon namespace
2711 rtl_opt_pass *
2712 make_pass_insert_endbranch (gcc::context *ctxt)
2714 return new pass_insert_endbranch (ctxt);
2717 /* Return true if a red-zone is in use. */
2719 bool
2720 ix86_using_red_zone (void)
2722 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2725 /* Return a string that documents the current -m options. The caller is
2726 responsible for freeing the string. */
2728 static char *
2729 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2730 int flags, int flags2,
2731 const char *arch, const char *tune,
2732 enum fpmath_unit fpmath, bool add_nl_p)
2734 struct ix86_target_opts
2736 const char *option; /* option string */
2737 HOST_WIDE_INT mask; /* isa mask options */
2740 /* This table is ordered so that options like -msse4.2 that imply other
2741 ISAs come first. Target string will be displayed in the same order. */
2742 static struct ix86_target_opts isa2_opts[] =
2744 { "-mgfni", OPTION_MASK_ISA_GFNI },
2745 { "-mrdpid", OPTION_MASK_ISA_RDPID },
2746 { "-msgx", OPTION_MASK_ISA_SGX },
2747 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2748 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2749 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2750 { "-mibt", OPTION_MASK_ISA_IBT },
2751 { "-mshstk", OPTION_MASK_ISA_SHSTK }
2753 static struct ix86_target_opts isa_opts[] =
2755 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2756 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2757 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2758 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2759 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2760 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2761 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2762 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2763 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2764 { "-mavx2", OPTION_MASK_ISA_AVX2 },
2765 { "-mfma", OPTION_MASK_ISA_FMA },
2766 { "-mxop", OPTION_MASK_ISA_XOP },
2767 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2768 { "-mf16c", OPTION_MASK_ISA_F16C },
2769 { "-mavx", OPTION_MASK_ISA_AVX },
2770 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
2771 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2772 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2773 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2774 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2775 { "-msse3", OPTION_MASK_ISA_SSE3 },
2776 { "-maes", OPTION_MASK_ISA_AES },
2777 { "-msha", OPTION_MASK_ISA_SHA },
2778 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2779 { "-msse2", OPTION_MASK_ISA_SSE2 },
2780 { "-msse", OPTION_MASK_ISA_SSE },
2781 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2782 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2783 { "-mmmx", OPTION_MASK_ISA_MMX },
2784 { "-mrtm", OPTION_MASK_ISA_RTM },
2785 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2786 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2787 { "-madx", OPTION_MASK_ISA_ADX },
2788 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2789 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2790 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2791 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2792 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2793 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2794 { "-mabm", OPTION_MASK_ISA_ABM },
2795 { "-mbmi", OPTION_MASK_ISA_BMI },
2796 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2797 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2798 { "-mtbm", OPTION_MASK_ISA_TBM },
2799 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2800 { "-mcx16", OPTION_MASK_ISA_CX16 },
2801 { "-msahf", OPTION_MASK_ISA_SAHF },
2802 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2803 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2804 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2805 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2806 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2807 { "-mclzero", OPTION_MASK_ISA_CLZERO },
2808 { "-mpku", OPTION_MASK_ISA_PKU },
2809 { "-mlwp", OPTION_MASK_ISA_LWP },
2810 { "-mhle", OPTION_MASK_ISA_HLE },
2811 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2812 { "-mmpx", OPTION_MASK_ISA_MPX },
2813 { "-mclwb", OPTION_MASK_ISA_CLWB }
2816 /* Flag options. */
2817 static struct ix86_target_opts flag_opts[] =
2819 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2820 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2821 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2822 { "-m80387", MASK_80387 },
2823 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2824 { "-malign-double", MASK_ALIGN_DOUBLE },
2825 { "-mcld", MASK_CLD },
2826 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2827 { "-mieee-fp", MASK_IEEE_FP },
2828 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2829 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2830 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2831 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2832 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2833 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2834 { "-mno-red-zone", MASK_NO_RED_ZONE },
2835 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2836 { "-mrecip", MASK_RECIP },
2837 { "-mrtd", MASK_RTD },
2838 { "-msseregparm", MASK_SSEREGPARM },
2839 { "-mstack-arg-probe", MASK_STACK_PROBE },
2840 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2841 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2842 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2843 { "-mvzeroupper", MASK_VZEROUPPER },
2844 { "-mstv", MASK_STV },
2845 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2846 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
2847 { "-mprefer-avx128", MASK_PREFER_AVX128 },
2848 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES }
2851 /* Additional flag options. */
2852 static struct ix86_target_opts flag2_opts[] =
2854 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY },
2855 { "-mprefer-avx256", OPTION_MASK_PREFER_AVX256 },
2858 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2859 + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2861 char isa_other[40];
2862 char isa2_other[40];
2863 char flags_other[40];
2864 char flags2_other[40];
2865 unsigned num = 0;
2866 unsigned i, j;
2867 char *ret;
2868 char *ptr;
2869 size_t len;
2870 size_t line_len;
2871 size_t sep_len;
2872 const char *abi;
2874 memset (opts, '\0', sizeof (opts));
2876 /* Add -march= option. */
2877 if (arch)
2879 opts[num][0] = "-march=";
2880 opts[num++][1] = arch;
2883 /* Add -mtune= option. */
2884 if (tune)
2886 opts[num][0] = "-mtune=";
2887 opts[num++][1] = tune;
2890 /* Add -m32/-m64/-mx32. */
2891 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2893 if ((isa & OPTION_MASK_ABI_64) != 0)
2894 abi = "-m64";
2895 else
2896 abi = "-mx32";
2897 isa &= ~ (OPTION_MASK_ISA_64BIT
2898 | OPTION_MASK_ABI_64
2899 | OPTION_MASK_ABI_X32);
2901 else
2902 abi = "-m32";
2903 opts[num++][0] = abi;
2905 /* Pick out the isa2 options. */
2906 for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2908 if ((isa2 & isa2_opts[i].mask) != 0)
2910 opts[num++][0] = isa2_opts[i].option;
2911 isa2 &= ~ isa2_opts[i].mask;
2915 if (isa2 && add_nl_p)
2917 opts[num++][0] = isa2_other;
2918 sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2921 /* Pick out the isa options. */
2922 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2924 if ((isa & isa_opts[i].mask) != 0)
2926 opts[num++][0] = isa_opts[i].option;
2927 isa &= ~ isa_opts[i].mask;
2931 if (isa && add_nl_p)
2933 opts[num++][0] = isa_other;
2934 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2937 /* Add flag options. */
2938 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2940 if ((flags & flag_opts[i].mask) != 0)
2942 opts[num++][0] = flag_opts[i].option;
2943 flags &= ~ flag_opts[i].mask;
2947 if (flags && add_nl_p)
2949 opts[num++][0] = flags_other;
2950 sprintf (flags_other, "(other flags: %#x)", flags);
2953 /* Add additional flag options. */
2954 for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2956 if ((flags2 & flag2_opts[i].mask) != 0)
2958 opts[num++][0] = flag2_opts[i].option;
2959 flags2 &= ~ flag2_opts[i].mask;
2963 if (flags2 && add_nl_p)
2965 opts[num++][0] = flags2_other;
2966 sprintf (flags2_other, "(other flags2: %#x)", flags2);
2969 /* Add -fpmath= option. */
2970 if (fpmath)
2972 opts[num][0] = "-mfpmath=";
2973 switch ((int) fpmath)
2975 case FPMATH_387:
2976 opts[num++][1] = "387";
2977 break;
2979 case FPMATH_SSE:
2980 opts[num++][1] = "sse";
2981 break;
2983 case FPMATH_387 | FPMATH_SSE:
2984 opts[num++][1] = "sse+387";
2985 break;
2987 default:
2988 gcc_unreachable ();
2992 /* Any options? */
2993 if (num == 0)
2994 return NULL;
2996 gcc_assert (num < ARRAY_SIZE (opts));
2998 /* Size the string. */
2999 len = 0;
3000 sep_len = (add_nl_p) ? 3 : 1;
3001 for (i = 0; i < num; i++)
3003 len += sep_len;
3004 for (j = 0; j < 2; j++)
3005 if (opts[i][j])
3006 len += strlen (opts[i][j]);
3009 /* Build the string. */
3010 ret = ptr = (char *) xmalloc (len);
3011 line_len = 0;
3013 for (i = 0; i < num; i++)
3015 size_t len2[2];
3017 for (j = 0; j < 2; j++)
3018 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3020 if (i != 0)
3022 *ptr++ = ' ';
3023 line_len++;
3025 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3027 *ptr++ = '\\';
3028 *ptr++ = '\n';
3029 line_len = 0;
3033 for (j = 0; j < 2; j++)
3034 if (opts[i][j])
3036 memcpy (ptr, opts[i][j], len2[j]);
3037 ptr += len2[j];
3038 line_len += len2[j];
3042 *ptr = '\0';
3043 gcc_assert (ret + len >= ptr);
3045 return ret;
3048 /* Return true if profiling code should be emitted before the
3049 prologue and false otherwise.
3050 Note: For x86 with "hotfix" it is sorried. */
3051 static bool
3052 ix86_profile_before_prologue (void)
3054 return flag_fentry != 0;
3057 /* Function that is callable from the debugger to print the current
3058 options. */
3059 void ATTRIBUTE_UNUSED
3060 ix86_debug_options (void)
3062 char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3063 target_flags, ix86_target_flags,
3064 ix86_arch_string,ix86_tune_string,
3065 ix86_fpmath, true);
3067 if (opts)
3069 fprintf (stderr, "%s\n\n", opts);
3070 free (opts);
3072 else
3073 fputs ("<no options>\n\n", stderr);
3075 return;
3078 /* Return true if T is one of the bytes we should avoid with
3079 -fmitigate-rop. */
3081 static bool
3082 ix86_rop_should_change_byte_p (int t)
3084 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3087 static const char *stringop_alg_names[] = {
3088 #define DEF_ENUM
3089 #define DEF_ALG(alg, name) #name,
3090 #include "stringop.def"
3091 #undef DEF_ENUM
3092 #undef DEF_ALG
3095 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3096 The string is of the following form (or a comma-separated list of such triples):
3098 strategy_alg:max_size:[align|noalign]
3100 where the full size range for the strategy is either [0, max_size] or
3101 [min_size, max_size], in which min_size is the max_size + 1 of the
3102 preceding range. The last size range must have max_size == -1.
3104 Examples:
3107 -mmemcpy-strategy=libcall:-1:noalign
3109 This is equivalent to (for known-size memcpy) -mstringop-strategy=libcall
3113 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3115 This tells the compiler to use the following strategy for memset:
3116 1) when the expected size is between [1, 16], use rep_8byte strategy;
3117 2) when the size is between [17, 2048], use vector_loop;
3118 3) when the size is > 2048, use libcall. */
3120 struct stringop_size_range
3122 int max;
3123 stringop_alg alg;
3124 bool noalign;
3127 static void
3128 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3130 const struct stringop_algs *default_algs;
3131 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3132 char *curr_range_str, *next_range_str;
3133 const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3134 int i = 0, n = 0;
3136 if (is_memset)
3137 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3138 else
3139 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3141 curr_range_str = strategy_str;
3145 int maxs;
3146 char alg_name[128];
3147 char align[16];
3148 next_range_str = strchr (curr_range_str, ',');
3149 if (next_range_str)
3150 *next_range_str++ = '\0';
3152 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3153 alg_name, &maxs, align))
3155 error ("wrong argument %qs to option %qs", curr_range_str, opt);
3156 return;
3159 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3161 error ("size ranges of option %qs should be increasing", opt);
3162 return;
3165 for (i = 0; i < last_alg; i++)
3166 if (!strcmp (alg_name, stringop_alg_names[i]))
3167 break;
3169 if (i == last_alg)
3171 error ("wrong strategy name %qs specified for option %qs",
3172 alg_name, opt);
3174 auto_vec <const char *> candidates;
3175 for (i = 0; i < last_alg; i++)
3176 if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3177 candidates.safe_push (stringop_alg_names[i]);
3179 char *s;
3180 const char *hint
3181 = candidates_list_and_hint (alg_name, s, candidates);
3182 if (hint)
3183 inform (input_location,
3184 "valid arguments to %qs are: %s; did you mean %qs?",
3185 opt, s, hint);
3186 else
3187 inform (input_location, "valid arguments to %qs are: %s",
3188 opt, s);
3189 XDELETEVEC (s);
3190 return;
3193 if ((stringop_alg) i == rep_prefix_8_byte
3194 && !TARGET_64BIT)
3196 /* rep; movq isn't available in 32-bit code. */
3197 error ("strategy name %qs specified for option %qs "
3198 "not supported for 32-bit code", alg_name, opt);
3199 return;
3202 input_ranges[n].max = maxs;
3203 input_ranges[n].alg = (stringop_alg) i;
3204 if (!strcmp (align, "align"))
3205 input_ranges[n].noalign = false;
3206 else if (!strcmp (align, "noalign"))
3207 input_ranges[n].noalign = true;
3208 else
3210 error ("unknown alignment %qs specified for option %qs", align, opt);
3211 return;
3213 n++;
3214 curr_range_str = next_range_str;
3216 while (curr_range_str);
3218 if (input_ranges[n - 1].max != -1)
3220 error ("the max value for the last size range should be -1"
3221 " for option %qs", opt);
3222 return;
3225 if (n > MAX_STRINGOP_ALGS)
3227 error ("too many size ranges specified in option %qs", opt);
3228 return;
3231 /* Now override the default algs array. */
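/* The default tables are declared const, hence the const_casts.  */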
3232 for (i = 0; i < n; i++)
3234 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3235 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3236 = input_ranges[i].alg;
3237 *const_cast<int *>(&default_algs->size[i].noalign)
3238 = input_ranges[i].noalign;
3243 /* Parse the -mtune-ctrl= option. When DUMP is true,
3244 print the features that are explicitly set. */
3246 static void
3247 parse_mtune_ctrl_str (bool dump)
3249 if (!ix86_tune_ctrl_string)
3250 return;
3252 char *next_feature_string = NULL;
3253 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3254 char *orig = curr_feature_string;
3255 int i;
3258 bool clear = false;
3260 next_feature_string = strchr (curr_feature_string, ',');
3261 if (next_feature_string)
3262 *next_feature_string++ = '\0';
3263 if (*curr_feature_string == '^')
3265 curr_feature_string++;
3266 clear = true;
3268 for (i = 0; i < X86_TUNE_LAST; i++)
3270 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3272 ix86_tune_features[i] = !clear;
3273 if (dump)
3274 fprintf (stderr, "Explicitly %s feature %s\n",
3275 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3276 break;
3279 if (i == X86_TUNE_LAST)
3280 error ("Unknown parameter to option -mtune-ctrl: %s",
3281 clear ? curr_feature_string - 1 : curr_feature_string);
3282 curr_feature_string = next_feature_string;
3284 while (curr_feature_string);
3285 free (orig);
3288 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3289 processor type. */
3291 static void
3292 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3294 unsigned int ix86_tune_mask = 1u << ix86_tune;
3295 int i;
3297 for (i = 0; i < X86_TUNE_LAST; ++i)
3299 if (ix86_tune_no_default)
3300 ix86_tune_features[i] = 0;
3301 else
3302 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3305 if (dump)
3307 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3308 for (i = 0; i < X86_TUNE_LAST; i++)
3309 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3310 ix86_tune_features[i] ? "on" : "off");
3313 parse_mtune_ctrl_str (dump);
3317 /* Default align_* from the processor table. */
3319 static void
3320 ix86_default_align (struct gcc_options *opts)
3322 if (opts->x_align_loops == 0)
3324 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3325 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3327 if (opts->x_align_jumps == 0)
3329 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3330 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3332 if (opts->x_align_functions == 0)
3334 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3338 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3340 static void
3341 ix86_override_options_after_change (void)
3343 ix86_default_align (&global_options);
3346 /* Override various settings based on options. If MAIN_ARGS_P, the
3347 options are from the command line, otherwise they are from
3348 attributes. Return true if there's an error related to march
3349 option. */
3351 static bool
3352 ix86_option_override_internal (bool main_args_p,
3353 struct gcc_options *opts,
3354 struct gcc_options *opts_set)
3356 int i;
3357 unsigned int ix86_arch_mask;
3358 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3360 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3361 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3362 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3363 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3364 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3365 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3366 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3367 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3368 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3369 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3370 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3371 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3372 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3373 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3374 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3375 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3376 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3377 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3378 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3379 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3380 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3381 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3382 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3383 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3384 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3385 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3386 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3387 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3388 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3389 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3390 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3391 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3392 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3393 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3394 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3395 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3396 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3397 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3398 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3399 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3400 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3401 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3402 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3403 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3404 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3405 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3406 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3407 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3408 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3409 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3410 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3411 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3412 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3413 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3414 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3415 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3416 #define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
3417 #define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
3418 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
3419 #define PTA_PKU (HOST_WIDE_INT_1 << 59)
3420 #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
3421 #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
3422 #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
3423 #define PTA_SGX (HOST_WIDE_INT_1 << 63)
3425 #define PTA_CORE2 \
3426 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3427 | PTA_CX16 | PTA_FXSR)
3428 #define PTA_NEHALEM \
3429 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3430 #define PTA_WESTMERE \
3431 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3432 #define PTA_SANDYBRIDGE \
3433 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3434 #define PTA_IVYBRIDGE \
3435 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3436 #define PTA_HASWELL \
3437 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3438 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3439 #define PTA_BROADWELL \
3440 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3441 #define PTA_SKYLAKE \
3442 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3443 #define PTA_SKYLAKE_AVX512 \
3444 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
3445 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
3446 #define PTA_KNL \
3447 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3448 #define PTA_BONNELL \
3449 (PTA_CORE2 | PTA_MOVBE)
3450 #define PTA_SILVERMONT \
3451 (PTA_WESTMERE | PTA_MOVBE)
3452 #define PTA_KNM \
3453 (PTA_KNL | PTA_AVX5124VNNIW | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ)
3455 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3457 static struct pta
3459 const char *const name; /* processor name or nickname. */
3460 const enum processor_type processor;
3461 const enum attr_cpu schedule;
3462 const unsigned HOST_WIDE_INT flags;
3464 const processor_alias_table[] =
3466 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3467 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3468 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3469 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3470 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3471 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3472 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3473 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3474 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3475 {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3476 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3477 PTA_MMX | PTA_SSE | PTA_FXSR},
3478 {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3479 PTA_MMX | PTA_SSE | PTA_FXSR},
3480 {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3481 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3482 {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3483 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3484 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3485 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3486 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3487 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3488 PTA_MMX | PTA_SSE | PTA_FXSR},
3489 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3490 PTA_MMX | PTA_SSE | PTA_FXSR},
3491 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3492 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3493 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3494 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3495 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3496 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3497 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3498 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3499 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3500 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3501 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3502 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3503 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3504 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3505 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3506 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3507 PTA_SANDYBRIDGE},
3508 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3509 PTA_SANDYBRIDGE},
3510 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3511 PTA_IVYBRIDGE},
3512 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3513 PTA_IVYBRIDGE},
3514 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3515 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3516 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3517 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
3518 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
3519 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3520 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3521 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3522 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3523 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3524 {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3525 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3526 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3527 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3528 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3529 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3530 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3531 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3532 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3533 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3534 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3535 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3536 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3537 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3539 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3540 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3541 {"x86-64", PROCESSOR_K8, CPU_K8,
3542 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3543 {"eden-x2", PROCESSOR_K8, CPU_K8,
3544 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3545 {"nano", PROCESSOR_K8, CPU_K8,
3546 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3547 | PTA_SSSE3 | PTA_FXSR},
3548 {"nano-1000", PROCESSOR_K8, CPU_K8,
3549 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3550 | PTA_SSSE3 | PTA_FXSR},
3551 {"nano-2000", PROCESSOR_K8, CPU_K8,
3552 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3553 | PTA_SSSE3 | PTA_FXSR},
3554 {"nano-3000", PROCESSOR_K8, CPU_K8,
3555 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3556 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3557 {"nano-x2", PROCESSOR_K8, CPU_K8,
3558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3559 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3560 {"eden-x4", PROCESSOR_K8, CPU_K8,
3561 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3562 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3563 {"nano-x4", PROCESSOR_K8, CPU_K8,
3564 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3565 | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3566 {"k8", PROCESSOR_K8, CPU_K8,
3567 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3568 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3569 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3570 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3571 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3572 {"opteron", PROCESSOR_K8, CPU_K8,
3573 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3574 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3575 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3576 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3577 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3578 {"athlon64", PROCESSOR_K8, CPU_K8,
3579 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3580 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3581 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3582 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3583 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3584 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3585 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3586 | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3587 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3588 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3589 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3590 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3591 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3592 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3593 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3594 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3595 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3596 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3597 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3598 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3599 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3600 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3601 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3602 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3603 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3604 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3605 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3606 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3607 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3608 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3609 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3610 | PTA_XSAVEOPT | PTA_FSGSBASE},
3611 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3612 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3613 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3614 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3615 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3616 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3617 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3618 | PTA_MOVBE | PTA_MWAITX},
3619 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3620 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3621 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3622 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3623 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3624 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3625 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3626 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3627 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3628 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3629 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3630 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3631 | PTA_FXSR | PTA_XSAVE},
3632 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3633 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3634 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3635 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3636 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3637 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3639 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3640 PTA_64BIT
3641 | PTA_HLE /* flags are only used for -march switch. */ },
3644 /* -mrecip options. */
3645 static struct
3647 const char *string; /* option name */
3648 unsigned int mask; /* mask bits to set */
3650 const recip_options[] =
3652 { "all", RECIP_MASK_ALL },
3653 { "none", RECIP_MASK_NONE },
3654 { "div", RECIP_MASK_DIV },
3655 { "sqrt", RECIP_MASK_SQRT },
3656 { "vec-div", RECIP_MASK_VEC_DIV },
3657 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3660 int const pta_size = ARRAY_SIZE (processor_alias_table);
3662 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3663 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3664 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3665 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3666 #ifdef TARGET_BI_ARCH
3667 else
3669 #if TARGET_BI_ARCH == 1
3670 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3671 is on and OPTION_MASK_ABI_X32 is off. We turn off
3672 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3673 -mx32. */
3674 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3675 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3676 #else
3677 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3678 on and OPTION_MASK_ABI_64 is off. We turn off
3679 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3680 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3681 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3682 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3683 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3684 #endif
3685 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3686 && TARGET_IAMCU_P (opts->x_target_flags))
3687 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3688 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3690 #endif
3692 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3694 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3695 OPTION_MASK_ABI_64 for TARGET_X32. */
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3697 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3699 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3700 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3701 | OPTION_MASK_ABI_X32
3702 | OPTION_MASK_ABI_64);
3703 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3705 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3706 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3708 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3711 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3712 SUBTARGET_OVERRIDE_OPTIONS;
3713 #endif
3715 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3716 SUBSUBTARGET_OVERRIDE_OPTIONS;
3717 #endif
3719 /* -fPIC is the default for x86_64. */
3720 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3721 opts->x_flag_pic = 2;
3723 /* Need to check -mtune=generic first. */
3724 if (opts->x_ix86_tune_string)
3726 /* As special support for cross compilers we read -mtune=native
3727 as -mtune=generic. With native compilers we won't see the
3728 -mtune=native, as it was changed by the driver. */
3729 if (!strcmp (opts->x_ix86_tune_string, "native"))
3731 opts->x_ix86_tune_string = "generic";
3733 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3734 warning (OPT_Wdeprecated,
3735 main_args_p
3736 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3737 "or %<-mtune=generic%> instead as appropriate")
3738 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3739 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3740 " instead as appropriate"));
3742 else
3744 if (opts->x_ix86_arch_string)
3745 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3746 if (!opts->x_ix86_tune_string)
3748 opts->x_ix86_tune_string
3749 = processor_target_table[TARGET_CPU_DEFAULT].name;
3750 ix86_tune_defaulted = 1;
3753 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3754 or defaulted. We need to use a sensible tune option. */
3755 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3757 opts->x_ix86_tune_string = "generic";
3761 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3762 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3764 /* rep; movq isn't available in 32-bit code. */
3765 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3766 opts->x_ix86_stringop_alg = no_stringop;
3769 if (!opts->x_ix86_arch_string)
3770 opts->x_ix86_arch_string
3771 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3772 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3773 else
3774 ix86_arch_specified = 1;
3776 if (opts_set->x_ix86_pmode)
3778 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3779 && opts->x_ix86_pmode == PMODE_SI)
3780 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3781 && opts->x_ix86_pmode == PMODE_DI))
3782 error ("address mode %qs not supported in the %s bit mode",
3783 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3784 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3786 else
3787 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3788 ? PMODE_DI : PMODE_SI;
3790 if (!opts_set->x_ix86_abi)
3791 opts->x_ix86_abi = DEFAULT_ABI;
3793 if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3794 error ("-mabi=ms not supported with X32 ABI");
3795 gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3797 /* For targets using the MS ABI, enable ms-extensions if not
3798 explicitly turned off. For non-MS-ABI targets we turn this
3799 option off. */
3800 if (!opts_set->x_flag_ms_extensions)
3801 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3803 if (opts_set->x_ix86_cmodel)
3805 switch (opts->x_ix86_cmodel)
3807 case CM_SMALL:
3808 case CM_SMALL_PIC:
3809 if (opts->x_flag_pic)
3810 opts->x_ix86_cmodel = CM_SMALL_PIC;
3811 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3812 error ("code model %qs not supported in the %s bit mode",
3813 "small", "32");
3814 break;
3816 case CM_MEDIUM:
3817 case CM_MEDIUM_PIC:
3818 if (opts->x_flag_pic)
3819 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3820 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3821 error ("code model %qs not supported in the %s bit mode",
3822 "medium", "32");
3823 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3824 error ("code model %qs not supported in x32 mode",
3825 "medium");
3826 break;
3828 case CM_LARGE:
3829 case CM_LARGE_PIC:
3830 if (opts->x_flag_pic)
3831 opts->x_ix86_cmodel = CM_LARGE_PIC;
3832 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3833 error ("code model %qs not supported in the %s bit mode",
3834 "large", "32");
3835 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3836 error ("code model %qs not supported in x32 mode",
3837 "large");
3838 break;
3840 case CM_32:
3841 if (opts->x_flag_pic)
3842 error ("code model %s does not support PIC mode", "32");
3843 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3844 error ("code model %qs not supported in the %s bit mode",
3845 "32", "64");
3846 break;
3848 case CM_KERNEL:
3849 if (opts->x_flag_pic)
3851 error ("code model %s does not support PIC mode", "kernel");
3852 opts->x_ix86_cmodel = CM_32;
3854 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3855 error ("code model %qs not supported in the %s bit mode",
3856 "kernel", "32");
3857 break;
3859 default:
3860 gcc_unreachable ();
3863 else
3865 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3866 use of rip-relative addressing. This eliminates fixups that
3867 would otherwise be needed if this object is to be placed in a
3868 DLL, and is essentially just as efficient as direct addressing. */
3869 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3870 && (TARGET_RDOS || TARGET_PECOFF))
3871 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3872 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3873 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3874 else
3875 opts->x_ix86_cmodel = CM_32;
3877 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3879 error ("-masm=intel not supported in this configuration");
3880 opts->x_ix86_asm_dialect = ASM_ATT;
3882 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3883 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3884 sorry ("%i-bit mode not compiled in",
3885 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
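  /* Look up -march= in the processor alias table.  On a match, record the
     schedule and architecture and enable every ISA extension the selected
     CPU implies, unless the user set that ISA flag explicitly.  */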
3887 for (i = 0; i < pta_size; i++)
3888 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3890 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3892 error (main_args_p
3893 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3894 "switch")
3895 : G_("%<generic%> CPU can be used only for "
3896 "%<target(\"tune=\")%> attribute"));
3897 return false;
3899 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3901 error (main_args_p
3902 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3903 "switch")
3904 : G_("%<intel%> CPU can be used only for "
3905 "%<target(\"tune=\")%> attribute"));
3906 return false;
3909 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3910 && !(processor_alias_table[i].flags & PTA_64BIT))
3912 error ("CPU you selected does not support x86-64 "
3913 "instruction set");
3914 return false;
3917 ix86_schedule = processor_alias_table[i].schedule;
3918 ix86_arch = processor_alias_table[i].processor;
3919 /* Default cpu tuning to the architecture. */
3920 ix86_tune = ix86_arch;
3922 if (processor_alias_table[i].flags & PTA_MMX
3923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3925 if (processor_alias_table[i].flags & PTA_3DNOW
3926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3928 if (processor_alias_table[i].flags & PTA_3DNOW_A
3929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3931 if (processor_alias_table[i].flags & PTA_SSE
3932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3934 if (processor_alias_table[i].flags & PTA_SSE2
3935 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3936 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3937 if (processor_alias_table[i].flags & PTA_SSE3
3938 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3939 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3940 if (processor_alias_table[i].flags & PTA_SSSE3
3941 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3942 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3943 if (processor_alias_table[i].flags & PTA_SSE4_1
3944 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3945 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3946 if (processor_alias_table[i].flags & PTA_SSE4_2
3947 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3948 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3949 if (processor_alias_table[i].flags & PTA_AVX
3950 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3951 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3952 if (processor_alias_table[i].flags & PTA_AVX2
3953 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3954 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3955 if (processor_alias_table[i].flags & PTA_FMA
3956 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3957 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3958 if (processor_alias_table[i].flags & PTA_SSE4A
3959 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3960 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3961 if (processor_alias_table[i].flags & PTA_FMA4
3962 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3963 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3964 if (processor_alias_table[i].flags & PTA_XOP
3965 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3966 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3967 if (processor_alias_table[i].flags & PTA_LWP
3968 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3969 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3970 if (processor_alias_table[i].flags & PTA_ABM
3971 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3972 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3973 if (processor_alias_table[i].flags & PTA_BMI
3974 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3975 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3976 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3977 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3978 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3979 if (processor_alias_table[i].flags & PTA_TBM
3980 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3981 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3982 if (processor_alias_table[i].flags & PTA_BMI2
3983 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3984 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3985 if (processor_alias_table[i].flags & PTA_CX16
3986 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3987 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3988 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3989 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3990 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3991 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3992 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3993 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3994 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3995 if (processor_alias_table[i].flags & PTA_MOVBE
3996 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3997 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3998 if (processor_alias_table[i].flags & PTA_AES
3999 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4000 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
4001 if (processor_alias_table[i].flags & PTA_SHA
4002 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4003 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4004 if (processor_alias_table[i].flags & PTA_PCLMUL
4005 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4006 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4007 if (processor_alias_table[i].flags & PTA_FSGSBASE
4008 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4009 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4010 if (processor_alias_table[i].flags & PTA_RDRND
4011 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4012 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4013 if (processor_alias_table[i].flags & PTA_F16C
4014 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4015 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4016 if (processor_alias_table[i].flags & PTA_RTM
4017 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4018 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4019 if (processor_alias_table[i].flags & PTA_HLE
4020 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4021 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4022 if (processor_alias_table[i].flags & PTA_PRFCHW
4023 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4024 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4025 if (processor_alias_table[i].flags & PTA_RDSEED
4026 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4027 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4028 if (processor_alias_table[i].flags & PTA_ADX
4029 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4030 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4031 if (processor_alias_table[i].flags & PTA_FXSR
4032 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4033 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4034 if (processor_alias_table[i].flags & PTA_XSAVE
4035 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4036 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4037 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4038 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4039 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4040 if (processor_alias_table[i].flags & PTA_AVX512F
4041 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4042 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4043 if (processor_alias_table[i].flags & PTA_AVX512ER
4044 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4045 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4046 if (processor_alias_table[i].flags & PTA_AVX512PF
4047 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4048 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4049 if (processor_alias_table[i].flags & PTA_AVX512CD
4050 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4051 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4052 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4053 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4054 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4055 if (processor_alias_table[i].flags & PTA_CLWB
4056 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4057 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4058 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4059 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4060 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4061 if (processor_alias_table[i].flags & PTA_CLZERO
4062 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4063 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4064 if (processor_alias_table[i].flags & PTA_XSAVEC
4065 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4066 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4067 if (processor_alias_table[i].flags & PTA_XSAVES
4068 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4069 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4070 if (processor_alias_table[i].flags & PTA_AVX512DQ
4071 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4072 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4073 if (processor_alias_table[i].flags & PTA_AVX512BW
4074 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4075 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4076 if (processor_alias_table[i].flags & PTA_AVX512VL
4077 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4078 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4079 if (processor_alias_table[i].flags & PTA_MPX
4080 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4081 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4082 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4083 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4084 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4085 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4086 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4087 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4089 if (processor_alias_table[i].flags & PTA_AVX5124VNNIW
4090 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124VNNIW))
4091 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4092 if (processor_alias_table[i].flags & PTA_AVX5124FMAPS
4093 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX5124FMAPS))
4094 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4095 if (processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ
4096 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4097 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4098 if (processor_alias_table[i].flags & PTA_SGX
4099 && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4100 opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4102 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4103 x86_prefetch_sse = true;
4104 if (processor_alias_table[i].flags & PTA_MWAITX
4105 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4106 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4107 if (processor_alias_table[i].flags & PTA_PKU
4108 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4109 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4111 /* Don't enable x87 instructions if only
4112 general registers are allowed. */
4113 if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4114 && !(opts_set->x_target_flags & MASK_80387))
4116 if (processor_alias_table[i].flags & PTA_NO_80387)
4117 opts->x_target_flags &= ~MASK_80387;
4118 else
4119 opts->x_target_flags |= MASK_80387;
4121 break;
4124 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4125 error ("Intel MPX does not support x32");
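  /* -march= did not match any table entry: report the bad value along with
     the list of valid CPU names and, when close enough, a spelling hint.  */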
4130 if (i == pta_size)
4132 error (main_args_p
4133 ? G_("bad value (%qs) for %<-march=%> switch")
4134 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4135 opts->x_ix86_arch_string);
4137 auto_vec <const char *> candidates;
4138 for (i = 0; i < pta_size; i++)
4139 if (strcmp (processor_alias_table[i].name, "generic")
4140 && strcmp (processor_alias_table[i].name, "intel")
4141 && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4142 || (processor_alias_table[i].flags & PTA_64BIT)))
4143 candidates.safe_push (processor_alias_table[i].name);
4145 char *s;
4146 const char *hint
4147 = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4148 if (hint)
4149 inform (input_location,
4150 main_args_p
4151 ? G_("valid arguments to %<-march=%> switch are: "
4152 "%s; did you mean %qs?")
4153 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4154 "%s; did you mean %qs?"), s, hint);
4155 else
4156 inform (input_location,
4157 main_args_p
4158 ? G_("valid arguments to %<-march=%> switch are: %s")
4159 : G_("valid arguments to %<target(\"arch=\")%> attribute "
4160 "are: %s"), s);
4161 XDELETEVEC (s);
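  /* Compute the per-architecture feature tests for the CPU selected
     by -march.  */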
4164 ix86_arch_mask = 1u << ix86_arch;
4165 for (i = 0; i < X86_ARCH_LAST; ++i)
4166 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
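  /* Do the same table lookup for -mtune=.  In 64-bit mode a tuning CPU
     without x86-64 support is replaced by "x86-64" if the tuning was merely
     defaulted, otherwise it is an error.  */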
4168 for (i = 0; i < pta_size; i++)
4169 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4171 ix86_schedule = processor_alias_table[i].schedule;
4172 ix86_tune = processor_alias_table[i].processor;
4173 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4175 if (!(processor_alias_table[i].flags & PTA_64BIT))
4177 if (ix86_tune_defaulted)
4179 opts->x_ix86_tune_string = "x86-64";
4180 for (i = 0; i < pta_size; i++)
4181 if (! strcmp (opts->x_ix86_tune_string,
4182 processor_alias_table[i].name))
4183 break;
4184 ix86_schedule = processor_alias_table[i].schedule;
4185 ix86_tune = processor_alias_table[i].processor;
4187 else
4188 error ("CPU you selected does not support x86-64 "
4189 "instruction set");
4192 /* Intel CPUs have always interpreted SSE prefetch instructions as
4193 NOPs; so, we can enable SSE prefetch instructions even when
4194 -mtune (rather than -march) points us to a processor that has them.
4195 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4196 higher processors. */
4197 if (TARGET_CMOV
4198 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
4199 x86_prefetch_sse = true;
4200 break;
4203 if (ix86_tune_specified && i == pta_size)
4205 error (main_args_p
4206 ? G_("bad value (%qs) for %<-mtune=%> switch")
4207 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4208 opts->x_ix86_tune_string);
4210 auto_vec <const char *> candidates;
4211 for (i = 0; i < pta_size; i++)
4212 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4213 || (processor_alias_table[i].flags & PTA_64BIT))
4214 candidates.safe_push (processor_alias_table[i].name);
4216 char *s;
4217 const char *hint
4218 = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4219 if (hint)
4220 inform (input_location,
4221 main_args_p
4222 ? G_("valid arguments to %<-mtune=%> switch are: "
4223 "%s; did you mean %qs?")
4224 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4225 "%s; did you mean %qs?"), s, hint);
4226 else
4227 inform (input_location,
4228 main_args_p
4229 ? G_("valid arguments to %<-mtune=%> switch are: %s")
4230 : G_("valid arguments to %<target(\"tune=\")%> attribute "
4231 "are: %s"), s);
4232 XDELETEVEC (s);
4235 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4237 #ifndef USE_IX86_FRAME_POINTER
4238 #define USE_IX86_FRAME_POINTER 0
4239 #endif
4241 #ifndef USE_X86_64_FRAME_POINTER
4242 #define USE_X86_64_FRAME_POINTER 0
4243 #endif
4245 /* Set the default values for switches whose default depends on TARGET_64BIT
4246 in case they weren't overwritten by command line options. */
4247 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4249 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4250 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4251 if (opts->x_flag_asynchronous_unwind_tables
4252 && !opts_set->x_flag_unwind_tables
4253 && TARGET_64BIT_MS_ABI)
4254 opts->x_flag_unwind_tables = 1;
4255 if (opts->x_flag_asynchronous_unwind_tables == 2)
4256 opts->x_flag_unwind_tables
4257 = opts->x_flag_asynchronous_unwind_tables = 1;
4258 if (opts->x_flag_pcc_struct_return == 2)
4259 opts->x_flag_pcc_struct_return = 0;
4261 else
4263 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4264 opts->x_flag_omit_frame_pointer
4265 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4266 if (opts->x_flag_asynchronous_unwind_tables == 2)
4267 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4268 if (opts->x_flag_pcc_struct_return == 2)
4270 /* Intel MCU psABI specifies that -freg-struct-return should
4271 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4272 we check -miamcu so that -freg-struct-return is always
4273 turned on if -miamcu is used. */
4274 if (TARGET_IAMCU_P (opts->x_target_flags))
4275 opts->x_flag_pcc_struct_return = 0;
4276 else
4277 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4281 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4282 /* TODO: ix86_cost should be chosen at instruction or function granularity
4283 so that for cold code we can use size_cost even in !optimize_size compilation. */
4284 if (opts->x_optimize_size)
4285 ix86_cost = &ix86_size_cost;
4286 else
4287 ix86_cost = ix86_tune_cost;
4289 /* Arrange to set up i386_stack_locals for all functions. */
4290 init_machine_status = ix86_init_machine_status;
4292 /* Validate -mregparm= value. */
4293 if (opts_set->x_ix86_regparm)
4295 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4296 warning (0, "-mregparm is ignored in 64-bit mode");
4297 else if (TARGET_IAMCU_P (opts->x_target_flags))
4298 warning (0, "-mregparm is ignored for Intel MCU psABI");
4299 if (opts->x_ix86_regparm > REGPARM_MAX)
4301 error ("-mregparm=%d is not between 0 and %d",
4302 opts->x_ix86_regparm, REGPARM_MAX);
4303 opts->x_ix86_regparm = 0;
4306 if (TARGET_IAMCU_P (opts->x_target_flags)
4307 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4308 opts->x_ix86_regparm = REGPARM_MAX;
4310 /* Default align_* from the processor table. */
4311 ix86_default_align (opts);
4313 /* Provide default for -mbranch-cost= value. */
4314 if (!opts_set->x_ix86_branch_cost)
4315 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4317 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4319 opts->x_target_flags
4320 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4322 /* Enable by default the SSE and MMX builtins. Do allow the user to
4323 explicitly disable any of these. In particular, disabling SSE and
4324 MMX for kernel code is extremely useful. */
4325 if (!ix86_arch_specified)
4326 opts->x_ix86_isa_flags
4327 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4328 | TARGET_SUBTARGET64_ISA_DEFAULT)
4329 & ~opts->x_ix86_isa_flags_explicit);
4331 if (TARGET_RTD_P (opts->x_target_flags))
4332 warning (0,
4333 main_args_p
4334 ? G_("%<-mrtd%> is ignored in 64bit mode")
4335 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4337 else
4339 opts->x_target_flags
4340 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4342 if (!ix86_arch_specified)
4343 opts->x_ix86_isa_flags
4344 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4346 /* The i386 ABI does not specify a red zone. It still makes sense to use one
4347 when the programmer takes care to keep the stack from being destroyed. */
4348 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4349 opts->x_target_flags |= MASK_NO_RED_ZONE;
4352 /* Keep nonleaf frame pointers. */
4353 if (opts->x_flag_omit_frame_pointer)
4354 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4355 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4356 opts->x_flag_omit_frame_pointer = 1;
4358 /* If we're doing fast math, we don't care about comparison order
4359 wrt NaNs. This lets us use a shorter comparison sequence. */
4360 if (opts->x_flag_finite_math_only)
4361 opts->x_target_flags &= ~MASK_IEEE_FP;
4363 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4364 since the insns won't need emulation. */
4365 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4366 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4368 /* Likewise, if the target doesn't have a 387, or we've specified
4369 software floating point, don't use 387 inline intrinsics. */
4370 if (!TARGET_80387_P (opts->x_target_flags))
4371 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4373 /* Turn on MMX builtins for -msse. */
4374 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4375 opts->x_ix86_isa_flags
4376 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4378 /* Enable SSE prefetch. */
4379 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4380 || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4381 && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4382 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4383 x86_prefetch_sse = true;
4385 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4386 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4387 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4388 opts->x_ix86_isa_flags
4389 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4391 /* Enable lzcnt instruction for -mabm. */
4392 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4393 opts->x_ix86_isa_flags
4394 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4396 /* Disable BMI, BMI2 and TBM instructions for -m16. */
4397 if (TARGET_16BIT_P(opts->x_ix86_isa_flags))
4398 opts->x_ix86_isa_flags
4399 &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4400 & ~opts->x_ix86_isa_flags_explicit);
4402 /* Validate -mpreferred-stack-boundary= value or default it to
4403 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4404 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4405 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4407 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2;
4408 int max = TARGET_SEH ? 4 : 12;
4410 if (opts->x_ix86_preferred_stack_boundary_arg < min
4411 || opts->x_ix86_preferred_stack_boundary_arg > max)
4413 if (min == max)
4414 error ("-mpreferred-stack-boundary is not supported "
4415 "for this target");
4416 else
4417 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4418 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4420 else
4421 ix86_preferred_stack_boundary
4422 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4425 /* Set the default value for -mstackrealign. */
4426 if (!opts_set->x_ix86_force_align_arg_pointer)
4427 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4429 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4431 /* Validate -mincoming-stack-boundary= value or default it to
4432 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4433 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4434 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4436 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4438 if (opts->x_ix86_incoming_stack_boundary_arg < min
4439 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4440 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4441 opts->x_ix86_incoming_stack_boundary_arg, min);
4442 else
4444 ix86_user_incoming_stack_boundary
4445 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4446 ix86_incoming_stack_boundary
4447 = ix86_user_incoming_stack_boundary;
4451 #ifndef NO_PROFILE_COUNTERS
4452 if (flag_nop_mcount)
4453 error ("-mnop-mcount is not compatible with this target");
4454 #endif
4455 if (flag_nop_mcount && flag_pic)
4456 error ("-mnop-mcount is not implemented for -fPIC");
4458 /* Accept -msseregparm only if at least SSE support is enabled. */
4459 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4460 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4461 error (main_args_p
4462 ? G_("%<-msseregparm%> used without SSE enabled")
4463 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
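  /* Check an explicit -mfpmath= selection against the enabled instruction
     sets, falling back to the other unit (with a warning) where possible.  */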
4465 if (opts_set->x_ix86_fpmath)
4467 if (opts->x_ix86_fpmath & FPMATH_SSE)
4469 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4471 if (TARGET_80387_P (opts->x_target_flags))
4473 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4474 opts->x_ix86_fpmath = FPMATH_387;
4477 else if ((opts->x_ix86_fpmath & FPMATH_387)
4478 && !TARGET_80387_P (opts->x_target_flags))
4480 warning (0, "387 instruction set disabled, using SSE arithmetics");
4481 opts->x_ix86_fpmath = FPMATH_SSE;
4485 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4486 fpmath=387. The latter is however the default on many targets, since the
4487 extra 80-bit precision of temporaries is considered to be part of the ABI.
4488 Overwrite the default at least for -ffast-math.
4489 TODO: -mfpmath=both seems to produce equally performing code with slightly
4490 smaller binaries. It is however not clear whether register allocation is
4491 ready for this setting.
4492 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%) than
4493 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
4494 functions. */
4495 else if (fast_math_flags_set_p (&global_options)
4496 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4497 opts->x_ix86_fpmath = FPMATH_SSE;
4498 else
4499 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4501 /* Use external vectorized library in vectorizing intrinsics. */
4502 if (opts_set->x_ix86_veclibabi_type)
4503 switch (opts->x_ix86_veclibabi_type)
4505 case ix86_veclibabi_type_svml:
4506 ix86_veclib_handler = ix86_veclibabi_svml;
4507 break;
4509 case ix86_veclibabi_type_acml:
4510 ix86_veclib_handler = ix86_veclibabi_acml;
4511 break;
4513 default:
4514 gcc_unreachable ();
4517 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4518 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4519 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4521 /* If stack probes are required, the space used for large function
4522 arguments on the stack must also be probed, so enable
4523 -maccumulate-outgoing-args so this happens in the prologue. */
4524 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4525 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4527 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4528 warning (0,
4529 main_args_p
4530 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4531 "for correctness")
4532 : G_("stack probing requires "
4533 "%<target(\"accumulate-outgoing-args\")%> for "
4534 "correctness"));
4535 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4538 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4539 so enable -maccumulate-outgoing-args when %ebp is fixed. */
4540 if (fixed_regs[BP_REG]
4541 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4543 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4544 warning (0,
4545 main_args_p
4546 ? G_("fixed ebp register requires "
4547 "%<-maccumulate-outgoing-args%>")
4548 : G_("fixed ebp register requires "
4549 "%<target(\"accumulate-outgoing-args\")%>"));
4550 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4553 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4555 char *p;
4556 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4557 p = strchr (internal_label_prefix, 'X');
4558 internal_label_prefix_len = p - internal_label_prefix;
4559 *p = '\0';
4562 /* When a scheduling description is not available, disable the scheduler pass
4563 so it doesn't slow down compilation or make x87 code slower. */
4564 if (!TARGET_SCHEDULE)
4565 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
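  /* Seed the prefetch- and cache-related --param defaults from the cost
     table of the CPU we are tuning for.  */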
4567 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4568 ix86_tune_cost->simultaneous_prefetches,
4569 opts->x_param_values,
4570 opts_set->x_param_values);
4571 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4572 ix86_tune_cost->prefetch_block,
4573 opts->x_param_values,
4574 opts_set->x_param_values);
4575 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4576 ix86_tune_cost->l1_cache_size,
4577 opts->x_param_values,
4578 opts_set->x_param_values);
4579 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4580 ix86_tune_cost->l2_cache_size,
4581 opts->x_param_values,
4582 opts_set->x_param_values);
4584 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4585 if (opts->x_flag_prefetch_loop_arrays < 0
4586 && HAVE_prefetch
4587 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4588 && !opts->x_optimize_size
4589 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4590 opts->x_flag_prefetch_loop_arrays = 1;
4592 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4593 can be optimized to ap = __builtin_next_arg (0). */
4594 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4595 targetm.expand_builtin_va_start = NULL;
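  /* Select the SImode or DImode variants of the insn generator helpers used
     later by the prologue/epilogue code and builtin expanders.  */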
4597 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4599 ix86_gen_leave = gen_leave_rex64;
4600 if (Pmode == DImode)
4602 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4603 ix86_gen_tls_local_dynamic_base_64
4604 = gen_tls_local_dynamic_base_64_di;
4606 else
4608 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4609 ix86_gen_tls_local_dynamic_base_64
4610 = gen_tls_local_dynamic_base_64_si;
4613 else
4614 ix86_gen_leave = gen_leave;
4616 if (Pmode == DImode)
4618 ix86_gen_add3 = gen_adddi3;
4619 ix86_gen_sub3 = gen_subdi3;
4620 ix86_gen_sub3_carry = gen_subdi3_carry;
4621 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4622 ix86_gen_andsp = gen_anddi3;
4623 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4624 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4625 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4626 ix86_gen_monitor = gen_sse3_monitor_di;
4627 ix86_gen_monitorx = gen_monitorx_di;
4628 ix86_gen_clzero = gen_clzero_di;
4630 else
4632 ix86_gen_add3 = gen_addsi3;
4633 ix86_gen_sub3 = gen_subsi3;
4634 ix86_gen_sub3_carry = gen_subsi3_carry;
4635 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4636 ix86_gen_andsp = gen_andsi3;
4637 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4638 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4639 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4640 ix86_gen_monitor = gen_sse3_monitor_si;
4641 ix86_gen_monitorx = gen_monitorx_si;
4642 ix86_gen_clzero = gen_clzero_si;
4645 #ifdef USE_IX86_CLD
4646 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4647 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4648 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4649 #endif
4651 /* Set the default value for -mfentry. */
4652 if (!opts_set->x_flag_fentry)
4653 opts->x_flag_fentry = TARGET_SEH;
4654 else
4656 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4657 && opts->x_flag_fentry)
4658 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4659 "with -fpic");
4660 else if (TARGET_SEH && !opts->x_flag_fentry)
4661 sorry ("-mno-fentry isn%'t compatible with SEH");
4664 if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4665 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
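  /* Enable the vzeroupper insertion pass and the scalar-to-vector (STV)
     pass by default unless they were disabled explicitly.  */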
4667 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4668 opts->x_target_flags |= MASK_VZEROUPPER;
4669 if (!(opts_set->x_target_flags & MASK_STV))
4670 opts->x_target_flags |= MASK_STV;
4671 /* Disable STV if -mpreferred-stack-boundary={2,3} or
4672 -mincoming-stack-boundary={2,3} or -mstackrealign is used - the needed
4673 stack realignment would be an extra cost the pass doesn't take into
4674 account, and the pass can't realign the stack. */
4675 if (ix86_preferred_stack_boundary < 128
4676 || ix86_incoming_stack_boundary < 128
4677 || opts->x_ix86_force_align_arg_pointer)
4678 opts->x_target_flags &= ~MASK_STV;
4679 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4680 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4681 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4682 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4683 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4684 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4685 /* Enable 128-bit AVX instruction generation
4686 for the auto-vectorizer. */
4687 if (TARGET_AVX128_OPTIMAL
4688 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4689 opts->x_target_flags |= MASK_PREFER_AVX128;
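  /* Parse the -mrecip= string: a comma-separated list in which each entry is
     either "default" or one of the names in recip_options[], optionally
     prefixed with '!' to clear instead of set the corresponding RECIP_MASK
     bits; unknown entries are diagnosed.  */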
4691 if (opts->x_ix86_recip_name)
4693 char *p = ASTRDUP (opts->x_ix86_recip_name);
4694 char *q;
4695 unsigned int mask, i;
4696 bool invert;
4698 while ((q = strtok (p, ",")) != NULL)
4700 p = NULL;
4701 if (*q == '!')
4703 invert = true;
4704 q++;
4706 else
4707 invert = false;
4709 if (!strcmp (q, "default"))
4710 mask = RECIP_MASK_ALL;
4711 else
4713 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4714 if (!strcmp (q, recip_options[i].string))
4716 mask = recip_options[i].mask;
4717 break;
4720 if (i == ARRAY_SIZE (recip_options))
4722 error ("unknown option for -mrecip=%s", q);
4723 invert = false;
4724 mask = RECIP_MASK_NONE;
4728 opts->x_recip_mask_explicit |= mask;
4729 if (invert)
4730 opts->x_recip_mask &= ~mask;
4731 else
4732 opts->x_recip_mask |= mask;
4736 if (TARGET_RECIP_P (opts->x_target_flags))
4737 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4738 else if (opts_set->x_target_flags & MASK_RECIP)
4739 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4741 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4742 for 64-bit Bionic. Also default long double to 64-bit for Intel
4743 MCU psABI. */
4744 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4745 && !(opts_set->x_target_flags
4746 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4747 opts->x_target_flags |= (TARGET_64BIT
4748 ? MASK_LONG_DOUBLE_128
4749 : MASK_LONG_DOUBLE_64);
4751 /* Only one of them can be active. */
4752 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4753 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4755 /* Handle stack protector */
4756 if (!opts_set->x_ix86_stack_protector_guard)
4757 opts->x_ix86_stack_protector_guard
4758 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4760 #ifdef TARGET_THREAD_SSP_OFFSET
4761 ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4762 #endif
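  /* Parse and range-check an explicit -mstack-protector-guard-offset= value;
     it must fit in a 32-bit signed displacement.  */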
4764 if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4766 char *endp;
4767 const char *str = ix86_stack_protector_guard_offset_str;
4769 errno = 0;
4770 int64_t offset;
4772 #if defined(INT64_T_IS_LONG)
4773 offset = strtol (str, &endp, 0);
4774 #else
4775 offset = strtoll (str, &endp, 0);
4776 #endif
4778 if (!*str || *endp || errno)
4779 error ("%qs is not a valid number "
4780 "in -mstack-protector-guard-offset=", str);
4782 if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4783 HOST_WIDE_INT_C (0x7fffffff)))
4784 error ("%qs is not a valid offset "
4785 "in -mstack-protector-guard-offset=", str);
4787 ix86_stack_protector_guard_offset = offset;
4790 ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4792 /* The kernel uses a different segment register for performance
4793 reasons; a system call would not have to trash the userspace
4794 segment register, which would be expensive. */
4795 if (ix86_cmodel == CM_KERNEL)
4796 ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
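  /* An explicit -mstack-protector-guard-reg= must name the %fs or %gs
     segment (the leading '%' is optional).  */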
4798 if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4800 const char *str = ix86_stack_protector_guard_reg_str;
4801 addr_space_t seg = ADDR_SPACE_GENERIC;
4803 /* Discard optional register prefix. */
4804 if (str[0] == '%')
4805 str++;
4807 if (strlen (str) == 2 && str[1] == 's')
4809 if (str[0] == 'f')
4810 seg = ADDR_SPACE_SEG_FS;
4811 else if (str[0] == 'g')
4812 seg = ADDR_SPACE_SEG_GS;
4815 if (seg == ADDR_SPACE_GENERIC)
4816 error ("%qs is not a valid base register "
4817 "in -mstack-protector-guard-reg=",
4818 ix86_stack_protector_guard_reg_str);
4820 ix86_stack_protector_guard_reg = seg;
4823 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4824 if (opts->x_ix86_tune_memcpy_strategy)
4826 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4827 ix86_parse_stringop_strategy_string (str, false);
4828 free (str);
4831 if (opts->x_ix86_tune_memset_strategy)
4833 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4834 ix86_parse_stringop_strategy_string (str, true);
4835 free (str);
4838 /* Save the initial options in case the user does function specific
4839 options. */
4840 if (main_args_p)
4841 target_option_default_node = target_option_current_node
4842 = build_target_option_node (opts);
4844 /* Do not support control flow instrumentation if CET is not enabled. */
4845 if (opts->x_flag_cf_protection != CF_NONE)
4847 if (!(TARGET_IBT_P (opts->x_ix86_isa_flags2)
4848 || TARGET_SHSTK_P (opts->x_ix86_isa_flags2)))
4850 if (flag_cf_protection == CF_FULL)
4852 error ("%<-fcf-protection=full%> requires CET support "
4853 "on this target. Use -mcet or one of -mibt, "
4854 "-mshstk options to enable CET");
4856 else if (flag_cf_protection == CF_BRANCH)
4858 error ("%<-fcf-protection=branch%> requires CET support "
4859 "on this target. Use -mcet or one of -mibt, "
4860 "-mshstk options to enable CET");
4862 else if (flag_cf_protection == CF_RETURN)
4864 error ("%<-fcf-protection=return%> requires CET support "
4865 "on this target. Use -mcet or one of -mibt, "
4866 "-mshstk options to enable CET");
4868 flag_cf_protection = CF_NONE;
4869 return false;
4871 opts->x_flag_cf_protection =
4872 (cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
4875 return true;
4878 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4880 static void
4881 ix86_option_override (void)
4883 ix86_option_override_internal (true, &global_options, &global_options_set);
4886 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4887 static char *
4888 ix86_offload_options (void)
4890 if (TARGET_LP64)
4891 return xstrdup ("-foffload-abi=lp64");
4892 return xstrdup ("-foffload-abi=ilp32");
4895 /* Update register usage after having seen the compiler flags. */
4897 static void
4898 ix86_conditional_register_usage (void)
4900 int i, c_mask;
4902 /* If there are no caller-saved registers, preserve all registers
4903 except fixed_regs and registers used for the function return value,
4904 since aggregate_value_p checks call_used_regs[regno] on the return
4905 value. */
4906 if (cfun && cfun->machine->no_caller_saved_registers)
4907 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4908 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4909 call_used_regs[i] = 0;
4911 /* For 32-bit targets, squash the REX registers. */
4912 if (! TARGET_64BIT)
4914 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4915 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4916 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4917 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4918 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4919 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4922 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4923 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4925 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4927 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4929 /* Set/reset conditionally defined registers from
4930 CALL_USED_REGISTERS initializer. */
4931 if (call_used_regs[i] > 1)
4932 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4934 /* Calculate registers of CLOBBERED_REGS register set
4935 as call used registers from GENERAL_REGS register set. */
4936 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4937 && call_used_regs[i])
4938 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4941 /* If MMX is disabled, squash the registers. */
4942 if (! TARGET_MMX)
4943 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4944 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4945 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4947 /* If SSE is disabled, squash the registers. */
4948 if (! TARGET_SSE)
4949 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4950 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4951 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4953 /* If the FPU is disabled, squash the registers. */
4954 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4955 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4956 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4957 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4959 /* If AVX512F is disabled, squash the registers. */
4960 if (! TARGET_AVX512F)
4962 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4963 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4965 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4966 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4969 /* If MPX is disabled, squash the registers. */
4970 if (! TARGET_MPX)
4971 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4972 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4975 /* Canonicalize a comparison from one we don't have to one we do have. */
4977 static void
4978 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4979 bool op0_preserve_value)
4981 /* The order of operands in x87 ficom compare is forced by combine in
4982 simplify_comparison () function. Float operator is treated as RTX_OBJ
4983 with a precedence over other operators and is always put in the first
4984 place. Swap condition and operands to match ficom instruction. */
4985 if (!op0_preserve_value
4986 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
4988 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
4990 /* We are called only for compares that are split to SAHF instruction.
4991 Ensure that we have setcc/jcc insn for the swapped condition. */
4992 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
4994 std::swap (*op0, *op1);
4995 *code = (int) scode;
5000 /* Save the current options */
5002 static void
5003 ix86_function_specific_save (struct cl_target_option *ptr,
5004 struct gcc_options *opts)
5006 ptr->arch = ix86_arch;
5007 ptr->schedule = ix86_schedule;
5008 ptr->prefetch_sse = x86_prefetch_sse;
5009 ptr->tune = ix86_tune;
5010 ptr->branch_cost = ix86_branch_cost;
5011 ptr->tune_defaulted = ix86_tune_defaulted;
5012 ptr->arch_specified = ix86_arch_specified;
5013 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5014 ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5015 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5016 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5017 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5018 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5019 ptr->x_ix86_abi = opts->x_ix86_abi;
5020 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5021 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5022 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5023 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5024 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5025 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5026 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5027 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5028 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5029 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5030 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5031 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5032 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5033 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5034 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5035 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5036 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5037 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5038 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5039 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5041 /* The fields are char but the variables are not; make sure the
5042 values fit in the fields. */
5043 gcc_assert (ptr->arch == ix86_arch);
5044 gcc_assert (ptr->schedule == ix86_schedule);
5045 gcc_assert (ptr->tune == ix86_tune);
5046 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5049 /* Restore the current options */
5051 static void
5052 ix86_function_specific_restore (struct gcc_options *opts,
5053 struct cl_target_option *ptr)
5055 enum processor_type old_tune = ix86_tune;
5056 enum processor_type old_arch = ix86_arch;
5057 unsigned int ix86_arch_mask;
5058 int i;
5060 /* We don't change -fPIC. */
5061 opts->x_flag_pic = flag_pic;
5063 ix86_arch = (enum processor_type) ptr->arch;
5064 ix86_schedule = (enum attr_cpu) ptr->schedule;
5065 ix86_tune = (enum processor_type) ptr->tune;
5066 x86_prefetch_sse = ptr->prefetch_sse;
5067 opts->x_ix86_branch_cost = ptr->branch_cost;
5068 ix86_tune_defaulted = ptr->tune_defaulted;
5069 ix86_arch_specified = ptr->arch_specified;
5070 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5071 opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5072 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5073 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5074 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5075 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5076 opts->x_ix86_abi = ptr->x_ix86_abi;
5077 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5078 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5079 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5080 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5081 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5082 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5083 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5084 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5085 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5086 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5087 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5088 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5089 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5090 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5091 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5092 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5093 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5094 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5095 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5096 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5097 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5098 /* TODO: ix86_cost should be chosen at instruction or function granularity
5099 so that for cold code we can use size_cost even in !optimize_size compilation. */
5100 if (opts->x_optimize_size)
5101 ix86_cost = &ix86_size_cost;
5102 else
5103 ix86_cost = ix86_tune_cost;
5105 /* Recreate the arch feature tests if the arch changed */
5106 if (old_arch != ix86_arch)
5108 ix86_arch_mask = 1u << ix86_arch;
5109 for (i = 0; i < X86_ARCH_LAST; ++i)
5110 ix86_arch_features[i]
5111 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5114 /* Recreate the tune optimization tests */
5115 if (old_tune != ix86_tune)
5116 set_ix86_tune_features (ix86_tune, false);
5119 /* Adjust target options after streaming them in. This is mainly about
5120 reconciling them with global options. */
5122 static void
5123 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5125 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5126 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5127 for PIC, or error out. */
5128 if (flag_pic)
5129 switch (ptr->x_ix86_cmodel)
5131 case CM_SMALL:
5132 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5133 break;
5135 case CM_MEDIUM:
5136 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5137 break;
5139 case CM_LARGE:
5140 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5141 break;
5143 case CM_KERNEL:
5144 error ("code model %s does not support PIC mode", "kernel");
5145 break;
5147 default:
5148 break;
5150 else
5151 switch (ptr->x_ix86_cmodel)
5153 case CM_SMALL_PIC:
5154 ptr->x_ix86_cmodel = CM_SMALL;
5155 break;
5157 case CM_MEDIUM_PIC:
5158 ptr->x_ix86_cmodel = CM_MEDIUM;
5159 break;
5161 case CM_LARGE_PIC:
5162 ptr->x_ix86_cmodel = CM_LARGE;
5163 break;
5165 default:
5166 break;
5170 /* Print the current options */
5172 static void
5173 ix86_function_specific_print (FILE *file, int indent,
5174 struct cl_target_option *ptr)
5176 char *target_string
5177 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5178 ptr->x_target_flags, ptr->x_ix86_target_flags,
5179 NULL, NULL, ptr->x_ix86_fpmath, false);
5181 gcc_assert (ptr->arch < PROCESSOR_max);
5182 fprintf (file, "%*sarch = %d (%s)\n",
5183 indent, "",
5184 ptr->arch, processor_target_table[ptr->arch].name);
5186 gcc_assert (ptr->tune < PROCESSOR_max);
5187 fprintf (file, "%*stune = %d (%s)\n",
5188 indent, "",
5189 ptr->tune, processor_target_table[ptr->tune].name);
5191 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5193 if (target_string)
5195 fprintf (file, "%*s%s\n", indent, "", target_string);
5196 free (target_string);
5201 /* Inner function to process the attribute((target(...))): take an argument
5202 and set the current options from it. If we are given a list, recursively
5203 process each of its elements. */
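  /* For example, attribute((target("sse4.2,no-avx"))) enables SSE4.2 and
     disables AVX for the annotated function.  */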
5205 static bool
5206 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5207 struct gcc_options *opts,
5208 struct gcc_options *opts_set,
5209 struct gcc_options *enum_opts_set)
5211 char *next_optstr;
5212 bool ret = true;
5214 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5215 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5216 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5217 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5218 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5220 enum ix86_opt_type
5222 ix86_opt_unknown,
5223 ix86_opt_yes,
5224 ix86_opt_no,
5225 ix86_opt_str,
5226 ix86_opt_enum,
5227 ix86_opt_isa
5230 static const struct
5232 const char *string;
5233 size_t len;
5234 enum ix86_opt_type type;
5235 int opt;
5236 int mask;
5237 } attrs[] = {
5238 /* isa options */
5239 IX86_ATTR_ISA ("sgx", OPT_msgx),
5240 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5241 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5242 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5244 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5245 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5246 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5247 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5248 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5249 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5250 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5251 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5252 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5253 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5254 IX86_ATTR_ISA ("fma", OPT_mfma),
5255 IX86_ATTR_ISA ("xop", OPT_mxop),
5256 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5257 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5258 IX86_ATTR_ISA ("avx", OPT_mavx),
5259 IX86_ATTR_ISA ("sse4", OPT_msse4),
5260 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5261 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5262 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5263 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5264 IX86_ATTR_ISA ("sse3", OPT_msse3),
5265 IX86_ATTR_ISA ("aes", OPT_maes),
5266 IX86_ATTR_ISA ("sha", OPT_msha),
5267 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5268 IX86_ATTR_ISA ("sse2", OPT_msse2),
5269 IX86_ATTR_ISA ("sse", OPT_msse),
5270 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa),
5271 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5272 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5273 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5274 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5275 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5276 IX86_ATTR_ISA ("adx", OPT_madx),
5277 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5278 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5279 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5280 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5281 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5282 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5283 IX86_ATTR_ISA ("abm", OPT_mabm),
5284 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5285 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5286 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5287 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5288 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5289 IX86_ATTR_ISA ("cx16", OPT_mcx16),
5290 IX86_ATTR_ISA ("sahf", OPT_msahf),
5291 IX86_ATTR_ISA ("movbe", OPT_mmovbe),
5292 IX86_ATTR_ISA ("crc32", OPT_mcrc32),
5293 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5294 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5295 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5296 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5297 IX86_ATTR_ISA ("pku", OPT_mpku),
5298 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5299 IX86_ATTR_ISA ("hle", OPT_mhle),
5300 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5301 IX86_ATTR_ISA ("mpx", OPT_mmpx),
5302 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5303 IX86_ATTR_ISA ("rdpid", OPT_mrdpid),
5304 IX86_ATTR_ISA ("gfni", OPT_mgfni),
5305 IX86_ATTR_ISA ("ibt", OPT_mibt),
5306 IX86_ATTR_ISA ("shstk", OPT_mshstk),
5308 /* enum options */
5309 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5311 /* string options */
5312 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5313 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5315 /* flag options */
5316 IX86_ATTR_YES ("cld",
5317 OPT_mcld,
5318 MASK_CLD),
5320 IX86_ATTR_NO ("fancy-math-387",
5321 OPT_mfancy_math_387,
5322 MASK_NO_FANCY_MATH_387),
5324 IX86_ATTR_YES ("ieee-fp",
5325 OPT_mieee_fp,
5326 MASK_IEEE_FP),
5328 IX86_ATTR_YES ("inline-all-stringops",
5329 OPT_minline_all_stringops,
5330 MASK_INLINE_ALL_STRINGOPS),
5332 IX86_ATTR_YES ("inline-stringops-dynamically",
5333 OPT_minline_stringops_dynamically,
5334 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5336 IX86_ATTR_NO ("align-stringops",
5337 OPT_mno_align_stringops,
5338 MASK_NO_ALIGN_STRINGOPS),
5340 IX86_ATTR_YES ("recip",
5341 OPT_mrecip,
5342 MASK_RECIP),
5346 /* If this is a list, recurse to get the options. */
5347 if (TREE_CODE (args) == TREE_LIST)
5349 bool ret = true;
5351 for (; args; args = TREE_CHAIN (args))
5352 if (TREE_VALUE (args)
5353 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5354 p_strings, opts, opts_set,
5355 enum_opts_set))
5356 ret = false;
5358 return ret;
5361 else if (TREE_CODE (args) != STRING_CST)
5363 error ("attribute %<target%> argument not a string");
5364 return false;
5367 /* Handle multiple arguments separated by commas. */
5368 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5370 while (next_optstr && *next_optstr != '\0')
5372 char *p = next_optstr;
5373 char *orig_p = p;
5374 char *comma = strchr (next_optstr, ',');
5375 const char *opt_string;
5376 size_t len, opt_len;
5377 int opt;
5378 bool opt_set_p;
5379 char ch;
5380 unsigned i;
5381 enum ix86_opt_type type = ix86_opt_unknown;
5382 int mask = 0;
5384 if (comma)
5386 *comma = '\0';
5387 len = comma - next_optstr;
5388 next_optstr = comma + 1;
5390 else
5392 len = strlen (p);
5393 next_optstr = NULL;
5396 /* Recognize no-xxx. */
5397 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5399 opt_set_p = false;
5400 p += 3;
5401 len -= 3;
5403 else
5404 opt_set_p = true;
5406 /* Find the option. */
5407 ch = *p;
5408 opt = N_OPTS;
5409 for (i = 0; i < ARRAY_SIZE (attrs); i++)
5411 type = attrs[i].type;
5412 opt_len = attrs[i].len;
5413 if (ch == attrs[i].string[0]
5414 && ((type != ix86_opt_str && type != ix86_opt_enum)
5415 ? len == opt_len
5416 : len > opt_len)
5417 && memcmp (p, attrs[i].string, opt_len) == 0)
5419 opt = attrs[i].opt;
5420 mask = attrs[i].mask;
5421 opt_string = attrs[i].string;
5422 break;
5426 /* Process the option. */
5427 if (opt == N_OPTS)
5429 error ("attribute(target(\"%s\")) is unknown", orig_p);
5430 ret = false;
5433 else if (type == ix86_opt_isa)
5435 struct cl_decoded_option decoded;
5437 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5438 ix86_handle_option (opts, opts_set,
5439 &decoded, input_location);
5442 else if (type == ix86_opt_yes || type == ix86_opt_no)
5444 if (type == ix86_opt_no)
5445 opt_set_p = !opt_set_p;
5447 if (opt_set_p)
5448 opts->x_target_flags |= mask;
5449 else
5450 opts->x_target_flags &= ~mask;
5453 else if (type == ix86_opt_str)
5455 if (p_strings[opt])
5457 error ("option(\"%s\") was already specified", opt_string);
5458 ret = false;
5460 else
5461 p_strings[opt] = xstrdup (p + opt_len);
5464 else if (type == ix86_opt_enum)
5466 bool arg_ok;
5467 int value;
5469 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5470 if (arg_ok)
5471 set_option (opts, enum_opts_set, opt, value,
5472 p + opt_len, DK_UNSPECIFIED, input_location,
5473 global_dc);
5474 else
5476 error ("attribute(target(\"%s\")) is unknown", orig_p);
5477 ret = false;
5481 else
5482 gcc_unreachable ();
5485 return ret;
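/* Illustrative sketch only (not used by the compiler itself): examples of
   attribute strings the parser above accepts; the function names are
   hypothetical.

     __attribute__ ((target ("avx2,no-sse4a"))) void hypothetical_fn1 (void);
     __attribute__ ((target ("arch=haswell,fpmath=sse"))) void hypothetical_fn2 (void);

   Each comma-separated token is looked up in the attrs[] table; a leading
   "no-" inverts ISA and flag options, while "arch=", "tune=" and "fpmath="
   consume the rest of the token as their argument.  */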
5488 /* Release allocated strings. */
5489 static void
5490 release_options_strings (char **option_strings)
5492 /* Free up memory allocated to hold the strings */
5493 for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5494 free (option_strings[i]);
5497 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5499 tree
5500 ix86_valid_target_attribute_tree (tree args,
5501 struct gcc_options *opts,
5502 struct gcc_options *opts_set)
5504 const char *orig_arch_string = opts->x_ix86_arch_string;
5505 const char *orig_tune_string = opts->x_ix86_tune_string;
5506 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5507 int orig_tune_defaulted = ix86_tune_defaulted;
5508 int orig_arch_specified = ix86_arch_specified;
5509 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5510 tree t = NULL_TREE;
5511 struct cl_target_option *def
5512 = TREE_TARGET_OPTION (target_option_default_node);
5513 struct gcc_options enum_opts_set;
5515 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5517 /* Process each of the options on the chain. */
5518 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5519 opts_set, &enum_opts_set))
5520 return error_mark_node;
5522 /* If the changed options are different from the default, rerun
5523 ix86_option_override_internal, and then save the options away.
5524 The string options are attribute options, and will be undone
5525 when we copy the save structure. */
5526 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5527 || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5528 || opts->x_target_flags != def->x_target_flags
5529 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5530 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5531 || enum_opts_set.x_ix86_fpmath)
5533 /* If we are using the default tune= or arch=, undo the string assigned,
5534 and use the default. */
5535 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5537 opts->x_ix86_arch_string
5538 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5540 /* If arch= is set, clear all bits in x_ix86_isa_flags,
5541 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
5542 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5543 | OPTION_MASK_ABI_64
5544 | OPTION_MASK_ABI_X32
5545 | OPTION_MASK_CODE16);
5546 opts->x_ix86_isa_flags2 = 0;
5548 else if (!orig_arch_specified)
5549 opts->x_ix86_arch_string = NULL;
5551 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5552 opts->x_ix86_tune_string
5553 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5554 else if (orig_tune_defaulted)
5555 opts->x_ix86_tune_string = NULL;
5557 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5558 if (enum_opts_set.x_ix86_fpmath)
5559 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5561 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5562 bool r = ix86_option_override_internal (false, opts, opts_set);
5563 if (!r)
5565 release_options_strings (option_strings);
5566 return error_mark_node;
5569 /* Add any builtin functions with the new isa if any. */
5570 ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5572 /* Save the current options unless we are validating options for
5573 #pragma. */
5574 t = build_target_option_node (opts);
5576 opts->x_ix86_arch_string = orig_arch_string;
5577 opts->x_ix86_tune_string = orig_tune_string;
5578 opts_set->x_ix86_fpmath = orig_fpmath_set;
5580 release_options_strings (option_strings);
5583 return t;
5586 /* Hook to validate attribute((target("string"))). */
5588 static bool
5589 ix86_valid_target_attribute_p (tree fndecl,
5590 tree ARG_UNUSED (name),
5591 tree args,
5592 int ARG_UNUSED (flags))
5594 struct gcc_options func_options;
5595 tree new_target, new_optimize;
5596 bool ret = true;
5598 /* attribute((target("default"))) does nothing, beyond
5599 affecting multi-versioning. */
5600 if (TREE_VALUE (args)
5601 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5602 && TREE_CHAIN (args) == NULL_TREE
5603 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5604 return true;
5606 tree old_optimize = build_optimization_node (&global_options);
5608 /* Get the optimization options of the current function. */
5609 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5611 if (!func_optimize)
5612 func_optimize = old_optimize;
5614 /* Init func_options. */
5615 memset (&func_options, 0, sizeof (func_options));
5616 init_options_struct (&func_options, NULL);
5617 lang_hooks.init_options_struct (&func_options);
5619 cl_optimization_restore (&func_options,
5620 TREE_OPTIMIZATION (func_optimize));
5622 /* Initialize func_options to the default before its target options can
5623 be set. */
5624 cl_target_option_restore (&func_options,
5625 TREE_TARGET_OPTION (target_option_default_node));
5627 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5628 &global_options_set);
5630 new_optimize = build_optimization_node (&func_options);
5632 if (new_target == error_mark_node)
5633 ret = false;
5635 else if (fndecl && new_target)
5637 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5639 if (old_optimize != new_optimize)
5640 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5643 finalize_options_struct (&func_options);
5645 return ret;
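/* Illustrative sketch only: with function multi-versioning, the "default"
   version is the one the early return above handles specially, e.g.

     __attribute__ ((target ("default"))) int hypothetical_foo (void);
     __attribute__ ((target ("avx2")))    int hypothetical_foo (void);

   The "default" variant keeps the command-line options; only the other
   variants go through ix86_valid_target_attribute_tree.  */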
5649 /* Hook to determine if one function can safely inline another. */
5651 static bool
5652 ix86_can_inline_p (tree caller, tree callee)
5654 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5655 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5656 if (!callee_tree)
5657 callee_tree = target_option_default_node;
5658 if (!caller_tree)
5659 caller_tree = target_option_default_node;
5660 if (callee_tree == caller_tree)
5661 return true;
5663 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5664 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5665 bool ret = false;
6667 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
6668 function can inline an SSE2 function but an SSE2 function can't inline
6669 an SSE4 function. */
5670 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5671 != callee_opts->x_ix86_isa_flags)
5672 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5673 != callee_opts->x_ix86_isa_flags2))
5674 ret = false;
5676 /* See if we have the same non-isa options. */
5677 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5678 ret = false;
5680 /* See if arch, tune, etc. are the same. */
5681 else if (caller_opts->arch != callee_opts->arch)
5682 ret = false;
5684 else if (caller_opts->tune != callee_opts->tune)
5685 ret = false;
5687 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5688 /* If the callee doesn't use FP expressions, differences in
5689 ix86_fpmath can be ignored. We are called from FEs
5690 for multi-versioning call optimization, so beware of
5691 ipa_fn_summaries not available. */
5692 && (! ipa_fn_summaries
5693 || ipa_fn_summaries->get
5694 (cgraph_node::get (callee))->fp_expressions))
5695 ret = false;
5697 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5698 ret = false;
5700 else
5701 ret = true;
5703 return ret;
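/* Illustrative sketch only: the ISA-subset rule above means that, e.g.,

     __attribute__ ((target ("avx2")))   void hypothetical_caller (void);
     __attribute__ ((target ("sse4.2"))) void hypothetical_callee (void);

   hypothetical_caller may inline hypothetical_callee (the AVX2 caller's
   ISA flags include the SSE4.2 ones), but not the other way around.  */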
5707 /* Remember the last target of ix86_set_current_function. */
5708 static GTY(()) tree ix86_previous_fndecl;
5710 /* Set targets globals to the default (or current #pragma GCC target
5711 if active). Invalidate ix86_previous_fndecl cache. */
5713 void
5714 ix86_reset_previous_fndecl (void)
5716 tree new_tree = target_option_current_node;
5717 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5718 if (TREE_TARGET_GLOBALS (new_tree))
5719 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5720 else if (new_tree == target_option_default_node)
5721 restore_target_globals (&default_target_globals);
5722 else
5723 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5724 ix86_previous_fndecl = NULL_TREE;
5727 /* Set the func_type field from the function FNDECL. */
5729 static void
5730 ix86_set_func_type (tree fndecl)
5732 if (cfun->machine->func_type == TYPE_UNKNOWN)
5734 if (lookup_attribute ("interrupt",
5735 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5737 if (ix86_function_naked (fndecl))
5738 error_at (DECL_SOURCE_LOCATION (fndecl),
5739 "interrupt and naked attributes are not compatible");
5741 int nargs = 0;
5742 for (tree arg = DECL_ARGUMENTS (fndecl);
5743 arg;
5744 arg = TREE_CHAIN (arg))
5745 nargs++;
5746 cfun->machine->no_caller_saved_registers = true;
5747 cfun->machine->func_type
5748 = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5750 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5752 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
5753 if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5754 sorry ("Only DWARF debug format is supported for interrupt "
5755 "service routine.");
5757 else
5759 cfun->machine->func_type = TYPE_NORMAL;
5760 if (lookup_attribute ("no_caller_saved_registers",
5761 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5762 cfun->machine->no_caller_saved_registers = true;
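/* Illustrative sketch only: the argument count distinguishes the two
   interrupt flavours classified above (names and frame type hypothetical):

     __attribute__ ((interrupt))
     void hypothetical_isr (struct hypothetical_frame *frame);

     __attribute__ ((interrupt))
     void hypothetical_exc (struct hypothetical_frame *frame, uword_t code);

   One parameter gives TYPE_INTERRUPT, two (frame plus error code) give
   TYPE_EXCEPTION; both imply no_caller_saved_registers.  */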
5767 /* Establish appropriate back-end context for processing the function
5768 FNDECL. The argument might be NULL to indicate processing at top
5769 level, outside of any function scope. */
5770 static void
5771 ix86_set_current_function (tree fndecl)
5773 /* Only change the context if the function changes. This hook is called
5774 several times in the course of compiling a function, and we don't want to
5775 slow things down too much or call target_reinit when it isn't safe. */
5776 if (fndecl == ix86_previous_fndecl)
5778 /* There may be 2 function bodies for the same function FNDECL,
5779 one is extern inline and one isn't. Call ix86_set_func_type
5780 to set the func_type field. */
5781 if (fndecl != NULL_TREE)
5782 ix86_set_func_type (fndecl);
5783 return;
5786 tree old_tree;
5787 if (ix86_previous_fndecl == NULL_TREE)
5788 old_tree = target_option_current_node;
5789 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5790 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5791 else
5792 old_tree = target_option_default_node;
5794 if (fndecl == NULL_TREE)
5796 if (old_tree != target_option_current_node)
5797 ix86_reset_previous_fndecl ();
5798 return;
5801 ix86_set_func_type (fndecl);
5803 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5804 if (new_tree == NULL_TREE)
5805 new_tree = target_option_default_node;
5807 if (old_tree != new_tree)
5809 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5810 if (TREE_TARGET_GLOBALS (new_tree))
5811 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5812 else if (new_tree == target_option_default_node)
5813 restore_target_globals (&default_target_globals);
5814 else
5815 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5817 ix86_previous_fndecl = fndecl;
5819 static bool prev_no_caller_saved_registers;
5821 /* 64-bit MS and SYSV ABI have different set of call used registers.
5822 Avoid expensive re-initialization of init_regs each time we switch
5823 function context. */
5824 if (TARGET_64BIT
5825 && (call_used_regs[SI_REG]
5826 == (cfun->machine->call_abi == MS_ABI)))
5827 reinit_regs ();
5828 /* Need to re-initialize init_regs if caller-saved registers are
5829 changed. */
5830 else if (prev_no_caller_saved_registers
5831 != cfun->machine->no_caller_saved_registers)
5832 reinit_regs ();
5834 if (cfun->machine->func_type != TYPE_NORMAL
5835 || cfun->machine->no_caller_saved_registers)
5837 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
5838 may change processor state. */
5839 const char *isa;
5840 if (TARGET_MPX)
5841 isa = "MPX";
5842 else if (TARGET_SSE)
5843 isa = "SSE";
5844 else if (TARGET_MMX)
5845 isa = "MMX/3Dnow";
5846 else if (TARGET_80387)
5847 isa = "80387";
5848 else
5849 isa = NULL;
5850 if (isa != NULL)
5852 if (cfun->machine->func_type != TYPE_NORMAL)
5853 sorry ("%s instructions aren't allowed in %s service routine",
5854 isa, (cfun->machine->func_type == TYPE_EXCEPTION
5855 ? "exception" : "interrupt"));
5856 else
5857 sorry ("%s instructions aren't allowed in function with "
5858 "no_caller_saved_registers attribute", isa);
5859 /* Don't issue the same error twice. */
5860 cfun->machine->func_type = TYPE_NORMAL;
5861 cfun->machine->no_caller_saved_registers = false;
5865 prev_no_caller_saved_registers
5866 = cfun->machine->no_caller_saved_registers;
5870 /* Return true if this goes in large data/bss. */
5872 static bool
5873 ix86_in_large_data_p (tree exp)
5875 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5876 return false;
5878 if (exp == NULL_TREE)
5879 return false;
5881 /* Functions are never large data. */
5882 if (TREE_CODE (exp) == FUNCTION_DECL)
5883 return false;
5885 /* Automatic variables are never large data. */
5886 if (VAR_P (exp) && !is_global_var (exp))
5887 return false;
5889 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
5891 const char *section = DECL_SECTION_NAME (exp);
5892 if (strcmp (section, ".ldata") == 0
5893 || strcmp (section, ".lbss") == 0)
5894 return true;
5895 return false;
5897 else
5899 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5901 /* If this is an incomplete type with size 0, then we can't put it
5902 in data because it might be too big when completed. Also,
5903 int_size_in_bytes returns -1 if the size can vary or is larger than
5904 an integer, in which case it is also safer to assume that it goes
5905 in large data. */
5906 if (size <= 0 || size > ix86_section_threshold)
5907 return true;
5910 return false;
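/* Illustrative sketch only: with -mcmodel=medium the predicate above sends
   big objects to the large data sections; assuming the default section
   threshold of 65536 bytes,

     static char hypothetical_big[1 << 20];    goes to .lbss
     static int  hypothetical_small;           stays in the normal .bss

   Objects explicitly placed in ".ldata" or ".lbss" with the section
   attribute count as large data regardless of size.  */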
5913 /* i386-specific section flag to mark large sections. */
5914 #define SECTION_LARGE SECTION_MACH_DEP
5916 /* Switch to the appropriate section for output of DECL.
5917 DECL is either a `VAR_DECL' node or a constant of some sort.
5918 RELOC indicates whether forming the initial value of DECL requires
5919 link-time relocations. */
5921 ATTRIBUTE_UNUSED static section *
5922 x86_64_elf_select_section (tree decl, int reloc,
5923 unsigned HOST_WIDE_INT align)
5925 if (ix86_in_large_data_p (decl))
5927 const char *sname = NULL;
5928 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
5929 switch (categorize_decl_for_section (decl, reloc))
5931 case SECCAT_DATA:
5932 sname = ".ldata";
5933 break;
5934 case SECCAT_DATA_REL:
5935 sname = ".ldata.rel";
5936 break;
5937 case SECCAT_DATA_REL_LOCAL:
5938 sname = ".ldata.rel.local";
5939 break;
5940 case SECCAT_DATA_REL_RO:
5941 sname = ".ldata.rel.ro";
5942 break;
5943 case SECCAT_DATA_REL_RO_LOCAL:
5944 sname = ".ldata.rel.ro.local";
5945 break;
5946 case SECCAT_BSS:
5947 sname = ".lbss";
5948 flags |= SECTION_BSS;
5949 break;
5950 case SECCAT_RODATA:
5951 case SECCAT_RODATA_MERGE_STR:
5952 case SECCAT_RODATA_MERGE_STR_INIT:
5953 case SECCAT_RODATA_MERGE_CONST:
5954 sname = ".lrodata";
5955 flags &= ~SECTION_WRITE;
5956 break;
5957 case SECCAT_SRODATA:
5958 case SECCAT_SDATA:
5959 case SECCAT_SBSS:
5960 gcc_unreachable ();
5961 case SECCAT_TEXT:
5962 case SECCAT_TDATA:
5963 case SECCAT_TBSS:
5964 /* We don't split these for the medium model. Place them into
5965 default sections and hope for the best. */
5966 break;
5968 if (sname)
5970 /* We might get called with string constants, but get_named_section
5971 doesn't like them as they are not DECLs. Also, we need to set
5972 flags in that case. */
5973 if (!DECL_P (decl))
5974 return get_section (sname, flags, NULL);
5975 return get_named_section (decl, sname, reloc);
5978 return default_elf_select_section (decl, reloc, align);
5981 /* Select a set of attributes for section NAME based on the properties
5982 of DECL and whether or not RELOC indicates that DECL's initializer
5983 might contain runtime relocations. */
5985 static unsigned int ATTRIBUTE_UNUSED
5986 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5988 unsigned int flags = default_section_type_flags (decl, name, reloc);
5990 if (ix86_in_large_data_p (decl))
5991 flags |= SECTION_LARGE;
5993 if (decl == NULL_TREE
5994 && (strcmp (name, ".ldata.rel.ro") == 0
5995 || strcmp (name, ".ldata.rel.ro.local") == 0))
5996 flags |= SECTION_RELRO;
5998 if (strcmp (name, ".lbss") == 0
5999 || strncmp (name, ".lbss.", 6) == 0
6000 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
6001 flags |= SECTION_BSS;
6003 return flags;
6006 /* Build up a unique section name, expressed as a
6007 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6008 RELOC indicates whether the initial value of EXP requires
6009 link-time relocations. */
6011 static void ATTRIBUTE_UNUSED
6012 x86_64_elf_unique_section (tree decl, int reloc)
6014 if (ix86_in_large_data_p (decl))
6016 const char *prefix = NULL;
6017 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6018 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6020 switch (categorize_decl_for_section (decl, reloc))
6022 case SECCAT_DATA:
6023 case SECCAT_DATA_REL:
6024 case SECCAT_DATA_REL_LOCAL:
6025 case SECCAT_DATA_REL_RO:
6026 case SECCAT_DATA_REL_RO_LOCAL:
6027 prefix = one_only ? ".ld" : ".ldata";
6028 break;
6029 case SECCAT_BSS:
6030 prefix = one_only ? ".lb" : ".lbss";
6031 break;
6032 case SECCAT_RODATA:
6033 case SECCAT_RODATA_MERGE_STR:
6034 case SECCAT_RODATA_MERGE_STR_INIT:
6035 case SECCAT_RODATA_MERGE_CONST:
6036 prefix = one_only ? ".lr" : ".lrodata";
6037 break;
6038 case SECCAT_SRODATA:
6039 case SECCAT_SDATA:
6040 case SECCAT_SBSS:
6041 gcc_unreachable ();
6042 case SECCAT_TEXT:
6043 case SECCAT_TDATA:
6044 case SECCAT_TBSS:
6045 /* We don't split these for the medium model. Place them into
6046 default sections and hope for the best. */
6047 break;
6049 if (prefix)
6051 const char *name, *linkonce;
6052 char *string;
6054 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6055 name = targetm.strip_name_encoding (name);
6057 /* If we're using one_only, then there needs to be a .gnu.linkonce
6058 prefix to the section name. */
6059 linkonce = one_only ? ".gnu.linkonce" : "";
6061 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6063 set_decl_section_name (decl, string);
6064 return;
6067 default_unique_section (decl, reloc);
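/* Illustrative sketch only: for a large-model variable the unique section
   name is the prefix chosen above plus the decl's assembler name, e.g.
   (variable name hypothetical):

     .ldata.hypothetical_var             ordinary unique data section
     .gnu.linkonce.ld.hypothetical_var   one-only data without COMDAT groups  */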
6070 #ifdef COMMON_ASM_OP
6072 #ifndef LARGECOMM_SECTION_ASM_OP
6073 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6074 #endif
6076 /* This says how to output assembler code to declare an
6077 uninitialized external linkage data object.
6079 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
6080 directive for large objects. */
6081 void
6082 x86_elf_aligned_decl_common (FILE *file, tree decl,
6083 const char *name, unsigned HOST_WIDE_INT size,
6084 int align)
6086 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6087 && size > (unsigned int)ix86_section_threshold)
6089 switch_to_section (get_named_section (decl, ".lbss", 0));
6090 fputs (LARGECOMM_SECTION_ASM_OP, file);
6092 else
6093 fputs (COMMON_ASM_OP, file);
6094 assemble_name (file, name);
6095 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6096 size, align / BITS_PER_UNIT);
6098 #endif
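/* Illustrative sketch only: for a medium-model common symbol above the
   section threshold the routine above switches to .lbss and emits roughly

       .largecomm hypothetical_big,1048576,32

   while small objects keep the usual

       .comm hypothetical_small,4,4  */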
6100 /* Utility function for targets to use in implementing
6101 ASM_OUTPUT_ALIGNED_BSS. */
6103 void
6104 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6105 unsigned HOST_WIDE_INT size, int align)
6107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6108 && size > (unsigned int)ix86_section_threshold)
6109 switch_to_section (get_named_section (decl, ".lbss", 0));
6110 else
6111 switch_to_section (bss_section);
6112 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6113 #ifdef ASM_DECLARE_OBJECT_NAME
6114 last_assemble_variable_decl = decl;
6115 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6116 #else
6117 /* Standard thing is just output label for the object. */
6118 ASM_OUTPUT_LABEL (file, name);
6119 #endif /* ASM_DECLARE_OBJECT_NAME */
6120 ASM_OUTPUT_SKIP (file, size ? size : 1);
6123 /* Decide whether we must probe the stack before any space allocation
6124 on this target. It's essentially TARGET_STACK_PROBE except when
6125 -fstack-check causes the stack to be already probed differently. */
6127 bool
6128 ix86_target_stack_probe (void)
6130 /* Do not probe the stack twice if static stack checking is enabled. */
6131 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6132 return false;
6134 return TARGET_STACK_PROBE;
6137 /* Decide whether we can make a sibling call to a function. DECL is the
6138 declaration of the function being targeted by the call and EXP is the
6139 CALL_EXPR representing the call. */
6141 static bool
6142 ix86_function_ok_for_sibcall (tree decl, tree exp)
6144 tree type, decl_or_type;
6145 rtx a, b;
6146 bool bind_global = decl && !targetm.binds_local_p (decl);
6148 if (ix86_function_naked (current_function_decl))
6149 return false;
6151 /* Sibling call isn't OK if there are no caller-saved registers
6152 since all registers must be preserved before return. */
6153 if (cfun->machine->no_caller_saved_registers)
6154 return false;
6156 /* If we are generating position-independent code, we cannot sibcall
6157 optimize direct calls to global functions, as the PLT requires
6158 %ebx be live. (Darwin does not have a PLT.) */
6159 if (!TARGET_MACHO
6160 && !TARGET_64BIT
6161 && flag_pic
6162 && flag_plt
6163 && bind_global)
6164 return false;
6166 /* If we need to align the outgoing stack, then sibcalling would
6167 unalign the stack, which may break the called function. */
6168 if (ix86_minimum_incoming_stack_boundary (true)
6169 < PREFERRED_STACK_BOUNDARY)
6170 return false;
6172 if (decl)
6174 decl_or_type = decl;
6175 type = TREE_TYPE (decl);
6177 else
6179 /* We're looking at the CALL_EXPR, we need the type of the function. */
6180 type = CALL_EXPR_FN (exp); /* pointer expression */
6181 type = TREE_TYPE (type); /* pointer type */
6182 type = TREE_TYPE (type); /* function type */
6183 decl_or_type = type;
6186 /* Check that the return value locations are the same. Like
6187 if we are returning floats on the 80387 register stack, we cannot
6188 make a sibcall from a function that doesn't return a float to a
6189 function that does or, conversely, from a function that does return
6190 a float to a function that doesn't; the necessary stack adjustment
6191 would not be executed. This is also the place we notice
6192 differences in the return value ABI. Note that it is ok for one
6193 of the functions to have void return type as long as the return
6194 value of the other is passed in a register. */
6195 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6196 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6197 cfun->decl, false);
6198 if (STACK_REG_P (a) || STACK_REG_P (b))
6200 if (!rtx_equal_p (a, b))
6201 return false;
6203 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6205 else if (!rtx_equal_p (a, b))
6206 return false;
6208 if (TARGET_64BIT)
6210 /* The SYSV ABI has more call-clobbered registers;
6211 disallow sibcalls from MS to SYSV. */
6212 if (cfun->machine->call_abi == MS_ABI
6213 && ix86_function_type_abi (type) == SYSV_ABI)
6214 return false;
6216 else
6218 /* If this call is indirect, we'll need to be able to use a
6219 call-clobbered register for the address of the target function.
6220 Make sure that all such registers are not used for passing
6221 parameters. Note that DLLIMPORT functions and call to global
6222 function via GOT slot are indirect. */
6223 if (!decl
6224 || (bind_global && flag_pic && !flag_plt)
6225 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6227 /* Check if regparm >= 3 since arg_reg_available is set to
6228 false if regparm == 0. If regparm is 1 or 2, there is
6229 always a call-clobbered register available.
6231 ??? The symbol indirect call doesn't need a call-clobbered
6232 register. But we don't know if this is a symbol indirect
6233 call or not here. */
6234 if (ix86_function_regparm (type, NULL) >= 3
6235 && !cfun->machine->arg_reg_available)
6236 return false;
6240 /* Otherwise okay. That also includes certain types of indirect calls. */
6241 return true;
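/* Illustrative sketch only: one consequence of the PIC/PLT check above is
   that on 32-bit with -fpic and a PLT, a tail call to a global function
   such as a hypothetical "extern void hypothetical_ext (void);" is not
   turned into a sibcall, because the PLT entry needs %ebx to hold the GOT
   pointer at the call site.  */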
6244 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6245 and "sseregparm" calling convention attributes;
6246 arguments as in struct attribute_spec.handler. */
6248 static tree
6249 ix86_handle_cconv_attribute (tree *node, tree name,
6250 tree args,
6251 int,
6252 bool *no_add_attrs)
6254 if (TREE_CODE (*node) != FUNCTION_TYPE
6255 && TREE_CODE (*node) != METHOD_TYPE
6256 && TREE_CODE (*node) != FIELD_DECL
6257 && TREE_CODE (*node) != TYPE_DECL)
6259 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6260 name);
6261 *no_add_attrs = true;
6262 return NULL_TREE;
6265 /* Can combine regparm with all attributes but fastcall and thiscall. */
6266 if (is_attribute_p ("regparm", name))
6268 tree cst;
6270 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6272 error ("fastcall and regparm attributes are not compatible");
6275 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6277 error ("regparam and thiscall attributes are not compatible");
6280 cst = TREE_VALUE (args);
6281 if (TREE_CODE (cst) != INTEGER_CST)
6283 warning (OPT_Wattributes,
6284 "%qE attribute requires an integer constant argument",
6285 name);
6286 *no_add_attrs = true;
6288 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6290 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6291 name, REGPARM_MAX);
6292 *no_add_attrs = true;
6295 return NULL_TREE;
6298 if (TARGET_64BIT)
6300 /* Do not warn when emulating the MS ABI. */
6301 if ((TREE_CODE (*node) != FUNCTION_TYPE
6302 && TREE_CODE (*node) != METHOD_TYPE)
6303 || ix86_function_type_abi (*node) != MS_ABI)
6304 warning (OPT_Wattributes, "%qE attribute ignored",
6305 name);
6306 *no_add_attrs = true;
6307 return NULL_TREE;
6310 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6311 if (is_attribute_p ("fastcall", name))
6313 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6315 error ("fastcall and cdecl attributes are not compatible");
6317 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6319 error ("fastcall and stdcall attributes are not compatible");
6321 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6323 error ("fastcall and regparm attributes are not compatible");
6325 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6327 error ("fastcall and thiscall attributes are not compatible");
6331 /* Can combine stdcall with fastcall (redundant), regparm and
6332 sseregparm. */
6333 else if (is_attribute_p ("stdcall", name))
6335 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6337 error ("stdcall and cdecl attributes are not compatible");
6339 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6341 error ("stdcall and fastcall attributes are not compatible");
6343 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6345 error ("stdcall and thiscall attributes are not compatible");
6349 /* Can combine cdecl with regparm and sseregparm. */
6350 else if (is_attribute_p ("cdecl", name))
6352 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6354 error ("stdcall and cdecl attributes are not compatible");
6356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6358 error ("fastcall and cdecl attributes are not compatible");
6360 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6362 error ("cdecl and thiscall attributes are not compatible");
6365 else if (is_attribute_p ("thiscall", name))
6367 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6368 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6369 name);
6370 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6372 error ("stdcall and thiscall attributes are not compatible");
6374 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6376 error ("fastcall and thiscall attributes are not compatible");
6378 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6380 error ("cdecl and thiscall attributes are not compatible");
6384 /* Can combine sseregparm with all attributes. */
6386 return NULL_TREE;
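/* Illustrative sketch only: combinations accepted or rejected by the
   handler above (function names hypothetical):

     __attribute__ ((stdcall, regparm (2)))  void hypothetical_ok (void);
     __attribute__ ((fastcall, regparm (2))) void hypothetical_bad (void);

   The first is accepted (stdcall combines with regparm); the second is
   rejected with "fastcall and regparm attributes are not compatible".  */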
6389 /* The transactional memory builtins are implicitly regparm or fastcall
6390 depending on the ABI. Override the generic do-nothing attribute that
6391 these builtins were declared with, and replace it with one of the two
6392 attributes that we expect elsewhere. */
6394 static tree
6395 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6396 int flags, bool *no_add_attrs)
6398 tree alt;
6400 /* In no case do we want to add the placeholder attribute. */
6401 *no_add_attrs = true;
6403 /* The 64-bit ABI is unchanged for transactional memory. */
6404 if (TARGET_64BIT)
6405 return NULL_TREE;
6407 /* ??? Is there a better way to validate 32-bit Windows? We have
6408 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
6409 if (CHECK_STACK_LIMIT > 0)
6410 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6411 else
6413 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6414 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6416 decl_attributes (node, alt, flags);
6418 return NULL_TREE;
6421 /* This function determines from TYPE the calling-convention. */
6423 unsigned int
6424 ix86_get_callcvt (const_tree type)
6426 unsigned int ret = 0;
6427 bool is_stdarg;
6428 tree attrs;
6430 if (TARGET_64BIT)
6431 return IX86_CALLCVT_CDECL;
6433 attrs = TYPE_ATTRIBUTES (type);
6434 if (attrs != NULL_TREE)
6436 if (lookup_attribute ("cdecl", attrs))
6437 ret |= IX86_CALLCVT_CDECL;
6438 else if (lookup_attribute ("stdcall", attrs))
6439 ret |= IX86_CALLCVT_STDCALL;
6440 else if (lookup_attribute ("fastcall", attrs))
6441 ret |= IX86_CALLCVT_FASTCALL;
6442 else if (lookup_attribute ("thiscall", attrs))
6443 ret |= IX86_CALLCVT_THISCALL;
6445 /* Regparm isn't allowed for thiscall and fastcall. */
6446 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6448 if (lookup_attribute ("regparm", attrs))
6449 ret |= IX86_CALLCVT_REGPARM;
6450 if (lookup_attribute ("sseregparm", attrs))
6451 ret |= IX86_CALLCVT_SSEREGPARM;
6454 if (IX86_BASE_CALLCVT(ret) != 0)
6455 return ret;
6458 is_stdarg = stdarg_p (type);
6459 if (TARGET_RTD && !is_stdarg)
6460 return IX86_CALLCVT_STDCALL | ret;
6462 if (ret != 0
6463 || is_stdarg
6464 || TREE_CODE (type) != METHOD_TYPE
6465 || ix86_function_type_abi (type) != MS_ABI)
6466 return IX86_CALLCVT_CDECL | ret;
6468 return IX86_CALLCVT_THISCALL;
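/* Illustrative sketch only: a plain prototyped 32-bit function gets
   IX86_CALLCVT_CDECL from the routine above; the same non-variadic
   function compiled with -mrtd gets IX86_CALLCVT_STDCALL instead, while
   variadic functions always stay cdecl since the caller must pop the
   variable arguments.  */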
6471 /* Return 0 if the attributes for two types are incompatible, 1 if they
6472 are compatible, and 2 if they are nearly compatible (which causes a
6473 warning to be generated). */
6475 static int
6476 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6478 unsigned int ccvt1, ccvt2;
6480 if (TREE_CODE (type1) != FUNCTION_TYPE
6481 && TREE_CODE (type1) != METHOD_TYPE)
6482 return 1;
6484 ccvt1 = ix86_get_callcvt (type1);
6485 ccvt2 = ix86_get_callcvt (type2);
6486 if (ccvt1 != ccvt2)
6487 return 0;
6488 if (ix86_function_regparm (type1, NULL)
6489 != ix86_function_regparm (type2, NULL))
6490 return 0;
6492 return 1;
6495 /* Return the regparm value for a function with the indicated TYPE and DECL.
6496 DECL may be NULL when calling function indirectly
6497 or considering a libcall. */
6499 static int
6500 ix86_function_regparm (const_tree type, const_tree decl)
6502 tree attr;
6503 int regparm;
6504 unsigned int ccvt;
6506 if (TARGET_64BIT)
6507 return (ix86_function_type_abi (type) == SYSV_ABI
6508 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6509 ccvt = ix86_get_callcvt (type);
6510 regparm = ix86_regparm;
6512 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6514 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6515 if (attr)
6517 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6518 return regparm;
6521 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6522 return 2;
6523 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6524 return 1;
6526 /* Use register calling convention for local functions when possible. */
6527 if (decl
6528 && TREE_CODE (decl) == FUNCTION_DECL)
6530 cgraph_node *target = cgraph_node::get (decl);
6531 if (target)
6532 target = target->function_symbol ();
6534 /* Caller and callee must agree on the calling convention, so
6535 checking just the current function's optimize setting would mean
6536 that with __attribute__((optimize (...))) the caller could use the
6537 regparm convention and the callee not, or vice versa. Instead look
6538 at whether the callee is optimized or not. */
6539 if (target && opt_for_fn (target->decl, optimize)
6540 && !(profile_flag && !flag_fentry))
6542 cgraph_local_info *i = &target->local;
6543 if (i && i->local && i->can_change_signature)
6545 int local_regparm, globals = 0, regno;
6547 /* Make sure no regparm register is taken by a
6548 fixed register variable. */
6549 for (local_regparm = 0; local_regparm < REGPARM_MAX;
6550 local_regparm++)
6551 if (fixed_regs[local_regparm])
6552 break;
6554 /* We don't want to use regparm(3) for nested functions as
6555 these use a static chain pointer in the third argument. */
6556 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6557 local_regparm = 2;
6559 /* Save a register for the split stack. */
6560 if (flag_split_stack)
6562 if (local_regparm == 3)
6563 local_regparm = 2;
6564 else if (local_regparm == 2
6565 && DECL_STATIC_CHAIN (target->decl))
6566 local_regparm = 1;
6569 /* Each fixed register usage increases register pressure,
6570 so fewer registers should be used for argument passing.
6571 This functionality can be overridden by an explicit
6572 regparm value. */
6573 for (regno = AX_REG; regno <= DI_REG; regno++)
6574 if (fixed_regs[regno])
6575 globals++;
6577 local_regparm
6578 = globals < local_regparm ? local_regparm - globals : 0;
6580 if (local_regparm > regparm)
6581 regparm = local_regparm;
6586 return regparm;
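/* Illustrative sketch only: a local, optimized 32-bit function such as

     static int hypothetical_leaf (int a, int b, int c) { return a + b + c; }

   whose address never escapes may be promoted by the code above to the
   regparm(3) convention, dropping back to regparm(2) when the function
   needs a static chain or -fsplit-stack reserves a register.  */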
6589 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6590 DFmode (2) arguments in SSE registers for a function with the
6591 indicated TYPE and DECL. DECL may be NULL when calling a function
6592 indirectly or considering a libcall. Return -1 if any FP parameter
6593 should be rejected by an error. This is used in situations where we
6594 imply the SSE calling convention but the function is called from
6595 another function with SSE disabled. Otherwise return 0. */
6597 static int
6598 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6600 gcc_assert (!TARGET_64BIT);
6602 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6603 by the sseregparm attribute. */
6604 if (TARGET_SSEREGPARM
6605 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6607 if (!TARGET_SSE)
6609 if (warn)
6611 if (decl)
6612 error ("calling %qD with attribute sseregparm without "
6613 "SSE/SSE2 enabled", decl);
6614 else
6615 error ("calling %qT with attribute sseregparm without "
6616 "SSE/SSE2 enabled", type);
6618 return 0;
6621 return 2;
6624 if (!decl)
6625 return 0;
6627 cgraph_node *target = cgraph_node::get (decl);
6628 if (target)
6629 target = target->function_symbol ();
6631 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6632 (and DFmode for SSE2) arguments in SSE registers. */
6633 if (target
6634 /* TARGET_SSE_MATH */
6635 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6636 && opt_for_fn (target->decl, optimize)
6637 && !(profile_flag && !flag_fentry))
6639 cgraph_local_info *i = &target->local;
6640 if (i && i->local && i->can_change_signature)
6642 /* Refuse to produce wrong code when a local function with SSE enabled
6643 is called from an SSE-disabled function.
6644 FIXME: We need a way to detect these cases across ltrans partitions
6645 and avoid using SSE calling conventions on local functions called
6646 from functions with SSE disabled. For now at least delay the
6647 warning until we know we are going to produce wrong code.
6648 See PR66047. */
6649 if (!TARGET_SSE && warn)
6650 return -1;
6651 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6652 ->x_ix86_isa_flags) ? 2 : 1;
6656 return 0;
6659 /* Return true if EAX is live at the start of the function. Used by
6660 ix86_expand_prologue to determine if we need special help before
6661 calling allocate_stack_worker. */
6663 static bool
6664 ix86_eax_live_at_start_p (void)
6666 /* Cheat. Don't bother working forward from ix86_function_regparm
6667 to the function type to whether an actual argument is located in
6668 eax. Instead just look at cfg info, which is still close enough
6669 to correct at this point. This gives false positives for broken
6670 functions that might use uninitialized data that happens to be
6671 allocated in eax, but who cares? */
6672 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6675 static bool
6676 ix86_keep_aggregate_return_pointer (tree fntype)
6678 tree attr;
6680 if (!TARGET_64BIT)
6682 attr = lookup_attribute ("callee_pop_aggregate_return",
6683 TYPE_ATTRIBUTES (fntype));
6684 if (attr)
6685 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6687 /* For 32-bit MS-ABI the default is to keep aggregate
6688 return pointer. */
6689 if (ix86_function_type_abi (fntype) == MS_ABI)
6690 return true;
6692 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6695 /* Value is the number of bytes of arguments automatically
6696 popped when returning from a subroutine call.
6697 FUNDECL is the declaration node of the function (as a tree),
6698 FUNTYPE is the data type of the function (as a tree),
6699 or for a library call it is an identifier node for the subroutine name.
6700 SIZE is the number of bytes of arguments passed on the stack.
6702 On the 80386, the RTD insn may be used to pop them if the number
6703 of args is fixed, but if the number is variable then the caller
6704 must pop them all. RTD can't be used for library calls now
6705 because the library is compiled with the Unix compiler.
6706 Use of RTD is a selectable option, since it is incompatible with
6707 standard Unix calling sequences. If the option is not selected,
6708 the caller must always pop the args.
6710 The attribute stdcall is equivalent to RTD on a per module basis. */
6712 static int
6713 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6715 unsigned int ccvt;
6717 /* None of the 64-bit ABIs pop arguments. */
6718 if (TARGET_64BIT)
6719 return 0;
6721 ccvt = ix86_get_callcvt (funtype);
6723 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6724 | IX86_CALLCVT_THISCALL)) != 0
6725 && ! stdarg_p (funtype))
6726 return size;
6728 /* Lose any fake structure return argument if it is passed on the stack. */
6729 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6730 && !ix86_keep_aggregate_return_pointer (funtype))
6732 int nregs = ix86_function_regparm (funtype, fundecl);
6733 if (nregs == 0)
6734 return GET_MODE_SIZE (Pmode);
6737 return 0;
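/* Illustrative sketch only: a 32-bit stdcall function taking 8 bytes of
   stack arguments makes the hook above return 8, so its epilogue uses
   "ret $8" and the callee pops its own arguments; a variadic function
   returns 0 and leaves the popping to the caller.  */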
6740 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6742 static bool
6743 ix86_legitimate_combined_insn (rtx_insn *insn)
6745 int i;
6747 /* Check operand constraints in case hard registers were propagated
6748 into the insn pattern. This check prevents the combine pass from
6749 generating insn patterns with invalid hard register operands.
6750 These invalid insns can eventually cause reload to error out
6751 with a spill failure. See also PRs 46829 and 46843. */
6753 gcc_assert (INSN_CODE (insn) >= 0);
6755 extract_insn (insn);
6756 preprocess_constraints (insn);
6758 int n_operands = recog_data.n_operands;
6759 int n_alternatives = recog_data.n_alternatives;
6760 for (i = 0; i < n_operands; i++)
6762 rtx op = recog_data.operand[i];
6763 machine_mode mode = GET_MODE (op);
6764 const operand_alternative *op_alt;
6765 int offset = 0;
6766 bool win;
6767 int j;
6769 /* A unary operator may be accepted by the predicate, but it
6770 is irrelevant for matching constraints. */
6771 if (UNARY_P (op))
6772 op = XEXP (op, 0);
6774 if (SUBREG_P (op))
6776 if (REG_P (SUBREG_REG (op))
6777 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6778 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6779 GET_MODE (SUBREG_REG (op)),
6780 SUBREG_BYTE (op),
6781 GET_MODE (op));
6782 op = SUBREG_REG (op);
6785 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6786 continue;
6788 op_alt = recog_op_alt;
6790 /* Operand has no constraints, anything is OK. */
6791 win = !n_alternatives;
6793 alternative_mask preferred = get_preferred_alternatives (insn);
6794 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6796 if (!TEST_BIT (preferred, j))
6797 continue;
6798 if (op_alt[i].anything_ok
6799 || (op_alt[i].matches != -1
6800 && operands_match_p
6801 (recog_data.operand[i],
6802 recog_data.operand[op_alt[i].matches]))
6803 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6805 win = true;
6806 break;
6810 if (!win)
6811 return false;
6814 return true;
6817 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6819 static unsigned HOST_WIDE_INT
6820 ix86_asan_shadow_offset (void)
6822 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6823 : HOST_WIDE_INT_C (0x7fff8000))
6824 : (HOST_WIDE_INT_1 << 29);
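/* Illustrative sketch only: AddressSanitizer maps an address to its shadow
   byte as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so with the values above an LP64 Linux target ends up at
   addr / 8 + 0x7fff8000.  */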
6827 /* Argument support functions. */
6829 /* Return true when register may be used to pass function parameters. */
6830 bool
6831 ix86_function_arg_regno_p (int regno)
6833 int i;
6834 enum calling_abi call_abi;
6835 const int *parm_regs;
6837 if (TARGET_MPX && BND_REGNO_P (regno))
6838 return true;
6840 if (!TARGET_64BIT)
6842 if (TARGET_MACHO)
6843 return (regno < REGPARM_MAX
6844 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6845 else
6846 return (regno < REGPARM_MAX
6847 || (TARGET_MMX && MMX_REGNO_P (regno)
6848 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6849 || (TARGET_SSE && SSE_REGNO_P (regno)
6850 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6853 if (TARGET_SSE && SSE_REGNO_P (regno)
6854 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6855 return true;
6857 /* TODO: The function should depend on current function ABI but
6858 builtins.c would need updating then. Therefore we use the
6859 default ABI. */
6860 call_abi = ix86_cfun_abi ();
6862 /* RAX is used as hidden argument to va_arg functions. */
6863 if (call_abi == SYSV_ABI && regno == AX_REG)
6864 return true;
6866 if (call_abi == MS_ABI)
6867 parm_regs = x86_64_ms_abi_int_parameter_registers;
6868 else
6869 parm_regs = x86_64_int_parameter_registers;
6871 for (i = 0; i < (call_abi == MS_ABI
6872 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6873 if (regno == parm_regs[i])
6874 return true;
6875 return false;
6878 /* Return if we do not know how to pass TYPE solely in registers. */
6880 static bool
6881 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6883 if (must_pass_in_stack_var_size_or_pad (mode, type))
6884 return true;
6886 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6887 The layout_type routine is crafty and tries to trick us into passing
6888 currently unsupported vector types on the stack by using TImode. */
6889 return (!TARGET_64BIT && mode == TImode
6890 && type && TREE_CODE (type) != VECTOR_TYPE);
6893 /* Return the size, in bytes, of the area reserved for arguments passed
6894 in registers for the function represented by FNDECL, depending on the
6895 ABI used. */
6896 int
6897 ix86_reg_parm_stack_space (const_tree fndecl)
6899 enum calling_abi call_abi = SYSV_ABI;
6900 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6901 call_abi = ix86_function_abi (fndecl);
6902 else
6903 call_abi = ix86_function_type_abi (fndecl);
6904 if (TARGET_64BIT && call_abi == MS_ABI)
6905 return 32;
6906 return 0;
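/* Illustrative sketch only: the 32 bytes returned above for the 64-bit
   MS ABI are the "home area" (shadow space) the caller reserves on the
   stack for the four register parameters, conceptually

     subq $32, %rsp
     call hypothetical_ms_abi_fn  */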
6909 /* We add this as a workaround in order to use libc_has_function
6910 hook in i386.md. */
6911 bool
6912 ix86_libc_has_function (enum function_class fn_class)
6914 return targetm.libc_has_function (fn_class);
6917 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6918 call ABI used. */
6919 enum calling_abi
6920 ix86_function_type_abi (const_tree fntype)
6922 enum calling_abi abi = ix86_abi;
6924 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6925 return abi;
6927 if (abi == SYSV_ABI
6928 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6930 static int warned;
6931 if (TARGET_X32 && !warned)
6933 error ("X32 does not support ms_abi attribute");
6934 warned = 1;
6937 abi = MS_ABI;
6939 else if (abi == MS_ABI
6940 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6941 abi = SYSV_ABI;
6943 return abi;
6946 static enum calling_abi
6947 ix86_function_abi (const_tree fndecl)
6949 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6952 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6953 call ABI used. */
6954 enum calling_abi
6955 ix86_cfun_abi (void)
6957 return cfun ? cfun->machine->call_abi : ix86_abi;
6960 static bool
6961 ix86_function_ms_hook_prologue (const_tree fn)
6963 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6965 if (decl_function_context (fn) != NULL_TREE)
6966 error_at (DECL_SOURCE_LOCATION (fn),
6967 "ms_hook_prologue is not compatible with nested function");
6968 else
6969 return true;
6971 return false;
6974 static bool
6975 ix86_function_naked (const_tree fn)
6977 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
6978 return true;
6980 return false;
6983 /* Write the extra assembler code needed to declare a function properly. */
6985 void
6986 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6987 tree decl)
6989 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6991 if (is_ms_hook)
6993 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6994 unsigned int filler_cc = 0xcccccccc;
6996 for (i = 0; i < filler_count; i += 4)
6997 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7000 #ifdef SUBTARGET_ASM_UNWIND_INIT
7001 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7002 #endif
7004 ASM_OUTPUT_LABEL (asm_out_file, fname);
7006 /* Output magic byte marker, if hot-patch attribute is set. */
7007 if (is_ms_hook)
7009 if (TARGET_64BIT)
7011 /* leaq [%rsp + 0], %rsp */
7012 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7013 asm_out_file);
7015 else
7017 /* movl.s %edi, %edi
7018 push %ebp
7019 movl.s %esp, %ebp */
7020 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
7025 /* Implementation of call abi switching target hook. Specific to FNDECL
7026 the specific call register sets are set. See also
7027 ix86_conditional_register_usage for more details. */
7028 void
7029 ix86_call_abi_override (const_tree fndecl)
7031 cfun->machine->call_abi = ix86_function_abi (fndecl);
7034 /* Return true if a pseudo register should be created and used to hold
7035 the GOT address for PIC code. */
7036 bool
7037 ix86_use_pseudo_pic_reg (void)
7039 if ((TARGET_64BIT
7040 && (ix86_cmodel == CM_SMALL_PIC
7041 || TARGET_PECOFF))
7042 || !flag_pic)
7043 return false;
7044 return true;
7047 /* Initialize large model PIC register. */
7049 static void
7050 ix86_init_large_pic_reg (unsigned int tmp_regno)
7052 rtx_code_label *label;
7053 rtx tmp_reg;
7055 gcc_assert (Pmode == DImode);
7056 label = gen_label_rtx ();
7057 emit_label (label);
7058 LABEL_PRESERVE_P (label) = 1;
7059 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7060 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7061 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7062 label));
7063 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7064 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7065 pic_offset_table_rtx, tmp_reg));
7066 const char *name = LABEL_NAME (label);
7067 PUT_CODE (label, NOTE);
7068 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7069 NOTE_DELETED_LABEL_NAME (label) = name;
7072 /* Create and initialize PIC register if required. */
7073 static void
7074 ix86_init_pic_reg (void)
7076 edge entry_edge;
7077 rtx_insn *seq;
7079 if (!ix86_use_pseudo_pic_reg ())
7080 return;
7082 start_sequence ();
7084 if (TARGET_64BIT)
7086 if (ix86_cmodel == CM_LARGE_PIC)
7087 ix86_init_large_pic_reg (R11_REG);
7088 else
7089 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7091 else
7093 /* If there is a future mcount call in the function, it is more profitable
7094 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7095 rtx reg = crtl->profile
7096 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7097 : pic_offset_table_rtx;
7098 rtx_insn *insn = emit_insn (gen_set_got (reg));
7099 RTX_FRAME_RELATED_P (insn) = 1;
7100 if (crtl->profile)
7101 emit_move_insn (pic_offset_table_rtx, reg);
7102 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7105 seq = get_insns ();
7106 end_sequence ();
7108 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7109 insert_insn_on_edge (seq, entry_edge);
7110 commit_one_edge_insertion (entry_edge);
7113 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7114 for a call to a function whose data type is FNTYPE.
7115 For a library call, FNTYPE is 0. */
7117 void
7118 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7119 tree fntype, /* tree ptr for function decl */
7120 rtx libname, /* SYMBOL_REF of library name or 0 */
7121 tree fndecl,
7122 int caller)
7124 struct cgraph_local_info *i = NULL;
7125 struct cgraph_node *target = NULL;
7127 memset (cum, 0, sizeof (*cum));
7129 if (fndecl)
7131 target = cgraph_node::get (fndecl);
7132 if (target)
7134 target = target->function_symbol ();
7135 i = cgraph_node::local_info (target->decl);
7136 cum->call_abi = ix86_function_abi (target->decl);
7138 else
7139 cum->call_abi = ix86_function_abi (fndecl);
7141 else
7142 cum->call_abi = ix86_function_type_abi (fntype);
7144 cum->caller = caller;
7146 /* Set up the number of registers to use for passing arguments. */
7147 cum->nregs = ix86_regparm;
7148 if (TARGET_64BIT)
7150 cum->nregs = (cum->call_abi == SYSV_ABI
7151 ? X86_64_REGPARM_MAX
7152 : X86_64_MS_REGPARM_MAX);
7154 if (TARGET_SSE)
7156 cum->sse_nregs = SSE_REGPARM_MAX;
7157 if (TARGET_64BIT)
7159 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7160 ? X86_64_SSE_REGPARM_MAX
7161 : X86_64_MS_SSE_REGPARM_MAX);
7164 if (TARGET_MMX)
7165 cum->mmx_nregs = MMX_REGPARM_MAX;
7166 cum->warn_avx512f = true;
7167 cum->warn_avx = true;
7168 cum->warn_sse = true;
7169 cum->warn_mmx = true;
7171 /* Because the type might mismatch between caller and callee, we need to
7172 use the actual type of the function for local calls.
7173 FIXME: cgraph_analyze can be told to actually record if a function uses
7174 va_start, so for local functions maybe_vaarg can be made more aggressive,
7175 helping K&R code.
7176 FIXME: once the type system is fixed, we won't need this code anymore. */
7177 if (i && i->local && i->can_change_signature)
7178 fntype = TREE_TYPE (target->decl);
7179 cum->stdarg = stdarg_p (fntype);
7180 cum->maybe_vaarg = (fntype
7181 ? (!prototype_p (fntype) || stdarg_p (fntype))
7182 : !libname);
7184 cum->bnd_regno = FIRST_BND_REG;
7185 cum->bnds_in_bt = 0;
7186 cum->force_bnd_pass = 0;
7187 cum->decl = fndecl;
7189 if (!TARGET_64BIT)
7191 /* If there are variable arguments, then we won't pass anything
7192 in registers in 32-bit mode. */
7193 if (stdarg_p (fntype))
7195 cum->nregs = 0;
7196 /* Since in 32-bit mode variable arguments are always passed on
7197 the stack, there is a scratch register available for an indirect
7198 sibcall. */
7199 cfun->machine->arg_reg_available = true;
7200 cum->sse_nregs = 0;
7201 cum->mmx_nregs = 0;
7202 cum->warn_avx512f = false;
7203 cum->warn_avx = false;
7204 cum->warn_sse = false;
7205 cum->warn_mmx = false;
7206 return;
7209 /* Use ecx and edx registers if function has fastcall attribute,
7210 else look for regparm information. */
7211 if (fntype)
7213 unsigned int ccvt = ix86_get_callcvt (fntype);
7214 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7216 cum->nregs = 1;
7217 cum->fastcall = 1; /* Same first register as in fastcall. */
7219 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7221 cum->nregs = 2;
7222 cum->fastcall = 1;
7224 else
7225 cum->nregs = ix86_function_regparm (fntype, fndecl);
7228 /* Set up the number of SSE registers used for passing SFmode
7229 and DFmode arguments. Warn for mismatching ABI. */
7230 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7233 cfun->machine->arg_reg_available = (cum->nregs > 0);
7236 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7237 But in the case of vector types, it is some vector mode.
7239 When we have only some of our vector isa extensions enabled, then there
7240 are some modes for which vector_mode_supported_p is false. For these
7241 modes, the generic vector support in gcc will choose some non-vector mode
7242 in order to implement the type. By computing the natural mode, we'll
7243 select the proper ABI location for the operand and not depend on whatever
7244 the middle-end decides to do with these vector types.
7246 The middle-end can't deal with vector types larger than 16 bytes. In
7247 this case, we return the original mode and warn about the ABI change
7248 if CUM isn't NULL.
7250 If IN_RETURN is true, warn about the ABI change if the vector mode
7251 isn't available for the function return value. */
7253 static machine_mode
7254 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7255 bool in_return)
7257 machine_mode mode = TYPE_MODE (type);
7259 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7261 HOST_WIDE_INT size = int_size_in_bytes (type);
7262 if ((size == 8 || size == 16 || size == 32 || size == 64)
7263 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7264 && TYPE_VECTOR_SUBPARTS (type) > 1)
7266 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7268 /* There are no XFmode vector modes. */
7269 if (innermode == XFmode)
7270 return mode;
7272 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7273 mode = MIN_MODE_VECTOR_FLOAT;
7274 else
7275 mode = MIN_MODE_VECTOR_INT;
7277 /* Get the mode which has this inner mode and number of units. */
7278 FOR_EACH_MODE_FROM (mode, mode)
7279 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7280 && GET_MODE_INNER (mode) == innermode)
7282 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7284 static bool warnedavx512f;
7285 static bool warnedavx512f_ret;
7287 if (cum && cum->warn_avx512f && !warnedavx512f)
7289 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7290 "without AVX512F enabled changes the ABI"))
7291 warnedavx512f = true;
7293 else if (in_return && !warnedavx512f_ret)
7295 if (warning (OPT_Wpsabi, "AVX512F vector return "
7296 "without AVX512F enabled changes the ABI"))
7297 warnedavx512f_ret = true;
7300 return TYPE_MODE (type);
7302 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7304 static bool warnedavx;
7305 static bool warnedavx_ret;
7307 if (cum && cum->warn_avx && !warnedavx)
7309 if (warning (OPT_Wpsabi, "AVX vector argument "
7310 "without AVX enabled changes the ABI"))
7311 warnedavx = true;
7313 else if (in_return && !warnedavx_ret)
7315 if (warning (OPT_Wpsabi, "AVX vector return "
7316 "without AVX enabled changes the ABI"))
7317 warnedavx_ret = true;
7320 return TYPE_MODE (type);
7322 else if (((size == 8 && TARGET_64BIT) || size == 16)
7323 && !TARGET_SSE
7324 && !TARGET_IAMCU)
7326 static bool warnedsse;
7327 static bool warnedsse_ret;
7329 if (cum && cum->warn_sse && !warnedsse)
7331 if (warning (OPT_Wpsabi, "SSE vector argument "
7332 "without SSE enabled changes the ABI"))
7333 warnedsse = true;
7335 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7337 if (warning (OPT_Wpsabi, "SSE vector return "
7338 "without SSE enabled changes the ABI"))
7339 warnedsse_ret = true;
7342 else if ((size == 8 && !TARGET_64BIT)
7343 && (!cfun
7344 || cfun->machine->func_type == TYPE_NORMAL)
7345 && !TARGET_MMX
7346 && !TARGET_IAMCU)
7348 static bool warnedmmx;
7349 static bool warnedmmx_ret;
7351 if (cum && cum->warn_mmx && !warnedmmx)
7353 if (warning (OPT_Wpsabi, "MMX vector argument "
7354 "without MMX enabled changes the ABI"))
7355 warnedmmx = true;
7357 else if (in_return && !warnedmmx_ret)
7359 if (warning (OPT_Wpsabi, "MMX vector return "
7360 "without MMX enabled changes the ABI"))
7361 warnedmmx_ret = true;
7364 return mode;
7367 gcc_unreachable ();
7371 return mode;
7374 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7375 this may not agree with the mode that the type system has chosen for the
7376 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7377 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7379 static rtx
7380 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7381 unsigned int regno)
7383 rtx tmp;
7385 if (orig_mode != BLKmode)
7386 tmp = gen_rtx_REG (orig_mode, regno);
7387 else
7389 tmp = gen_rtx_REG (mode, regno);
7390 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7391 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7394 return tmp;
7397 /* x86-64 register passing implementation. See the x86-64 psABI for details. The
7398 goal of this code is to classify each eightbyte of the incoming argument by
7399 register class and assign registers accordingly. */
7401 /* Return the union class of CLASS1 and CLASS2.
7402 See the x86-64 PS ABI for details. */
7404 static enum x86_64_reg_class
7405 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7407 /* Rule #1: If both classes are equal, this is the resulting class. */
7408 if (class1 == class2)
7409 return class1;
7411 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7412 the other class. */
7413 if (class1 == X86_64_NO_CLASS)
7414 return class2;
7415 if (class2 == X86_64_NO_CLASS)
7416 return class1;
7418 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7419 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7420 return X86_64_MEMORY_CLASS;
7422 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7423 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7424 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7425 return X86_64_INTEGERSI_CLASS;
7426 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7427 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7428 return X86_64_INTEGER_CLASS;
7430 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7431 MEMORY is used. */
7432 if (class1 == X86_64_X87_CLASS
7433 || class1 == X86_64_X87UP_CLASS
7434 || class1 == X86_64_COMPLEX_X87_CLASS
7435 || class2 == X86_64_X87_CLASS
7436 || class2 == X86_64_X87UP_CLASS
7437 || class2 == X86_64_COMPLEX_X87_CLASS)
7438 return X86_64_MEMORY_CLASS;
7440 /* Rule #6: Otherwise class SSE is used. */
7441 return X86_64_SSE_CLASS;
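/* A few illustrative merges under the rules above (sketch only):
   merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS) yields SSESF (rule #2),
   merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS) yields INTEGER (rule #4),
   and merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS) yields MEMORY (rule #5).  */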
7444 /* Classify the argument of type TYPE and mode MODE.
7445 CLASSES will be filled by the register class used to pass each word
7446 of the operand. The number of words is returned. In case the parameter
7447 should be passed in memory, 0 is returned. As a special case for zero
7448 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7450 BIT_OFFSET is used internally for handling records; it specifies the
7451 offset in bits, modulo 512, to avoid overflow cases.
7453 See the x86-64 PS ABI for details. */
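/* An illustrative example of the classification below (sketch only, using a
   hypothetical struct):

     struct s { double d; int i; };

   occupies two eightbytes; the first (the double) classifies as an SSE
   class and the second (the int plus padding) as an integer class, so
   classify_argument returns 2 and the struct travels in one SSE and one
   integer register.  An aggregate larger than 16 bytes that is not a single
   SSE vector fails the SSE/SSEUP check further down and yields 0,
   i.e. memory.  */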
7456 static int
7457 classify_argument (machine_mode mode, const_tree type,
7458 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7460 HOST_WIDE_INT bytes =
7461 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7462 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7464 /* Variable sized entities are always passed/returned in memory. */
7465 if (bytes < 0)
7466 return 0;
7468 if (mode != VOIDmode
7469 && targetm.calls.must_pass_in_stack (mode, type))
7470 return 0;
7472 if (type && AGGREGATE_TYPE_P (type))
7474 int i;
7475 tree field;
7476 enum x86_64_reg_class subclasses[MAX_CLASSES];
7478 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7479 if (bytes > 64)
7480 return 0;
7482 for (i = 0; i < words; i++)
7483 classes[i] = X86_64_NO_CLASS;
7485 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7486 signal the memory class, so handle this as a special case. */
7487 if (!words)
7489 classes[0] = X86_64_NO_CLASS;
7490 return 1;
7493 /* Classify each field of record and merge classes. */
7494 switch (TREE_CODE (type))
7496 case RECORD_TYPE:
7497 /* And now merge the fields of structure. */
7498 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7500 if (TREE_CODE (field) == FIELD_DECL)
7502 int num;
7504 if (TREE_TYPE (field) == error_mark_node)
7505 continue;
7507 /* Bitfields are always classified as integer. Handle them
7508 early, since later code would consider them to be
7509 misaligned integers. */
7510 if (DECL_BIT_FIELD (field))
7512 for (i = (int_bit_position (field)
7513 + (bit_offset % 64)) / 8 / 8;
7514 i < ((int_bit_position (field) + (bit_offset % 64))
7515 + tree_to_shwi (DECL_SIZE (field))
7516 + 63) / 8 / 8; i++)
7517 classes[i] =
7518 merge_classes (X86_64_INTEGER_CLASS,
7519 classes[i]);
7521 else
7523 int pos;
7525 type = TREE_TYPE (field);
7527 /* Flexible array member is ignored. */
7528 if (TYPE_MODE (type) == BLKmode
7529 && TREE_CODE (type) == ARRAY_TYPE
7530 && TYPE_SIZE (type) == NULL_TREE
7531 && TYPE_DOMAIN (type) != NULL_TREE
7532 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7533 == NULL_TREE))
7535 static bool warned;
7537 if (!warned && warn_psabi)
7539 warned = true;
7540 inform (input_location,
7541 "the ABI of passing struct with"
7542 " a flexible array member has"
7543 " changed in GCC 4.4");
7545 continue;
7547 num = classify_argument (TYPE_MODE (type), type,
7548 subclasses,
7549 (int_bit_position (field)
7550 + bit_offset) % 512);
7551 if (!num)
7552 return 0;
7553 pos = (int_bit_position (field)
7554 + (bit_offset % 64)) / 8 / 8;
7555 for (i = 0; i < num && (i + pos) < words; i++)
7556 classes[i + pos] =
7557 merge_classes (subclasses[i], classes[i + pos]);
7561 break;
7563 case ARRAY_TYPE:
7564 /* Arrays are handled as small records. */
7566 int num;
7567 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7568 TREE_TYPE (type), subclasses, bit_offset);
7569 if (!num)
7570 return 0;
7572 /* The partial classes are now full classes. */
7573 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7574 subclasses[0] = X86_64_SSE_CLASS;
7575 if (subclasses[0] == X86_64_INTEGERSI_CLASS
7576 && !((bit_offset % 64) == 0 && bytes == 4))
7577 subclasses[0] = X86_64_INTEGER_CLASS;
7579 for (i = 0; i < words; i++)
7580 classes[i] = subclasses[i % num];
7582 break;
7584 case UNION_TYPE:
7585 case QUAL_UNION_TYPE:
7586 /* Unions are similar to RECORD_TYPE but offset is always 0. */
7588 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7590 if (TREE_CODE (field) == FIELD_DECL)
7592 int num;
7594 if (TREE_TYPE (field) == error_mark_node)
7595 continue;
7597 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7598 TREE_TYPE (field), subclasses,
7599 bit_offset);
7600 if (!num)
7601 return 0;
7602 for (i = 0; i < num && i < words; i++)
7603 classes[i] = merge_classes (subclasses[i], classes[i]);
7606 break;
7608 default:
7609 gcc_unreachable ();
7612 if (words > 2)
7614 /* When the size is greater than 16 bytes, if the first eightbyte isn't
7615 X86_64_SSE_CLASS or any of the others isn't
7616 X86_64_SSEUP_CLASS, everything should be passed in
7617 memory. */
7618 if (classes[0] != X86_64_SSE_CLASS)
7619 return 0;
7621 for (i = 1; i < words; i++)
7622 if (classes[i] != X86_64_SSEUP_CLASS)
7623 return 0;
7626 /* Final merger cleanup. */
7627 for (i = 0; i < words; i++)
7629 /* If one class is MEMORY, everything should be passed in
7630 memory. */
7631 if (classes[i] == X86_64_MEMORY_CLASS)
7632 return 0;
7634 /* X86_64_SSEUP_CLASS should always be preceded by
7635 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7636 if (classes[i] == X86_64_SSEUP_CLASS
7637 && classes[i - 1] != X86_64_SSE_CLASS
7638 && classes[i - 1] != X86_64_SSEUP_CLASS)
7640 /* The first one should never be X86_64_SSEUP_CLASS. */
7641 gcc_assert (i != 0);
7642 classes[i] = X86_64_SSE_CLASS;
7645 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7646 everything should be passed in memory. */
7647 if (classes[i] == X86_64_X87UP_CLASS
7648 && (classes[i - 1] != X86_64_X87_CLASS))
7650 static bool warned;
7652 /* The first one should never be X86_64_X87UP_CLASS. */
7653 gcc_assert (i != 0);
7654 if (!warned && warn_psabi)
7656 warned = true;
7657 inform (input_location,
7658 "the ABI of passing union with long double"
7659 " has changed in GCC 4.4");
7661 return 0;
7664 return words;
7667 /* Compute the alignment needed. We align all types to their natural boundaries,
7668 with the exception of XFmode, which is aligned to 64 bits. */
7669 if (mode != VOIDmode && mode != BLKmode)
7671 int mode_alignment = GET_MODE_BITSIZE (mode);
7673 if (mode == XFmode)
7674 mode_alignment = 128;
7675 else if (mode == XCmode)
7676 mode_alignment = 256;
7677 if (COMPLEX_MODE_P (mode))
7678 mode_alignment /= 2;
7679 /* Misaligned fields are always returned in memory. */
7680 if (bit_offset % mode_alignment)
7681 return 0;
7684 /* For V1xx modes, just use the base mode. */
7685 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7686 && GET_MODE_UNIT_SIZE (mode) == bytes)
7687 mode = GET_MODE_INNER (mode);
7689 /* Classification of atomic types. */
7690 switch (mode)
7692 case E_SDmode:
7693 case E_DDmode:
7694 classes[0] = X86_64_SSE_CLASS;
7695 return 1;
7696 case E_TDmode:
7697 classes[0] = X86_64_SSE_CLASS;
7698 classes[1] = X86_64_SSEUP_CLASS;
7699 return 2;
7700 case E_DImode:
7701 case E_SImode:
7702 case E_HImode:
7703 case E_QImode:
7704 case E_CSImode:
7705 case E_CHImode:
7706 case E_CQImode:
7708 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7710 /* Analyze last 128 bits only. */
7711 size = (size - 1) & 0x7f;
7713 if (size < 32)
7715 classes[0] = X86_64_INTEGERSI_CLASS;
7716 return 1;
7718 else if (size < 64)
7720 classes[0] = X86_64_INTEGER_CLASS;
7721 return 1;
7723 else if (size < 64+32)
7725 classes[0] = X86_64_INTEGER_CLASS;
7726 classes[1] = X86_64_INTEGERSI_CLASS;
7727 return 2;
7729 else if (size < 64+64)
7731 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7732 return 2;
7734 else
7735 gcc_unreachable ();
7737 case E_CDImode:
7738 case E_TImode:
7739 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7740 return 2;
7741 case E_COImode:
7742 case E_OImode:
7743 /* OImode shouldn't be used directly. */
7744 gcc_unreachable ();
7745 case E_CTImode:
7746 return 0;
7747 case E_SFmode:
7748 if (!(bit_offset % 64))
7749 classes[0] = X86_64_SSESF_CLASS;
7750 else
7751 classes[0] = X86_64_SSE_CLASS;
7752 return 1;
7753 case E_DFmode:
7754 classes[0] = X86_64_SSEDF_CLASS;
7755 return 1;
7756 case E_XFmode:
7757 classes[0] = X86_64_X87_CLASS;
7758 classes[1] = X86_64_X87UP_CLASS;
7759 return 2;
7760 case E_TFmode:
7761 classes[0] = X86_64_SSE_CLASS;
7762 classes[1] = X86_64_SSEUP_CLASS;
7763 return 2;
7764 case E_SCmode:
7765 classes[0] = X86_64_SSE_CLASS;
7766 if (!(bit_offset % 64))
7767 return 1;
7768 else
7770 static bool warned;
7772 if (!warned && warn_psabi)
7774 warned = true;
7775 inform (input_location,
7776 "the ABI of passing structure with complex float"
7777 " member has changed in GCC 4.4");
7779 classes[1] = X86_64_SSESF_CLASS;
7780 return 2;
7782 case E_DCmode:
7783 classes[0] = X86_64_SSEDF_CLASS;
7784 classes[1] = X86_64_SSEDF_CLASS;
7785 return 2;
7786 case E_XCmode:
7787 classes[0] = X86_64_COMPLEX_X87_CLASS;
7788 return 1;
7789 case E_TCmode:
7790 /* This mode is larger than 16 bytes. */
7791 return 0;
7792 case E_V8SFmode:
7793 case E_V8SImode:
7794 case E_V32QImode:
7795 case E_V16HImode:
7796 case E_V4DFmode:
7797 case E_V4DImode:
7798 classes[0] = X86_64_SSE_CLASS;
7799 classes[1] = X86_64_SSEUP_CLASS;
7800 classes[2] = X86_64_SSEUP_CLASS;
7801 classes[3] = X86_64_SSEUP_CLASS;
7802 return 4;
7803 case E_V8DFmode:
7804 case E_V16SFmode:
7805 case E_V8DImode:
7806 case E_V16SImode:
7807 case E_V32HImode:
7808 case E_V64QImode:
7809 classes[0] = X86_64_SSE_CLASS;
7810 classes[1] = X86_64_SSEUP_CLASS;
7811 classes[2] = X86_64_SSEUP_CLASS;
7812 classes[3] = X86_64_SSEUP_CLASS;
7813 classes[4] = X86_64_SSEUP_CLASS;
7814 classes[5] = X86_64_SSEUP_CLASS;
7815 classes[6] = X86_64_SSEUP_CLASS;
7816 classes[7] = X86_64_SSEUP_CLASS;
7817 return 8;
7818 case E_V4SFmode:
7819 case E_V4SImode:
7820 case E_V16QImode:
7821 case E_V8HImode:
7822 case E_V2DFmode:
7823 case E_V2DImode:
7824 classes[0] = X86_64_SSE_CLASS;
7825 classes[1] = X86_64_SSEUP_CLASS;
7826 return 2;
7827 case E_V1TImode:
7828 case E_V1DImode:
7829 case E_V2SFmode:
7830 case E_V2SImode:
7831 case E_V4HImode:
7832 case E_V8QImode:
7833 classes[0] = X86_64_SSE_CLASS;
7834 return 1;
7835 case E_BLKmode:
7836 case E_VOIDmode:
7837 return 0;
7838 default:
7839 gcc_assert (VECTOR_MODE_P (mode));
7841 if (bytes > 16)
7842 return 0;
7844 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7846 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7847 classes[0] = X86_64_INTEGERSI_CLASS;
7848 else
7849 classes[0] = X86_64_INTEGER_CLASS;
7850 classes[1] = X86_64_INTEGER_CLASS;
7851 return 1 + (bytes > 8);
7855 /* Examine the argument and set the number of registers required in each
7856 class. Return true iff the parameter should be passed in memory. */
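/* Continuing the sketch above: for struct s { double d; int i; } this sets
   *sse_nregs = 1 and *int_nregs = 1 and returns false, while a type
   containing a long double returns true for arguments, since the X87
   classes are only allowed in return values.  */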
7858 static bool
7859 examine_argument (machine_mode mode, const_tree type, int in_return,
7860 int *int_nregs, int *sse_nregs)
7862 enum x86_64_reg_class regclass[MAX_CLASSES];
7863 int n = classify_argument (mode, type, regclass, 0);
7865 *int_nregs = 0;
7866 *sse_nregs = 0;
7868 if (!n)
7869 return true;
7870 for (n--; n >= 0; n--)
7871 switch (regclass[n])
7873 case X86_64_INTEGER_CLASS:
7874 case X86_64_INTEGERSI_CLASS:
7875 (*int_nregs)++;
7876 break;
7877 case X86_64_SSE_CLASS:
7878 case X86_64_SSESF_CLASS:
7879 case X86_64_SSEDF_CLASS:
7880 (*sse_nregs)++;
7881 break;
7882 case X86_64_NO_CLASS:
7883 case X86_64_SSEUP_CLASS:
7884 break;
7885 case X86_64_X87_CLASS:
7886 case X86_64_X87UP_CLASS:
7887 case X86_64_COMPLEX_X87_CLASS:
7888 if (!in_return)
7889 return true;
7890 break;
7891 case X86_64_MEMORY_CLASS:
7892 gcc_unreachable ();
7895 return false;
7898 /* Construct container for the argument used by GCC interface. See
7899 FUNCTION_ARG for the detailed description. */
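/* A rough sketch of the result for the struct s { double d; int i; }
   example above, when used as an argument: a PARALLEL of two EXPR_LISTs,
   one pairing an SSE register in DFmode with byte offset 0 and one pairing
   an integer register in DImode with byte offset 8.  Single-register cases
   are handled without a PARALLEL by the fast paths below.  */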
7901 static rtx
7902 construct_container (machine_mode mode, machine_mode orig_mode,
7903 const_tree type, int in_return, int nintregs, int nsseregs,
7904 const int *intreg, int sse_regno)
7906 /* The following variables hold the static issued_error state. */
7907 static bool issued_sse_arg_error;
7908 static bool issued_sse_ret_error;
7909 static bool issued_x87_ret_error;
7911 machine_mode tmpmode;
7912 int bytes =
7913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7914 enum x86_64_reg_class regclass[MAX_CLASSES];
7915 int n;
7916 int i;
7917 int nexps = 0;
7918 int needed_sseregs, needed_intregs;
7919 rtx exp[MAX_CLASSES];
7920 rtx ret;
7922 n = classify_argument (mode, type, regclass, 0);
7923 if (!n)
7924 return NULL;
7925 if (examine_argument (mode, type, in_return, &needed_intregs,
7926 &needed_sseregs))
7927 return NULL;
7928 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7929 return NULL;
7931 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7932 some less clueful developer tries to use floating-point anyway. */
7933 if (needed_sseregs && !TARGET_SSE)
7935 if (in_return)
7937 if (!issued_sse_ret_error)
7939 error ("SSE register return with SSE disabled");
7940 issued_sse_ret_error = true;
7943 else if (!issued_sse_arg_error)
7945 error ("SSE register argument with SSE disabled");
7946 issued_sse_arg_error = true;
7948 return NULL;
7951 /* Likewise, error if the ABI requires us to return values in the
7952 x87 registers and the user specified -mno-80387. */
7953 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7954 for (i = 0; i < n; i++)
7955 if (regclass[i] == X86_64_X87_CLASS
7956 || regclass[i] == X86_64_X87UP_CLASS
7957 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7959 if (!issued_x87_ret_error)
7961 error ("x87 register return with x87 disabled");
7962 issued_x87_ret_error = true;
7964 return NULL;
7967 /* First construct simple cases. Avoid SCmode, since we want to use
7968 single register to pass this type. */
7969 if (n == 1 && mode != SCmode)
7970 switch (regclass[0])
7972 case X86_64_INTEGER_CLASS:
7973 case X86_64_INTEGERSI_CLASS:
7974 return gen_rtx_REG (mode, intreg[0]);
7975 case X86_64_SSE_CLASS:
7976 case X86_64_SSESF_CLASS:
7977 case X86_64_SSEDF_CLASS:
7978 if (mode != BLKmode)
7979 return gen_reg_or_parallel (mode, orig_mode,
7980 SSE_REGNO (sse_regno));
7981 break;
7982 case X86_64_X87_CLASS:
7983 case X86_64_COMPLEX_X87_CLASS:
7984 return gen_rtx_REG (mode, FIRST_STACK_REG);
7985 case X86_64_NO_CLASS:
7986 /* Zero sized array, struct or class. */
7987 return NULL;
7988 default:
7989 gcc_unreachable ();
7991 if (n == 2
7992 && regclass[0] == X86_64_SSE_CLASS
7993 && regclass[1] == X86_64_SSEUP_CLASS
7994 && mode != BLKmode)
7995 return gen_reg_or_parallel (mode, orig_mode,
7996 SSE_REGNO (sse_regno));
7997 if (n == 4
7998 && regclass[0] == X86_64_SSE_CLASS
7999 && regclass[1] == X86_64_SSEUP_CLASS
8000 && regclass[2] == X86_64_SSEUP_CLASS
8001 && regclass[3] == X86_64_SSEUP_CLASS
8002 && mode != BLKmode)
8003 return gen_reg_or_parallel (mode, orig_mode,
8004 SSE_REGNO (sse_regno));
8005 if (n == 8
8006 && regclass[0] == X86_64_SSE_CLASS
8007 && regclass[1] == X86_64_SSEUP_CLASS
8008 && regclass[2] == X86_64_SSEUP_CLASS
8009 && regclass[3] == X86_64_SSEUP_CLASS
8010 && regclass[4] == X86_64_SSEUP_CLASS
8011 && regclass[5] == X86_64_SSEUP_CLASS
8012 && regclass[6] == X86_64_SSEUP_CLASS
8013 && regclass[7] == X86_64_SSEUP_CLASS
8014 && mode != BLKmode)
8015 return gen_reg_or_parallel (mode, orig_mode,
8016 SSE_REGNO (sse_regno));
8017 if (n == 2
8018 && regclass[0] == X86_64_X87_CLASS
8019 && regclass[1] == X86_64_X87UP_CLASS)
8020 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8022 if (n == 2
8023 && regclass[0] == X86_64_INTEGER_CLASS
8024 && regclass[1] == X86_64_INTEGER_CLASS
8025 && (mode == CDImode || mode == TImode)
8026 && intreg[0] + 1 == intreg[1])
8027 return gen_rtx_REG (mode, intreg[0]);
8029 /* Otherwise figure out the entries of the PARALLEL. */
8030 for (i = 0; i < n; i++)
8032 int pos;
8034 switch (regclass[i])
8036 case X86_64_NO_CLASS:
8037 break;
8038 case X86_64_INTEGER_CLASS:
8039 case X86_64_INTEGERSI_CLASS:
8040 /* Merge TImodes on aligned occasions here too. */
8041 if (i * 8 + 8 > bytes)
8043 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8044 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8045 /* We've requested 24 bytes we
8046 don't have a mode for. Use DImode. */
8047 tmpmode = DImode;
8049 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8050 tmpmode = SImode;
8051 else
8052 tmpmode = DImode;
8053 exp [nexps++]
8054 = gen_rtx_EXPR_LIST (VOIDmode,
8055 gen_rtx_REG (tmpmode, *intreg),
8056 GEN_INT (i*8));
8057 intreg++;
8058 break;
8059 case X86_64_SSESF_CLASS:
8060 exp [nexps++]
8061 = gen_rtx_EXPR_LIST (VOIDmode,
8062 gen_rtx_REG (SFmode,
8063 SSE_REGNO (sse_regno)),
8064 GEN_INT (i*8));
8065 sse_regno++;
8066 break;
8067 case X86_64_SSEDF_CLASS:
8068 exp [nexps++]
8069 = gen_rtx_EXPR_LIST (VOIDmode,
8070 gen_rtx_REG (DFmode,
8071 SSE_REGNO (sse_regno)),
8072 GEN_INT (i*8));
8073 sse_regno++;
8074 break;
8075 case X86_64_SSE_CLASS:
8076 pos = i;
8077 switch (n)
8079 case 1:
8080 tmpmode = DImode;
8081 break;
8082 case 2:
8083 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8085 tmpmode = TImode;
8086 i++;
8088 else
8089 tmpmode = DImode;
8090 break;
8091 case 4:
8092 gcc_assert (i == 0
8093 && regclass[1] == X86_64_SSEUP_CLASS
8094 && regclass[2] == X86_64_SSEUP_CLASS
8095 && regclass[3] == X86_64_SSEUP_CLASS);
8096 tmpmode = OImode;
8097 i += 3;
8098 break;
8099 case 8:
8100 gcc_assert (i == 0
8101 && regclass[1] == X86_64_SSEUP_CLASS
8102 && regclass[2] == X86_64_SSEUP_CLASS
8103 && regclass[3] == X86_64_SSEUP_CLASS
8104 && regclass[4] == X86_64_SSEUP_CLASS
8105 && regclass[5] == X86_64_SSEUP_CLASS
8106 && regclass[6] == X86_64_SSEUP_CLASS
8107 && regclass[7] == X86_64_SSEUP_CLASS);
8108 tmpmode = XImode;
8109 i += 7;
8110 break;
8111 default:
8112 gcc_unreachable ();
8114 exp [nexps++]
8115 = gen_rtx_EXPR_LIST (VOIDmode,
8116 gen_rtx_REG (tmpmode,
8117 SSE_REGNO (sse_regno)),
8118 GEN_INT (pos*8));
8119 sse_regno++;
8120 break;
8121 default:
8122 gcc_unreachable ();
8126 /* Empty aligned struct, union or class. */
8127 if (nexps == 0)
8128 return NULL;
8130 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8131 for (i = 0; i < nexps; i++)
8132 XVECEXP (ret, 0, i) = exp [i];
8133 return ret;
8136 /* Update the data in CUM to advance over an argument of mode MODE
8137 and data type TYPE. (TYPE is null for libcalls where that information
8138 may not be available.)
8140 Return the number of integer registers advanced over. */
8142 static int
8143 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8144 const_tree type, HOST_WIDE_INT bytes,
8145 HOST_WIDE_INT words)
8147 int res = 0;
8148 bool error_p = false;
8150 if (TARGET_IAMCU)
8152 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8153 bytes in registers. */
8154 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8155 goto pass_in_reg;
8156 return res;
8159 switch (mode)
8161 default:
8162 break;
8164 case E_BLKmode:
8165 if (bytes < 0)
8166 break;
8167 /* FALLTHRU */
8169 case E_DImode:
8170 case E_SImode:
8171 case E_HImode:
8172 case E_QImode:
8173 pass_in_reg:
8174 cum->words += words;
8175 cum->nregs -= words;
8176 cum->regno += words;
8177 if (cum->nregs >= 0)
8178 res = words;
8179 if (cum->nregs <= 0)
8181 cum->nregs = 0;
8182 cfun->machine->arg_reg_available = false;
8183 cum->regno = 0;
8185 break;
8187 case E_OImode:
8188 /* OImode shouldn't be used directly. */
8189 gcc_unreachable ();
8191 case E_DFmode:
8192 if (cum->float_in_sse == -1)
8193 error_p = true;
8194 if (cum->float_in_sse < 2)
8195 break;
8196 /* FALLTHRU */
8197 case E_SFmode:
8198 if (cum->float_in_sse == -1)
8199 error_p = true;
8200 if (cum->float_in_sse < 1)
8201 break;
8202 /* FALLTHRU */
8204 case E_V8SFmode:
8205 case E_V8SImode:
8206 case E_V64QImode:
8207 case E_V32HImode:
8208 case E_V16SImode:
8209 case E_V8DImode:
8210 case E_V16SFmode:
8211 case E_V8DFmode:
8212 case E_V32QImode:
8213 case E_V16HImode:
8214 case E_V4DFmode:
8215 case E_V4DImode:
8216 case E_TImode:
8217 case E_V16QImode:
8218 case E_V8HImode:
8219 case E_V4SImode:
8220 case E_V2DImode:
8221 case E_V4SFmode:
8222 case E_V2DFmode:
8223 if (!type || !AGGREGATE_TYPE_P (type))
8225 cum->sse_words += words;
8226 cum->sse_nregs -= 1;
8227 cum->sse_regno += 1;
8228 if (cum->sse_nregs <= 0)
8230 cum->sse_nregs = 0;
8231 cum->sse_regno = 0;
8234 break;
8236 case E_V8QImode:
8237 case E_V4HImode:
8238 case E_V2SImode:
8239 case E_V2SFmode:
8240 case E_V1TImode:
8241 case E_V1DImode:
8242 if (!type || !AGGREGATE_TYPE_P (type))
8244 cum->mmx_words += words;
8245 cum->mmx_nregs -= 1;
8246 cum->mmx_regno += 1;
8247 if (cum->mmx_nregs <= 0)
8249 cum->mmx_nregs = 0;
8250 cum->mmx_regno = 0;
8253 break;
8255 if (error_p)
8257 cum->float_in_sse = 0;
8258 error ("calling %qD with SSE calling convention without "
8259 "SSE/SSE2 enabled", cum->decl);
8260 sorry ("this is a GCC bug that can be worked around by adding "
8261 "attribute used to function called");
8264 return res;
8267 static int
8268 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8269 const_tree type, HOST_WIDE_INT words, bool named)
8271 int int_nregs, sse_nregs;
8273 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
8274 if (!named && (VALID_AVX512F_REG_MODE (mode)
8275 || VALID_AVX256_REG_MODE (mode)))
8276 return 0;
8278 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8279 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8281 cum->nregs -= int_nregs;
8282 cum->sse_nregs -= sse_nregs;
8283 cum->regno += int_nregs;
8284 cum->sse_regno += sse_nregs;
8285 return int_nregs;
8287 else
8289 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8290 cum->words = ROUND_UP (cum->words, align);
8291 cum->words += words;
8292 return 0;
8296 static int
8297 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8298 HOST_WIDE_INT words)
8300 /* Otherwise, this should be passed indirect. */
8301 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8303 cum->words += words;
8304 if (cum->nregs > 0)
8306 cum->nregs -= 1;
8307 cum->regno += 1;
8308 return 1;
8310 return 0;
8313 /* Update the data in CUM to advance over an argument of mode MODE and
8314 data type TYPE. (TYPE is null for libcalls where that information
8315 may not be available.) */
8317 static void
8318 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8319 const_tree type, bool named)
8321 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8322 HOST_WIDE_INT bytes, words;
8323 int nregs;
8325 /* The argument of an interrupt handler is a special case and is
8326 handled in ix86_function_arg. */
8327 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8328 return;
8330 if (mode == BLKmode)
8331 bytes = int_size_in_bytes (type);
8332 else
8333 bytes = GET_MODE_SIZE (mode);
8334 words = CEIL (bytes, UNITS_PER_WORD);
8336 if (type)
8337 mode = type_natural_mode (type, NULL, false);
8339 if ((type && POINTER_BOUNDS_TYPE_P (type))
8340 || POINTER_BOUNDS_MODE_P (mode))
8342 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
8343 if (cum->bnds_in_bt)
8345 cum->bnds_in_bt--;
8346 return;
8350 /* Update the remaining number of bounds to force. */
8350 if (cum->force_bnd_pass)
8351 cum->force_bnd_pass--;
8353 cum->bnd_regno++;
8355 return;
8358 /* The first arg not going to Bounds Tables resets this counter. */
8359 cum->bnds_in_bt = 0;
8360 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
8361 the passed and received types do not match. If bounds do not follow an
8362 unnamed arg, still pretend the required number of bounds were passed. */
8363 if (cum->force_bnd_pass)
8365 cum->bnd_regno += cum->force_bnd_pass;
8366 cum->force_bnd_pass = 0;
8369 if (TARGET_64BIT)
8371 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8373 if (call_abi == MS_ABI)
8374 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8375 else
8376 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8378 else
8379 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8381 /* For stdarg we expect bounds to be passed for each value passed
8382 in registers. */
8383 if (cum->stdarg)
8384 cum->force_bnd_pass = nregs;
8385 /* For pointers passed in memory we expect bounds to be passed in the
8386 Bounds Table. */
8387 if (!nregs)
8389 /* Track if there are outgoing arguments on stack. */
8390 if (cum->caller)
8391 cfun->machine->outgoing_args_on_stack = true;
8393 cum->bnds_in_bt = chkp_type_bounds_count (type);
8397 /* Define where to put the arguments to a function.
8398 Value is zero to push the argument on the stack,
8399 or a hard register in which to store the argument.
8401 MODE is the argument's machine mode.
8402 TYPE is the data type of the argument (as a tree).
8403 This is null for libcalls where that information may
8404 not be available.
8405 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8406 the preceding args and about the function being called.
8407 NAMED is nonzero if this argument is a named parameter
8408 (otherwise it is an extra parameter matching an ellipsis). */
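/* An illustrative sketch for the 32-bit conventions handled below: with

     __attribute__ ((regparm (3))) int f (int a, int b, int c);

   the arguments a, b and c are assigned to %eax, %edx and %ecx in that
   order, and any further arguments go on the stack; with fastcall the
   first register is %ecx instead of %eax, as the code below arranges.  */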
8410 static rtx
8411 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8412 machine_mode orig_mode, const_tree type,
8413 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8415 bool error_p = false;
8417 /* Avoid the AL settings for the Unix64 ABI. */
8418 if (mode == VOIDmode)
8419 return constm1_rtx;
8421 if (TARGET_IAMCU)
8423 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8424 bytes in registers. */
8425 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8426 goto pass_in_reg;
8427 return NULL_RTX;
8430 switch (mode)
8432 default:
8433 break;
8435 case E_BLKmode:
8436 if (bytes < 0)
8437 break;
8438 /* FALLTHRU */
8439 case E_DImode:
8440 case E_SImode:
8441 case E_HImode:
8442 case E_QImode:
8443 pass_in_reg:
8444 if (words <= cum->nregs)
8446 int regno = cum->regno;
8448 /* Fastcall allocates the first two DWORD (SImode) or
8449 smaller arguments to ECX and EDX if the argument isn't an
8450 aggregate type. */
8451 if (cum->fastcall)
8453 if (mode == BLKmode
8454 || mode == DImode
8455 || (type && AGGREGATE_TYPE_P (type)))
8456 break;
8458 /* ECX, not EAX, is the first allocated register. */
8459 if (regno == AX_REG)
8460 regno = CX_REG;
8462 return gen_rtx_REG (mode, regno);
8464 break;
8466 case E_DFmode:
8467 if (cum->float_in_sse == -1)
8468 error_p = true;
8469 if (cum->float_in_sse < 2)
8470 break;
8471 /* FALLTHRU */
8472 case E_SFmode:
8473 if (cum->float_in_sse == -1)
8474 error_p = true;
8475 if (cum->float_in_sse < 1)
8476 break;
8477 /* FALLTHRU */
8478 case E_TImode:
8479 /* In 32-bit mode, we pass TImode in xmm registers. */
8480 case E_V16QImode:
8481 case E_V8HImode:
8482 case E_V4SImode:
8483 case E_V2DImode:
8484 case E_V4SFmode:
8485 case E_V2DFmode:
8486 if (!type || !AGGREGATE_TYPE_P (type))
8488 if (cum->sse_nregs)
8489 return gen_reg_or_parallel (mode, orig_mode,
8490 cum->sse_regno + FIRST_SSE_REG);
8492 break;
8494 case E_OImode:
8495 case E_XImode:
8496 /* OImode and XImode shouldn't be used directly. */
8497 gcc_unreachable ();
8499 case E_V64QImode:
8500 case E_V32HImode:
8501 case E_V16SImode:
8502 case E_V8DImode:
8503 case E_V16SFmode:
8504 case E_V8DFmode:
8505 case E_V8SFmode:
8506 case E_V8SImode:
8507 case E_V32QImode:
8508 case E_V16HImode:
8509 case E_V4DFmode:
8510 case E_V4DImode:
8511 if (!type || !AGGREGATE_TYPE_P (type))
8513 if (cum->sse_nregs)
8514 return gen_reg_or_parallel (mode, orig_mode,
8515 cum->sse_regno + FIRST_SSE_REG);
8517 break;
8519 case E_V8QImode:
8520 case E_V4HImode:
8521 case E_V2SImode:
8522 case E_V2SFmode:
8523 case E_V1TImode:
8524 case E_V1DImode:
8525 if (!type || !AGGREGATE_TYPE_P (type))
8527 if (cum->mmx_nregs)
8528 return gen_reg_or_parallel (mode, orig_mode,
8529 cum->mmx_regno + FIRST_MMX_REG);
8531 break;
8533 if (error_p)
8535 cum->float_in_sse = 0;
8536 error ("calling %qD with SSE calling convention without "
8537 "SSE/SSE2 enabled", cum->decl);
8538 sorry ("this is a GCC bug that can be worked around by adding "
8539 "attribute used to function called");
8542 return NULL_RTX;
8545 static rtx
8546 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8547 machine_mode orig_mode, const_tree type, bool named)
8549 /* Handle a hidden AL argument containing number of registers
8550 for varargs x86-64 functions. */
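/* Sketch of the hidden-%al convention: a varargs call such as
   printf ("%f", x) is made with %al set to 1 because one SSE register
   (%xmm0) carries an argument, whereas printf ("%d", i) uses %al == 0;
   the callee's prologue (see setup_incoming_varargs_64) only checks this
   value for zero/non-zero before saving the SSE registers.  */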
8551 if (mode == VOIDmode)
8552 return GEN_INT (cum->maybe_vaarg
8553 ? (cum->sse_nregs < 0
8554 ? X86_64_SSE_REGPARM_MAX
8555 : cum->sse_regno)
8556 : -1);
8558 switch (mode)
8560 default:
8561 break;
8563 case E_V8SFmode:
8564 case E_V8SImode:
8565 case E_V32QImode:
8566 case E_V16HImode:
8567 case E_V4DFmode:
8568 case E_V4DImode:
8569 case E_V16SFmode:
8570 case E_V16SImode:
8571 case E_V64QImode:
8572 case E_V32HImode:
8573 case E_V8DFmode:
8574 case E_V8DImode:
8575 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8576 if (!named)
8577 return NULL;
8578 break;
8581 return construct_container (mode, orig_mode, type, 0, cum->nregs,
8582 cum->sse_nregs,
8583 &x86_64_int_parameter_registers [cum->regno],
8584 cum->sse_regno);
8587 static rtx
8588 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8589 machine_mode orig_mode, bool named,
8590 HOST_WIDE_INT bytes)
8592 unsigned int regno;
8594 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8595 We use the value -2 to specify that the current function call uses the MS ABI. */
8596 if (mode == VOIDmode)
8597 return GEN_INT (-2);
8599 /* If we've run out of registers, it goes on the stack. */
8600 if (cum->nregs == 0)
8601 return NULL_RTX;
8603 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8605 /* Only floating point modes are passed in anything but integer regs. */
8606 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8608 if (named)
8609 regno = cum->regno + FIRST_SSE_REG;
8610 else
8612 rtx t1, t2;
8614 /* Unnamed floating parameters are passed in both the
8615 SSE and integer registers. */
8616 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8617 t2 = gen_rtx_REG (mode, regno);
8618 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8619 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8620 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8623 /* Handle aggregate types passed in registers. */
8624 if (orig_mode == BLKmode)
8626 if (bytes > 0 && bytes <= 8)
8627 mode = (bytes > 4 ? DImode : SImode);
8628 if (mode == BLKmode)
8629 mode = DImode;
8632 return gen_reg_or_parallel (mode, orig_mode, regno);
8635 /* Return where to put the arguments to a function.
8636 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8638 MODE is the argument's machine mode. TYPE is the data type of the
8639 argument. It is null for libcalls where that information may not be
8640 available. CUM gives information about the preceding args and about
8641 the function being called. NAMED is nonzero if this argument is a
8642 named parameter (otherwise it is an extra parameter matching an
8643 ellipsis). */
8645 static rtx
8646 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8647 const_tree type, bool named)
8649 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8650 machine_mode mode = omode;
8651 HOST_WIDE_INT bytes, words;
8652 rtx arg;
8654 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8656 gcc_assert (type != NULL_TREE);
8657 if (POINTER_TYPE_P (type))
8659 /* This is the pointer argument. */
8660 gcc_assert (TYPE_MODE (type) == Pmode);
8661 /* It is at -WORD(AP) in the current frame in interrupt and
8662 exception handlers. */
8663 arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8665 else
8667 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8668 && TREE_CODE (type) == INTEGER_TYPE
8669 && TYPE_MODE (type) == word_mode);
8670 /* The error code is the word-mode integer argument at
8671 -2 * WORD(AP) in the current frame of the exception
8672 handler. */
8673 arg = gen_rtx_MEM (word_mode,
8674 plus_constant (Pmode,
8675 arg_pointer_rtx,
8676 -2 * UNITS_PER_WORD));
8678 return arg;
8681 /* All pointer bounds arguments are handled separately here. */
8682 if ((type && POINTER_BOUNDS_TYPE_P (type))
8683 || POINTER_BOUNDS_MODE_P (mode))
8685 /* Return NULL if bounds are forced to go in Bounds Table. */
8686 if (cum->bnds_in_bt)
8687 arg = NULL;
8688 /* Return the next available bound reg if any. */
8689 else if (cum->bnd_regno <= LAST_BND_REG)
8690 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8691 /* Return the next special slot number otherwise. */
8692 else
8693 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8695 return arg;
8698 if (mode == BLKmode)
8699 bytes = int_size_in_bytes (type);
8700 else
8701 bytes = GET_MODE_SIZE (mode);
8702 words = CEIL (bytes, UNITS_PER_WORD);
8704 /* To simplify the code below, represent vector types with a vector mode
8705 even if MMX/SSE are not active. */
8706 if (type && TREE_CODE (type) == VECTOR_TYPE)
8707 mode = type_natural_mode (type, cum, false);
8709 if (TARGET_64BIT)
8711 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8713 if (call_abi == MS_ABI)
8714 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8715 else
8716 arg = function_arg_64 (cum, mode, omode, type, named);
8718 else
8719 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8721 /* Track if there are outgoing arguments on stack. */
8722 if (arg == NULL_RTX && cum->caller)
8723 cfun->machine->outgoing_args_on_stack = true;
8725 return arg;
8728 /* A C expression that indicates when an argument must be passed by
8729 reference. If nonzero for an argument, a copy of that argument is
8730 made in memory and a pointer to the argument is passed instead of
8731 the argument itself. The pointer is passed in whatever way is
8732 appropriate for passing a pointer to that type. */
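/* Illustrative consequences of the rules below (sketch): under the MS x64
   ABI a 16-byte struct or __m128 argument is passed by reference (the
   register slot holds a pointer), while 1-, 2-, 4- and 8-byte aggregates
   are passed by value; under the SysV 64-bit ABI only variably-sized types
   are passed by reference here, large fixed-size aggregates being handled
   by the MEMORY classification instead.  */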
8734 static bool
8735 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8736 const_tree type, bool)
8738 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8740 /* Bounds are never passed by reference. */
8741 if ((type && POINTER_BOUNDS_TYPE_P (type))
8742 || POINTER_BOUNDS_MODE_P (mode))
8743 return false;
8745 if (TARGET_64BIT)
8747 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8749 /* See Windows x64 Software Convention. */
8750 if (call_abi == MS_ABI)
8752 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8754 if (type)
8756 /* Arrays are passed by reference. */
8757 if (TREE_CODE (type) == ARRAY_TYPE)
8758 return true;
8760 if (RECORD_OR_UNION_TYPE_P (type))
8762 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8763 are passed by reference. */
8764 msize = int_size_in_bytes (type);
8768 /* __m128 is passed by reference. */
8769 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8771 else if (type && int_size_in_bytes (type) == -1)
8772 return true;
8775 return false;
8778 /* Return true when TYPE should be 128bit aligned for 32bit argument
8779 passing ABI. XXX: This function is obsolete and is only used for
8780 checking psABI compatibility with previous versions of GCC. */
8782 static bool
8783 ix86_compat_aligned_value_p (const_tree type)
8785 machine_mode mode = TYPE_MODE (type);
8786 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8787 || mode == TDmode
8788 || mode == TFmode
8789 || mode == TCmode)
8790 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8791 return true;
8792 if (TYPE_ALIGN (type) < 128)
8793 return false;
8795 if (AGGREGATE_TYPE_P (type))
8797 /* Walk the aggregates recursively. */
8798 switch (TREE_CODE (type))
8800 case RECORD_TYPE:
8801 case UNION_TYPE:
8802 case QUAL_UNION_TYPE:
8804 tree field;
8806 /* Walk all the structure fields. */
8807 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8809 if (TREE_CODE (field) == FIELD_DECL
8810 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8811 return true;
8813 break;
8816 case ARRAY_TYPE:
8817 /* Just in case some language passes arrays by value. */
8818 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8819 return true;
8820 break;
8822 default:
8823 gcc_unreachable ();
8826 return false;
8829 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8830 XXX: This function is obsolete and is only used for checking psABI
8831 compatibility with previous versions of GCC. */
8833 static unsigned int
8834 ix86_compat_function_arg_boundary (machine_mode mode,
8835 const_tree type, unsigned int align)
8837 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8838 natural boundaries. */
8839 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8841 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8842 make an exception for SSE modes since these require 128bit
8843 alignment.
8845 The handling here differs from field_alignment. ICC aligns MMX
8846 arguments to 4 byte boundaries, while structure fields are aligned
8847 to 8 byte boundaries. */
8848 if (!type)
8850 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8851 align = PARM_BOUNDARY;
8853 else
8855 if (!ix86_compat_aligned_value_p (type))
8856 align = PARM_BOUNDARY;
8859 if (align > BIGGEST_ALIGNMENT)
8860 align = BIGGEST_ALIGNMENT;
8861 return align;
8864 /* Return true when TYPE should be 128bit aligned for 32bit argument
8865 passing ABI. */
8867 static bool
8868 ix86_contains_aligned_value_p (const_tree type)
8870 machine_mode mode = TYPE_MODE (type);
8872 if (mode == XFmode || mode == XCmode)
8873 return false;
8875 if (TYPE_ALIGN (type) < 128)
8876 return false;
8878 if (AGGREGATE_TYPE_P (type))
8880 /* Walk the aggregates recursively. */
8881 switch (TREE_CODE (type))
8883 case RECORD_TYPE:
8884 case UNION_TYPE:
8885 case QUAL_UNION_TYPE:
8887 tree field;
8889 /* Walk all the structure fields. */
8890 for (field = TYPE_FIELDS (type);
8891 field;
8892 field = DECL_CHAIN (field))
8894 if (TREE_CODE (field) == FIELD_DECL
8895 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8896 return true;
8898 break;
8901 case ARRAY_TYPE:
8902 /* Just in case some language passes arrays by value. */
8903 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8904 return true;
8905 break;
8907 default:
8908 gcc_unreachable ();
8911 else
8912 return TYPE_ALIGN (type) >= 128;
8914 return false;
8917 /* Gives the alignment boundary, in bits, of an argument with the
8918 specified mode and type. */
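/* For illustration: a plain int argument gets PARM_BOUNDARY (32 bits for
   ia32, 64 bits for x86-64), while a 16-byte vector such as __m128 gets a
   128-bit boundary; the -Wpsabi note below flags arguments whose boundary
   changed in GCC 4.6.  */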
8920 static unsigned int
8921 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8923 unsigned int align;
8924 if (type)
8926 /* Since the main variant type is used for the call, convert the type
8927 to its main variant. */
8928 type = TYPE_MAIN_VARIANT (type);
8929 align = TYPE_ALIGN (type);
8931 else
8932 align = GET_MODE_ALIGNMENT (mode);
8933 if (align < PARM_BOUNDARY)
8934 align = PARM_BOUNDARY;
8935 else
8937 static bool warned;
8938 unsigned int saved_align = align;
8940 if (!TARGET_64BIT)
8942 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8943 if (!type)
8945 if (mode == XFmode || mode == XCmode)
8946 align = PARM_BOUNDARY;
8948 else if (!ix86_contains_aligned_value_p (type))
8949 align = PARM_BOUNDARY;
8951 if (align < 128)
8952 align = PARM_BOUNDARY;
8955 if (warn_psabi
8956 && !warned
8957 && align != ix86_compat_function_arg_boundary (mode, type,
8958 saved_align))
8960 warned = true;
8961 inform (input_location,
8962 "The ABI for passing parameters with %d-byte"
8963 " alignment has changed in GCC 4.6",
8964 align / BITS_PER_UNIT);
8968 return align;
8971 /* Return true if N is a possible register number of function value. */
8973 static bool
8974 ix86_function_value_regno_p (const unsigned int regno)
8976 switch (regno)
8978 case AX_REG:
8979 return true;
8980 case DX_REG:
8981 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8982 case DI_REG:
8983 case SI_REG:
8984 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8986 case BND0_REG:
8987 case BND1_REG:
8988 return chkp_function_instrumented_p (current_function_decl);
8990 /* Complex values are returned in %st(0)/%st(1) pair. */
8991 case ST0_REG:
8992 case ST1_REG:
8993 /* TODO: The function should depend on current function ABI but
8994 builtins.c would need updating then. Therefore we use the
8995 default ABI. */
8996 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8997 return false;
8998 return TARGET_FLOAT_RETURNS_IN_80387;
9000 /* Complex values are returned in %xmm0/%xmm1 pair. */
9001 case XMM0_REG:
9002 case XMM1_REG:
9003 return TARGET_SSE;
9005 case MM0_REG:
9006 if (TARGET_MACHO || TARGET_64BIT)
9007 return false;
9008 return TARGET_MMX;
9011 return false;
9014 /* Define how to find the value returned by a function.
9015 VALTYPE is the data type of the value (as a tree).
9016 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9017 otherwise, FUNC is 0. */
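/* Illustrative defaults chosen below for 32-bit code: an int is returned
   in %eax, a float or double in %st(0) (unless -mno-fp-ret-in-387, or the
   sseregparm override further down picks %xmm0), an 8-byte vector in %mm0
   and a 16-byte vector in %xmm0.  */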
9019 static rtx
9020 function_value_32 (machine_mode orig_mode, machine_mode mode,
9021 const_tree fntype, const_tree fn)
9023 unsigned int regno;
9025 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9026 we normally prevent this case when mmx is not available. However
9027 some ABIs may require the result to be returned like DImode. */
9028 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9029 regno = FIRST_MMX_REG;
9031 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9032 we prevent this case when sse is not available. However some ABIs
9033 may require the result to be returned like integer TImode. */
9034 else if (mode == TImode
9035 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9036 regno = FIRST_SSE_REG;
9038 /* 32-byte vector modes in %ymm0. */
9039 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9040 regno = FIRST_SSE_REG;
9042 /* 64-byte vector modes in %zmm0. */
9043 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9044 regno = FIRST_SSE_REG;
9046 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9047 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9048 regno = FIRST_FLOAT_REG;
9049 else
9050 /* Most things go in %eax. */
9051 regno = AX_REG;
9053 /* Override FP return register with %xmm0 for local functions when
9054 SSE math is enabled or for functions with sseregparm attribute. */
9055 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9057 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9058 if (sse_level == -1)
9060 error ("calling %qD with SSE calling convention without "
9061 "SSE/SSE2 enabled", fn);
9062 sorry ("this is a GCC bug that can be worked around by adding "
9063 "attribute used to function called");
9065 else if ((sse_level >= 1 && mode == SFmode)
9066 || (sse_level == 2 && mode == DFmode))
9067 regno = FIRST_SSE_REG;
9070 /* OImode shouldn't be used directly. */
9071 gcc_assert (mode != OImode);
9073 return gen_rtx_REG (orig_mode, regno);
9076 static rtx
9077 function_value_64 (machine_mode orig_mode, machine_mode mode,
9078 const_tree valtype)
9080 rtx ret;
9082 /* Handle libcalls, which don't provide a type node. */
9083 if (valtype == NULL)
9085 unsigned int regno;
9087 switch (mode)
9089 case E_SFmode:
9090 case E_SCmode:
9091 case E_DFmode:
9092 case E_DCmode:
9093 case E_TFmode:
9094 case E_SDmode:
9095 case E_DDmode:
9096 case E_TDmode:
9097 regno = FIRST_SSE_REG;
9098 break;
9099 case E_XFmode:
9100 case E_XCmode:
9101 regno = FIRST_FLOAT_REG;
9102 break;
9103 case E_TCmode:
9104 return NULL;
9105 default:
9106 regno = AX_REG;
9109 return gen_rtx_REG (mode, regno);
9111 else if (POINTER_TYPE_P (valtype))
9113 /* Pointers are always returned in word_mode. */
9114 mode = word_mode;
9117 ret = construct_container (mode, orig_mode, valtype, 1,
9118 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9119 x86_64_int_return_registers, 0);
9121 /* For zero sized structures, construct_container returns NULL, but we
9122 need to keep the rest of the compiler happy by returning a meaningful value. */
9123 if (!ret)
9124 ret = gen_rtx_REG (orig_mode, AX_REG);
9126 return ret;
9129 static rtx
9130 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9131 const_tree valtype)
9133 unsigned int regno = AX_REG;
9135 if (TARGET_SSE)
9137 switch (GET_MODE_SIZE (mode))
9139 case 16:
9140 if (valtype != NULL_TREE
9141 && !VECTOR_INTEGER_TYPE_P (valtype)
9143 && !INTEGRAL_TYPE_P (valtype)
9144 && !VECTOR_FLOAT_TYPE_P (valtype))
9145 break;
9146 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9147 && !COMPLEX_MODE_P (mode))
9148 regno = FIRST_SSE_REG;
9149 break;
9150 case 8:
9151 case 4:
9152 if (mode == SFmode || mode == DFmode)
9153 regno = FIRST_SSE_REG;
9154 break;
9155 default:
9156 break;
9159 return gen_rtx_REG (orig_mode, regno);
9162 static rtx
9163 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9164 machine_mode orig_mode, machine_mode mode)
9166 const_tree fn, fntype;
9168 fn = NULL_TREE;
9169 if (fntype_or_decl && DECL_P (fntype_or_decl))
9170 fn = fntype_or_decl;
9171 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9173 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9174 || POINTER_BOUNDS_MODE_P (mode))
9175 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9176 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9177 return function_value_ms_64 (orig_mode, mode, valtype);
9178 else if (TARGET_64BIT)
9179 return function_value_64 (orig_mode, mode, valtype);
9180 else
9181 return function_value_32 (orig_mode, mode, fntype, fn);
9184 static rtx
9185 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9187 machine_mode mode, orig_mode;
9189 orig_mode = TYPE_MODE (valtype);
9190 mode = type_natural_mode (valtype, NULL, true);
9191 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9194 /* Return an RTX representing a place where a function returns
9195 or receives pointer bounds, or NULL if no bounds are returned.
9197 VALTYPE is a data type of a value returned by the function.
9199 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9200 or FUNCTION_TYPE of the function.
9202 If OUTGOING is false, return a place in which the caller will
9203 see the return value. Otherwise, return a place where a
9204 function returns a value. */
9206 static rtx
9207 ix86_function_value_bounds (const_tree valtype,
9208 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9209 bool outgoing ATTRIBUTE_UNUSED)
9211 rtx res = NULL_RTX;
9213 if (BOUNDED_TYPE_P (valtype))
9214 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9215 else if (chkp_type_has_pointer (valtype))
9217 bitmap slots;
9218 rtx bounds[2];
9219 bitmap_iterator bi;
9220 unsigned i, bnd_no = 0;
9222 bitmap_obstack_initialize (NULL);
9223 slots = BITMAP_ALLOC (NULL);
9224 chkp_find_bound_slots (valtype, slots);
9226 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9228 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9229 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9230 gcc_assert (bnd_no < 2);
9231 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9234 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9236 BITMAP_FREE (slots);
9237 bitmap_obstack_release (NULL);
9239 else
9240 res = NULL_RTX;
9242 return res;
9245 /* Pointer function arguments and return values are promoted to
9246 word_mode for normal functions. */
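/* For example (sketch): with -mx32, where Pmode is SImode but word_mode is
   DImode, pointer arguments and return values of ordinary functions are
   zero-extended to 64 bits (POINTERS_EXTEND_UNSIGNED); interrupt and
   exception handlers fall back to the default hook instead.  */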
9248 static machine_mode
9249 ix86_promote_function_mode (const_tree type, machine_mode mode,
9250 int *punsignedp, const_tree fntype,
9251 int for_return)
9253 if (cfun->machine->func_type == TYPE_NORMAL
9254 && type != NULL_TREE
9255 && POINTER_TYPE_P (type))
9257 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9258 return word_mode;
9260 return default_promote_function_mode (type, mode, punsignedp, fntype,
9261 for_return);
9264 /* Return true if a structure, union or array with MODE containing FIELD
9265 should be accessed using BLKmode. */
9267 static bool
9268 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9270 /* Union with XFmode must be in BLKmode. */
9271 return (mode == XFmode
9272 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9273 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9277 ix86_libcall_value (machine_mode mode)
9279 return ix86_function_value_1 (NULL, NULL, mode, mode);
9282 /* Return true iff type is returned in memory. */
9284 static bool
9285 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9287 #ifdef SUBTARGET_RETURN_IN_MEMORY
9288 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9289 #else
9290 const machine_mode mode = type_natural_mode (type, NULL, true);
9291 HOST_WIDE_INT size;
9293 if (POINTER_BOUNDS_TYPE_P (type))
9294 return false;
9296 if (TARGET_64BIT)
9298 if (ix86_function_type_abi (fntype) == MS_ABI)
9300 size = int_size_in_bytes (type);
9302 /* __m128 is returned in xmm0. */
9303 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9304 || INTEGRAL_TYPE_P (type)
9305 || VECTOR_FLOAT_TYPE_P (type))
9306 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9307 && !COMPLEX_MODE_P (mode)
9308 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9309 return false;
9311 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
9312 return size != 1 && size != 2 && size != 4 && size != 8;
9314 else
9316 int needed_intregs, needed_sseregs;
9318 return examine_argument (mode, type, 1,
9319 &needed_intregs, &needed_sseregs);
9322 else
9324 size = int_size_in_bytes (type);
9326 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9327 bytes in registers. */
9328 if (TARGET_IAMCU)
9329 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9331 if (mode == BLKmode)
9332 return true;
9334 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9335 return false;
9337 if (VECTOR_MODE_P (mode) || mode == TImode)
9339 /* User-created vectors small enough to fit in EAX. */
9340 if (size < 8)
9341 return false;
9343 /* Unless the ABI prescribes otherwise,
9344 MMX/3dNow values are returned in MM0 if available. */
9346 if (size == 8)
9347 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9349 /* SSE values are returned in XMM0 if available. */
9350 if (size == 16)
9351 return !TARGET_SSE;
9353 /* AVX values are returned in YMM0 if available. */
9354 if (size == 32)
9355 return !TARGET_AVX;
9357 /* AVX512F values are returned in ZMM0 if available. */
9358 if (size == 64)
9359 return !TARGET_AVX512F;
9362 if (mode == XFmode)
9363 return false;
9365 if (size > 12)
9366 return true;
9368 /* OImode shouldn't be used directly. */
9369 gcc_assert (mode != OImode);
9371 return false;
9373 #endif
9377 /* Create the va_list data type. */
9379 static tree
9380 ix86_build_builtin_va_list_64 (void)
9382 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9384 record = lang_hooks.types.make_type (RECORD_TYPE);
9385 type_decl = build_decl (BUILTINS_LOCATION,
9386 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9388 f_gpr = build_decl (BUILTINS_LOCATION,
9389 FIELD_DECL, get_identifier ("gp_offset"),
9390 unsigned_type_node);
9391 f_fpr = build_decl (BUILTINS_LOCATION,
9392 FIELD_DECL, get_identifier ("fp_offset"),
9393 unsigned_type_node);
9394 f_ovf = build_decl (BUILTINS_LOCATION,
9395 FIELD_DECL, get_identifier ("overflow_arg_area"),
9396 ptr_type_node);
9397 f_sav = build_decl (BUILTINS_LOCATION,
9398 FIELD_DECL, get_identifier ("reg_save_area"),
9399 ptr_type_node);
9401 va_list_gpr_counter_field = f_gpr;
9402 va_list_fpr_counter_field = f_fpr;
9404 DECL_FIELD_CONTEXT (f_gpr) = record;
9405 DECL_FIELD_CONTEXT (f_fpr) = record;
9406 DECL_FIELD_CONTEXT (f_ovf) = record;
9407 DECL_FIELD_CONTEXT (f_sav) = record;
9409 TYPE_STUB_DECL (record) = type_decl;
9410 TYPE_NAME (record) = type_decl;
9411 TYPE_FIELDS (record) = f_gpr;
9412 DECL_CHAIN (f_gpr) = f_fpr;
9413 DECL_CHAIN (f_fpr) = f_ovf;
9414 DECL_CHAIN (f_ovf) = f_sav;
9416 layout_type (record);
9418 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9419 NULL_TREE, TYPE_ATTRIBUTES (record));
9421 /* The correct type is an array type of one element. */
9422 return build_array_type (record, build_index_type (size_zero_node));
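/* The record built above corresponds to the familiar C-level layout
   (illustrative sketch of the SysV definition):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   The array-of-one-element trick is what makes va_list decay to a pointer
   when passed to functions such as vprintf.  */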
9425 /* Set up the builtin va_list data type and, for 64-bit, the additional
9426 calling-convention-specific va_list data types. */
9428 static tree
9429 ix86_build_builtin_va_list (void)
9431 if (TARGET_64BIT)
9433 /* Initialize ABI specific va_list builtin types.
9435 In lto1, we can encounter two va_list types:
9436 - one as a result of the type-merge across TUs, and
9437 - the one constructed here.
9438 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9439 a type identity check in canonical_va_list_type based on
9440 TYPE_MAIN_VARIANT (which we used to have) will not work.
9441 Instead, we tag each va_list_type_node with its unique attribute, and
9442 look for the attribute in the type identity check in
9443 canonical_va_list_type.
9445 Tagging sysv_va_list_type_node directly with the attribute is
9446 problematic since it's an array of one record, which will degrade into a
9447 pointer to a record when used as a parameter (see build_va_arg comments for
9448 an example), dropping the attribute in the process. So we tag the
9449 record instead. */
9451 /* For SYSV_ABI we use an array of one record. */
9452 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9454 /* For MS_ABI we use plain pointer to argument area. */
9455 tree char_ptr_type = build_pointer_type (char_type_node);
9456 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9457 TYPE_ATTRIBUTES (char_ptr_type));
9458 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9460 return ((ix86_abi == MS_ABI)
9461 ? ms_va_list_type_node
9462 : sysv_va_list_type_node);
9464 else
9466 /* For i386 we use plain pointer to argument area. */
9467 return build_pointer_type (char_type_node);
9471 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9473 static void
9474 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9476 rtx save_area, mem;
9477 alias_set_type set;
9478 int i, max;
9480 /* GPR size of varargs save area. */
9481 if (cfun->va_list_gpr_size)
9482 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9483 else
9484 ix86_varargs_gpr_size = 0;
9486 /* FPR size of varargs save area. We don't need it if we don't pass
9487 anything in SSE registers. */
9488 if (TARGET_SSE && cfun->va_list_fpr_size)
9489 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9490 else
9491 ix86_varargs_fpr_size = 0;
9493 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9494 return;
9496 save_area = frame_pointer_rtx;
9497 set = get_varargs_alias_set ();
9499 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9500 if (max > X86_64_REGPARM_MAX)
9501 max = X86_64_REGPARM_MAX;
9503 for (i = cum->regno; i < max; i++)
9505 mem = gen_rtx_MEM (word_mode,
9506 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9507 MEM_NOTRAP_P (mem) = 1;
9508 set_mem_alias_set (mem, set);
9509 emit_move_insn (mem,
9510 gen_rtx_REG (word_mode,
9511 x86_64_int_parameter_registers[i]));
9514 if (ix86_varargs_fpr_size)
9516 machine_mode smode;
9517 rtx_code_label *label;
9518 rtx test;
9520 /* Now emit code to save SSE registers.  The AX parameter contains the
9521 number of SSE parameter registers used to call this function, though all we
9522 actually check here is the zero/non-zero status. */
9524 label = gen_label_rtx ();
9525 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9526 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9527 label));
9529 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9530 we used movdqa (i.e. TImode) instead? Perhaps even better would
9531 be if we could determine the real mode of the data, via a hook
9532 into pass_stdarg. Ignore all that for now. */
9533 smode = V4SFmode;
9534 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9535 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9537 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9538 if (max > X86_64_SSE_REGPARM_MAX)
9539 max = X86_64_SSE_REGPARM_MAX;
9541 for (i = cum->sse_regno; i < max; ++i)
9543 mem = plus_constant (Pmode, save_area,
9544 i * 16 + ix86_varargs_gpr_size);
9545 mem = gen_rtx_MEM (smode, mem);
9546 MEM_NOTRAP_P (mem) = 1;
9547 set_mem_alias_set (mem, set);
9548 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9550 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9553 emit_label (label);
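/* A sketch of the register save area laid out above, assuming the usual
   SysV values X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:

       save_area +   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
       save_area +  48 .. 175   xmm0 .. xmm7                 (16 bytes each)

   gp_offset in the va_list then advances in steps of 8 up to 48, and
   fp_offset in steps of 16 from 48 up to 176, matching the values stored
   by ix86_va_start below.  */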
9557 static void
9558 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9560 alias_set_type set = get_varargs_alias_set ();
9561 int i;
9563 /* Reset to zero, as a sysv va_arg might have been
9564 used before. */
9565 ix86_varargs_gpr_size = 0;
9566 ix86_varargs_fpr_size = 0;
9568 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9570 rtx reg, mem;
9572 mem = gen_rtx_MEM (Pmode,
9573 plus_constant (Pmode, virtual_incoming_args_rtx,
9574 i * UNITS_PER_WORD));
9575 MEM_NOTRAP_P (mem) = 1;
9576 set_mem_alias_set (mem, set);
9578 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9579 emit_move_insn (mem, reg);
9583 static void
9584 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9585 tree type, int *, int no_rtl)
9587 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9588 CUMULATIVE_ARGS next_cum;
9589 tree fntype;
9591 /* This argument doesn't appear to be used anymore. Which is good,
9592 because the old code here didn't suppress rtl generation. */
9593 gcc_assert (!no_rtl);
9595 if (!TARGET_64BIT)
9596 return;
9598 fntype = TREE_TYPE (current_function_decl);
9600 /* For varargs, we do not want to skip the dummy va_dcl argument.
9601 For stdargs, we do want to skip the last named argument. */
9602 next_cum = *cum;
9603 if (stdarg_p (fntype))
9604 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9605 true);
9607 if (cum->call_abi == MS_ABI)
9608 setup_incoming_varargs_ms_64 (&next_cum);
9609 else
9610 setup_incoming_varargs_64 (&next_cum);
9613 static void
9614 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9615 machine_mode mode,
9616 tree type,
9617 int *pretend_size ATTRIBUTE_UNUSED,
9618 int no_rtl)
9620 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9621 CUMULATIVE_ARGS next_cum;
9622 tree fntype;
9623 rtx save_area;
9624 int bnd_reg, i, max;
9626 gcc_assert (!no_rtl);
9628 /* Do nothing if we use plain pointer to argument area. */
9629 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9630 return;
9632 fntype = TREE_TYPE (current_function_decl);
9634 /* For varargs, we do not want to skip the dummy va_dcl argument.
9635 For stdargs, we do want to skip the last named argument. */
9636 next_cum = *cum;
9637 if (stdarg_p (fntype))
9638 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9639 true);
9640 save_area = frame_pointer_rtx;
9642 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9643 if (max > X86_64_REGPARM_MAX)
9644 max = X86_64_REGPARM_MAX;
9646 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9647 if (chkp_function_instrumented_p (current_function_decl))
9648 for (i = cum->regno; i < max; i++)
9650 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9651 rtx ptr = gen_rtx_REG (Pmode,
9652 x86_64_int_parameter_registers[i]);
9653 rtx bounds;
9655 if (bnd_reg <= LAST_BND_REG)
9656 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9657 else
9659 rtx ldx_addr =
9660 plus_constant (Pmode, arg_pointer_rtx,
9661 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9662 bounds = gen_reg_rtx (BNDmode);
9663 emit_insn (BNDmode == BND64mode
9664 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9665 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9668 emit_insn (BNDmode == BND64mode
9669 ? gen_bnd64_stx (addr, ptr, bounds)
9670 : gen_bnd32_stx (addr, ptr, bounds));
9672 bnd_reg++;
9677 /* Checks if TYPE is of kind va_list char *. */
9679 static bool
9680 is_va_list_char_pointer (tree type)
9682 tree canonic;
9684 /* For 32-bit it is always true. */
9685 if (!TARGET_64BIT)
9686 return true;
9687 canonic = ix86_canonical_va_list_type (type);
9688 return (canonic == ms_va_list_type_node
9689 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9692 /* Implement va_start. */
9694 static void
9695 ix86_va_start (tree valist, rtx nextarg)
9697 HOST_WIDE_INT words, n_gpr, n_fpr;
9698 tree f_gpr, f_fpr, f_ovf, f_sav;
9699 tree gpr, fpr, ovf, sav, t;
9700 tree type;
9701 rtx ovf_rtx;
9703 if (flag_split_stack
9704 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9706 unsigned int scratch_regno;
9708 /* When we are splitting the stack, we can't refer to the stack
9709 arguments using internal_arg_pointer, because they may be on
9710 the old stack. The split stack prologue will arrange to
9711 leave a pointer to the old stack arguments in a scratch
9712 register, which we here copy to a pseudo-register. The split
9713 stack prologue can't set the pseudo-register directly because
9714 it (the prologue) runs before any registers have been saved. */
9716 scratch_regno = split_stack_prologue_scratch_regno ();
9717 if (scratch_regno != INVALID_REGNUM)
9719 rtx reg;
9720 rtx_insn *seq;
9722 reg = gen_reg_rtx (Pmode);
9723 cfun->machine->split_stack_varargs_pointer = reg;
9725 start_sequence ();
9726 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9727 seq = get_insns ();
9728 end_sequence ();
9730 push_topmost_sequence ();
9731 emit_insn_after (seq, entry_of_function ());
9732 pop_topmost_sequence ();
9736 /* Only 64bit target needs something special. */
9737 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9739 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9740 std_expand_builtin_va_start (valist, nextarg);
9741 else
9743 rtx va_r, next;
9745 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9746 next = expand_binop (ptr_mode, add_optab,
9747 cfun->machine->split_stack_varargs_pointer,
9748 crtl->args.arg_offset_rtx,
9749 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9750 convert_move (va_r, next, 0);
9752 /* Store zero bounds for va_list. */
9753 if (chkp_function_instrumented_p (current_function_decl))
9754 chkp_expand_bounds_reset_for_mem (valist,
9755 make_tree (TREE_TYPE (valist),
9756 next));
9759 return;
9762 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9763 f_fpr = DECL_CHAIN (f_gpr);
9764 f_ovf = DECL_CHAIN (f_fpr);
9765 f_sav = DECL_CHAIN (f_ovf);
9767 valist = build_simple_mem_ref (valist);
9768 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9769 /* The following should be folded into the MEM_REF offset. */
9770 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9771 f_gpr, NULL_TREE);
9772 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9773 f_fpr, NULL_TREE);
9774 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9775 f_ovf, NULL_TREE);
9776 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9777 f_sav, NULL_TREE);
9779 /* Count number of gp and fp argument registers used. */
9780 words = crtl->args.info.words;
9781 n_gpr = crtl->args.info.regno;
9782 n_fpr = crtl->args.info.sse_regno;
9784 if (cfun->va_list_gpr_size)
9786 type = TREE_TYPE (gpr);
9787 t = build2 (MODIFY_EXPR, type,
9788 gpr, build_int_cst (type, n_gpr * 8));
9789 TREE_SIDE_EFFECTS (t) = 1;
9790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9793 if (TARGET_SSE && cfun->va_list_fpr_size)
9795 type = TREE_TYPE (fpr);
9796 t = build2 (MODIFY_EXPR, type, fpr,
9797 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9798 TREE_SIDE_EFFECTS (t) = 1;
9799 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9802 /* Find the overflow area. */
9803 type = TREE_TYPE (ovf);
9804 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9805 ovf_rtx = crtl->args.internal_arg_pointer;
9806 else
9807 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9808 t = make_tree (type, ovf_rtx);
9809 if (words != 0)
9810 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9812 /* Store zero bounds for overflow area pointer. */
9813 if (chkp_function_instrumented_p (current_function_decl))
9814 chkp_expand_bounds_reset_for_mem (ovf, t);
9816 t = build2 (MODIFY_EXPR, type, ovf, t);
9817 TREE_SIDE_EFFECTS (t) = 1;
9818 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9820 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9822 /* Find the register save area.
9823 The function prologue saves it right above the stack frame. */
9824 type = TREE_TYPE (sav);
9825 t = make_tree (type, frame_pointer_rtx);
9826 if (!ix86_varargs_gpr_size)
9827 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9829 /* Store zero bounds for save area pointer. */
9830 if (chkp_function_instrumented_p (current_function_decl))
9831 chkp_expand_bounds_reset_for_mem (sav, t);
9833 t = build2 (MODIFY_EXPR, type, sav, t);
9834 TREE_SIDE_EFFECTS (t) = 1;
9835 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
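/* Worked example (illustrative only): for a SysV function such as

       int f (const char *fmt, ...);

   the single named argument occupies %rdi, so the code above initializes
   the va_list roughly as

       gp_offset         = 1 * 8;             // 8
       fp_offset         = 0 * 16 + 8 * 6;    // 48, assuming REGPARM_MAX == 6
       overflow_arg_area = incoming argument pointer;
       reg_save_area     = area saved by the prologue (see above);

   provided SSE is enabled and cfun->va_list_*_size are nonzero.  */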
9839 /* Implement va_arg. */
9841 static tree
9842 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9843 gimple_seq *post_p)
9845 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9846 tree f_gpr, f_fpr, f_ovf, f_sav;
9847 tree gpr, fpr, ovf, sav, t;
9848 int size, rsize;
9849 tree lab_false, lab_over = NULL_TREE;
9850 tree addr, t2;
9851 rtx container;
9852 int indirect_p = 0;
9853 tree ptrtype;
9854 machine_mode nat_mode;
9855 unsigned int arg_boundary;
9857 /* Only 64bit target needs something special. */
9858 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9859 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9861 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9862 f_fpr = DECL_CHAIN (f_gpr);
9863 f_ovf = DECL_CHAIN (f_fpr);
9864 f_sav = DECL_CHAIN (f_ovf);
9866 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9867 valist, f_gpr, NULL_TREE);
9869 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9870 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9871 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9873 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9874 if (indirect_p)
9875 type = build_pointer_type (type);
9876 size = int_size_in_bytes (type);
9877 rsize = CEIL (size, UNITS_PER_WORD);
9879 nat_mode = type_natural_mode (type, NULL, false);
9880 switch (nat_mode)
9882 case E_V8SFmode:
9883 case E_V8SImode:
9884 case E_V32QImode:
9885 case E_V16HImode:
9886 case E_V4DFmode:
9887 case E_V4DImode:
9888 case E_V16SFmode:
9889 case E_V16SImode:
9890 case E_V64QImode:
9891 case E_V32HImode:
9892 case E_V8DFmode:
9893 case E_V8DImode:
9894 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9895 if (!TARGET_64BIT_MS_ABI)
9897 container = NULL;
9898 break;
9900 /* FALLTHRU */
9902 default:
9903 container = construct_container (nat_mode, TYPE_MODE (type),
9904 type, 0, X86_64_REGPARM_MAX,
9905 X86_64_SSE_REGPARM_MAX, intreg,
9907 break;
9910 /* Pull the value out of the saved registers. */
9912 addr = create_tmp_var (ptr_type_node, "addr");
9914 if (container)
9916 int needed_intregs, needed_sseregs;
9917 bool need_temp;
9918 tree int_addr, sse_addr;
9920 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9921 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9923 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9925 need_temp = (!REG_P (container)
9926 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9927 || TYPE_ALIGN (type) > 128));
9929 /* In case we are passing a structure, verify that it is a consecutive block
9930 in the register save area.  If not, we need to do moves. */
9931 if (!need_temp && !REG_P (container))
9933 /* Verify that all registers are strictly consecutive */
9934 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9936 int i;
9938 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9940 rtx slot = XVECEXP (container, 0, i);
9941 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9942 || INTVAL (XEXP (slot, 1)) != i * 16)
9943 need_temp = true;
9946 else
9948 int i;
9950 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9952 rtx slot = XVECEXP (container, 0, i);
9953 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9954 || INTVAL (XEXP (slot, 1)) != i * 8)
9955 need_temp = true;
9959 if (!need_temp)
9961 int_addr = addr;
9962 sse_addr = addr;
9964 else
9966 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9967 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9970 /* First ensure that we fit completely in registers. */
9971 if (needed_intregs)
9973 t = build_int_cst (TREE_TYPE (gpr),
9974 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9975 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9976 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9977 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9978 gimplify_and_add (t, pre_p);
9980 if (needed_sseregs)
9982 t = build_int_cst (TREE_TYPE (fpr),
9983 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9984 + X86_64_REGPARM_MAX * 8);
9985 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9986 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9987 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9988 gimplify_and_add (t, pre_p);
9991 /* Compute index to start of area used for integer regs. */
9992 if (needed_intregs)
9994 /* int_addr = gpr + sav; */
9995 t = fold_build_pointer_plus (sav, gpr);
9996 gimplify_assign (int_addr, t, pre_p);
9998 if (needed_sseregs)
10000 /* sse_addr = fpr + sav; */
10001 t = fold_build_pointer_plus (sav, fpr);
10002 gimplify_assign (sse_addr, t, pre_p);
10004 if (need_temp)
10006 int i, prev_size = 0;
10007 tree temp = create_tmp_var (type, "va_arg_tmp");
10009 /* addr = &temp; */
10010 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10011 gimplify_assign (addr, t, pre_p);
10013 for (i = 0; i < XVECLEN (container, 0); i++)
10015 rtx slot = XVECEXP (container, 0, i);
10016 rtx reg = XEXP (slot, 0);
10017 machine_mode mode = GET_MODE (reg);
10018 tree piece_type;
10019 tree addr_type;
10020 tree daddr_type;
10021 tree src_addr, src;
10022 int src_offset;
10023 tree dest_addr, dest;
10024 int cur_size = GET_MODE_SIZE (mode);
10026 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10027 prev_size = INTVAL (XEXP (slot, 1));
10028 if (prev_size + cur_size > size)
10030 cur_size = size - prev_size;
10031 unsigned int nbits = cur_size * BITS_PER_UNIT;
10032 if (!int_mode_for_size (nbits, 1).exists (&mode))
10033 mode = QImode;
10035 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10036 if (mode == GET_MODE (reg))
10037 addr_type = build_pointer_type (piece_type);
10038 else
10039 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10040 true);
10041 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10042 true);
10044 if (SSE_REGNO_P (REGNO (reg)))
10046 src_addr = sse_addr;
10047 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10049 else
10051 src_addr = int_addr;
10052 src_offset = REGNO (reg) * 8;
10054 src_addr = fold_convert (addr_type, src_addr);
10055 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10057 dest_addr = fold_convert (daddr_type, addr);
10058 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10059 if (cur_size == GET_MODE_SIZE (mode))
10061 src = build_va_arg_indirect_ref (src_addr);
10062 dest = build_va_arg_indirect_ref (dest_addr);
10064 gimplify_assign (dest, src, pre_p);
10066 else
10068 tree copy
10069 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10070 3, dest_addr, src_addr,
10071 size_int (cur_size));
10072 gimplify_and_add (copy, pre_p);
10074 prev_size += cur_size;
10078 if (needed_intregs)
10080 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10081 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10082 gimplify_assign (gpr, t, pre_p);
10085 if (needed_sseregs)
10087 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10088 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10089 gimplify_assign (unshare_expr (fpr), t, pre_p);
10092 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10094 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10097 /* ... otherwise out of the overflow area. */
10099 /* When we align a parameter on the stack for the caller, if its
10100 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10101 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
10102 with the caller here. */
10103 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10104 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10105 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10107 /* Care for on-stack alignment if needed. */
10108 if (arg_boundary <= 64 || size == 0)
10109 t = ovf;
10110 else
10112 HOST_WIDE_INT align = arg_boundary / 8;
10113 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10114 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10115 build_int_cst (TREE_TYPE (t), -align));
10118 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10119 gimplify_assign (addr, t, pre_p);
10121 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10122 gimplify_assign (unshare_expr (ovf), t, pre_p);
10124 if (container)
10125 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10127 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10128 addr = fold_convert (ptrtype, addr);
10130 if (indirect_p)
10131 addr = build_va_arg_indirect_ref (addr);
10132 return build_va_arg_indirect_ref (addr);
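/* For orientation, for a simple case such as va_arg (ap, int) the gimple
   built above corresponds roughly to this pseudo-C sketch (not the exact
   lowering):

       if (ap->gp_offset >= 6 * 8)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;          // aligned first if required
       ap->overflow_arg_area = addr + 8;
     done:
       result = *(int *) addr;  */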
10135 /* Return true if OPNUM's MEM should be matched
10136 in movabs* patterns. */
10138 bool
10139 ix86_check_movabs (rtx insn, int opnum)
10141 rtx set, mem;
10143 set = PATTERN (insn);
10144 if (GET_CODE (set) == PARALLEL)
10145 set = XVECEXP (set, 0, 0);
10146 gcc_assert (GET_CODE (set) == SET);
10147 mem = XEXP (set, opnum);
10148 while (SUBREG_P (mem))
10149 mem = SUBREG_REG (mem);
10150 gcc_assert (MEM_P (mem));
10151 return volatile_ok || !MEM_VOLATILE_P (mem);
10154 /* Return false if INSN contains a MEM with a non-default address space. */
10155 bool
10156 ix86_check_no_addr_space (rtx insn)
10158 subrtx_var_iterator::array_type array;
10159 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10161 rtx x = *iter;
10162 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10163 return false;
10165 return true;
10168 /* Initialize the table of extra 80387 mathematical constants. */
10170 static void
10171 init_ext_80387_constants (void)
10173 static const char * cst[5] =
10175 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10176 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10177 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10178 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10179 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10181 int i;
10183 for (i = 0; i < 5; i++)
10185 real_from_string (&ext_80387_constants_table[i], cst[i]);
10186 /* Ensure each constant is rounded to XFmode precision. */
10187 real_convert (&ext_80387_constants_table[i],
10188 XFmode, &ext_80387_constants_table[i]);
10191 ext_80387_constants_init = 1;
10194 /* Return non-zero if the constant is something that
10195 can be loaded with a special instruction. */
10198 standard_80387_constant_p (rtx x)
10200 machine_mode mode = GET_MODE (x);
10202 const REAL_VALUE_TYPE *r;
10204 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10205 return -1;
10207 if (x == CONST0_RTX (mode))
10208 return 1;
10209 if (x == CONST1_RTX (mode))
10210 return 2;
10212 r = CONST_DOUBLE_REAL_VALUE (x);
10214 /* For XFmode constants, try to find a special 80387 instruction when
10215 optimizing for size or on those CPUs that benefit from them. */
10216 if (mode == XFmode
10217 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10219 int i;
10221 if (! ext_80387_constants_init)
10222 init_ext_80387_constants ();
10224 for (i = 0; i < 5; i++)
10225 if (real_identical (r, &ext_80387_constants_table[i]))
10226 return i + 3;
10229 /* Load of the constant -0.0 or -1.0 will be split into a
10230 fldz;fchs or fld1;fchs sequence. */
10231 if (real_isnegzero (r))
10232 return 8;
10233 if (real_identical (r, &dconstm1))
10234 return 9;
10236 return 0;
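/* Summary of the return values of standard_80387_constant_p above:
     -1  not a CONST_DOUBLE in an x87 mode
      0  no special load instruction available
      1  0.0 (fldz)           2  1.0 (fld1)
      3  log10 2 (fldlg2)     4  ln 2 (fldln2)
      5  log2 e (fldl2e)      6  log2 10 (fldl2t)
      7  pi (fldpi)
      8  -0.0 (fldz; fchs)    9  -1.0 (fld1; fchs)  */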
10239 /* Return the opcode of the special instruction to be used to load
10240 the constant X. */
10242 const char *
10243 standard_80387_constant_opcode (rtx x)
10245 switch (standard_80387_constant_p (x))
10247 case 1:
10248 return "fldz";
10249 case 2:
10250 return "fld1";
10251 case 3:
10252 return "fldlg2";
10253 case 4:
10254 return "fldln2";
10255 case 5:
10256 return "fldl2e";
10257 case 6:
10258 return "fldl2t";
10259 case 7:
10260 return "fldpi";
10261 case 8:
10262 case 9:
10263 return "#";
10264 default:
10265 gcc_unreachable ();
10269 /* Return the CONST_DOUBLE representing the 80387 constant that is
10270 loaded by the specified special instruction. The argument IDX
10271 matches the return value from standard_80387_constant_p. */
10274 standard_80387_constant_rtx (int idx)
10276 int i;
10278 if (! ext_80387_constants_init)
10279 init_ext_80387_constants ();
10281 switch (idx)
10283 case 3:
10284 case 4:
10285 case 5:
10286 case 6:
10287 case 7:
10288 i = idx - 3;
10289 break;
10291 default:
10292 gcc_unreachable ();
10295 return const_double_from_real_value (ext_80387_constants_table[i],
10296 XFmode);
10299 /* Return 1 if X is all zero bits and 2 if X is all one bits
10300 in a supported SSE/AVX vector mode. */
10303 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10305 machine_mode mode;
10307 if (!TARGET_SSE)
10308 return 0;
10310 mode = GET_MODE (x);
10312 if (x == const0_rtx || const0_operand (x, mode))
10313 return 1;
10315 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10317 /* VOIDmode integer constant, get mode from the predicate. */
10318 if (mode == VOIDmode)
10319 mode = pred_mode;
10321 switch (GET_MODE_SIZE (mode))
10323 case 64:
10324 if (TARGET_AVX512F)
10325 return 2;
10326 break;
10327 case 32:
10328 if (TARGET_AVX2)
10329 return 2;
10330 break;
10331 case 16:
10332 if (TARGET_SSE2)
10333 return 2;
10334 break;
10335 case 0:
10336 /* VOIDmode */
10337 gcc_unreachable ();
10338 default:
10339 break;
10343 return 0;
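/* In short: the function above returns 1 for an all-zero constant in any
   SSE mode, 2 for an all-ones constant when the vector width is supported
   (16 bytes needs SSE2, 32 needs AVX2, 64 needs AVX512F), and 0 otherwise.  */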
10346 /* Return the opcode of the special instruction to be used to load
10347 the constant X. */
10349 const char *
10350 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10352 machine_mode mode;
10354 gcc_assert (TARGET_SSE);
10356 mode = GET_MODE (x);
10358 if (x == const0_rtx || const0_operand (x, mode))
10360 switch (get_attr_mode (insn))
10362 case MODE_XI:
10363 return "vpxord\t%g0, %g0, %g0";
10364 case MODE_OI:
10365 return (TARGET_AVX512VL
10366 ? "vpxord\t%x0, %x0, %x0"
10367 : "vpxor\t%x0, %x0, %x0");
10368 case MODE_TI:
10369 return (TARGET_AVX512VL
10370 ? "vpxord\t%t0, %t0, %t0"
10371 : "%vpxor\t%0, %d0");
10373 case MODE_V8DF:
10374 return (TARGET_AVX512DQ
10375 ? "vxorpd\t%g0, %g0, %g0"
10376 : "vpxorq\t%g0, %g0, %g0");
10377 case MODE_V4DF:
10378 return "vxorpd\t%x0, %x0, %x0";
10379 case MODE_V2DF:
10380 return "%vxorpd\t%0, %d0";
10382 case MODE_V16SF:
10383 return (TARGET_AVX512DQ
10384 ? "vxorps\t%g0, %g0, %g0"
10385 : "vpxord\t%g0, %g0, %g0");
10386 case MODE_V8SF:
10387 return "vxorps\t%x0, %x0, %x0";
10388 case MODE_V4SF:
10389 return "%vxorps\t%0, %d0";
10391 default:
10392 gcc_unreachable ();
10395 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10397 enum attr_mode insn_mode = get_attr_mode (insn);
10399 switch (insn_mode)
10401 case MODE_XI:
10402 case MODE_V8DF:
10403 case MODE_V16SF:
10404 gcc_assert (TARGET_AVX512F);
10405 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10407 case MODE_OI:
10408 case MODE_V4DF:
10409 case MODE_V8SF:
10410 gcc_assert (TARGET_AVX2);
10411 /* FALLTHRU */
10412 case MODE_TI:
10413 case MODE_V2DF:
10414 case MODE_V4SF:
10415 gcc_assert (TARGET_SSE2);
10416 return (TARGET_AVX
10417 ? "vpcmpeqd\t%0, %0, %0"
10418 : "pcmpeqd\t%0, %0");
10420 default:
10421 gcc_unreachable ();
10425 gcc_unreachable ();
10428 /* Returns true if INSN can be transformed from a memory load
10429 to a supported FP constant load. */
10431 bool
10432 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10434 rtx src = find_constant_src (insn);
10436 gcc_assert (REG_P (dst));
10438 if (src == NULL
10439 || (SSE_REGNO_P (REGNO (dst))
10440 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10441 || (STACK_REGNO_P (REGNO (dst))
10442 && standard_80387_constant_p (src) < 1))
10443 return false;
10445 return true;
10448 /* Returns true if OP contains a symbol reference. */
10450 bool
10451 symbolic_reference_mentioned_p (rtx op)
10453 const char *fmt;
10454 int i;
10456 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10457 return true;
10459 fmt = GET_RTX_FORMAT (GET_CODE (op));
10460 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10462 if (fmt[i] == 'E')
10464 int j;
10466 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10467 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10468 return true;
10471 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10472 return true;
10475 return false;
10478 /* Return true if it is appropriate to emit `ret' instructions in the
10479 body of a function. Do this only if the epilogue is simple, needing a
10480 couple of insns. Prior to reloading, we can't tell how many registers
10481 must be saved, so return false then. Return false if there is no frame
10482 marker to de-allocate. */
10484 bool
10485 ix86_can_use_return_insn_p (void)
10487 struct ix86_frame frame;
10489 if (ix86_function_naked (current_function_decl))
10490 return false;
10492 /* Don't use `ret' instruction in interrupt handler. */
10493 if (! reload_completed
10494 || frame_pointer_needed
10495 || cfun->machine->func_type != TYPE_NORMAL)
10496 return 0;
10498 /* Don't allow more than 32k pop, since that's all we can do
10499 with one instruction. */
10500 if (crtl->args.pops_args && crtl->args.size >= 32768)
10501 return 0;
10503 frame = cfun->machine->frame;
10504 return (frame.stack_pointer_offset == UNITS_PER_WORD
10505 && (frame.nregs + frame.nsseregs) == 0);
10508 /* Value should be nonzero if functions must have frame pointers.
10509 Zero means the frame pointer need not be set up (and parms may
10510 be accessed via the stack pointer) in functions that seem suitable. */
10512 static bool
10513 ix86_frame_pointer_required (void)
10515 /* If we accessed previous frames, then the generated code expects
10516 to be able to access the saved ebp value in our frame. */
10517 if (cfun->machine->accesses_prev_frame)
10518 return true;
10520 /* Several x86 OSes need a frame pointer for other reasons,
10521 usually pertaining to setjmp. */
10522 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10523 return true;
10525 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
10526 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10527 return true;
10529 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
10530 stack allocation is 4GB. */
10531 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10532 return true;
10534 /* SSE saves require a frame pointer when the stack is misaligned. */
10535 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10536 return true;
10538 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10539 turns off the frame pointer by default. Turn it back on now if
10540 we've not got a leaf function. */
10541 if (TARGET_OMIT_LEAF_FRAME_POINTER
10542 && (!crtl->is_leaf
10543 || ix86_current_function_calls_tls_descriptor))
10544 return true;
10546 if (crtl->profile && !flag_fentry)
10547 return true;
10549 return false;
10552 /* Record that the current function accesses previous call frames. */
10554 void
10555 ix86_setup_frame_addresses (void)
10557 cfun->machine->accesses_prev_frame = 1;
10560 #ifndef USE_HIDDEN_LINKONCE
10561 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10562 # define USE_HIDDEN_LINKONCE 1
10563 # else
10564 # define USE_HIDDEN_LINKONCE 0
10565 # endif
10566 #endif
10568 static int pic_labels_used;
10570 /* Fills in the label name that should be used for a pc thunk for
10571 the given register. */
10573 static void
10574 get_pc_thunk_name (char name[32], unsigned int regno)
10576 gcc_assert (!TARGET_64BIT);
10578 if (USE_HIDDEN_LINKONCE)
10579 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10580 else
10581 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
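/* For example, with USE_HIDDEN_LINKONCE and the %ebx register this yields
   the familiar symbol "__x86.get_pc_thunk.bx"; otherwise an internal label
   built from the "LPR" prefix and the register number is used.  */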
10585 /* This function generates code for -fpic that loads %ebx with
10586 the return address of the caller and then returns. */
10588 static void
10589 ix86_code_end (void)
10591 rtx xops[2];
10592 int regno;
10594 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
10596 char name[32];
10597 tree decl;
10599 if (!(pic_labels_used & (1 << regno)))
10600 continue;
10602 get_pc_thunk_name (name, regno);
10604 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10605 get_identifier (name),
10606 build_function_type_list (void_type_node, NULL_TREE));
10607 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10608 NULL_TREE, void_type_node);
10609 TREE_PUBLIC (decl) = 1;
10610 TREE_STATIC (decl) = 1;
10611 DECL_IGNORED_P (decl) = 1;
10613 #if TARGET_MACHO
10614 if (TARGET_MACHO)
10616 switch_to_section (darwin_sections[picbase_thunk_section]);
10617 fputs ("\t.weak_definition\t", asm_out_file);
10618 assemble_name (asm_out_file, name);
10619 fputs ("\n\t.private_extern\t", asm_out_file);
10620 assemble_name (asm_out_file, name);
10621 putc ('\n', asm_out_file);
10622 ASM_OUTPUT_LABEL (asm_out_file, name);
10623 DECL_WEAK (decl) = 1;
10625 else
10626 #endif
10627 if (USE_HIDDEN_LINKONCE)
10629 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10631 targetm.asm_out.unique_section (decl, 0);
10632 switch_to_section (get_named_section (decl, NULL, 0));
10634 targetm.asm_out.globalize_label (asm_out_file, name);
10635 fputs ("\t.hidden\t", asm_out_file);
10636 assemble_name (asm_out_file, name);
10637 putc ('\n', asm_out_file);
10638 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
10640 else
10642 switch_to_section (text_section);
10643 ASM_OUTPUT_LABEL (asm_out_file, name);
10646 DECL_INITIAL (decl) = make_node (BLOCK);
10647 current_function_decl = decl;
10648 allocate_struct_function (decl, false);
10649 init_function_start (decl);
10650 /* We're about to hide the function body from callees of final_* by
10651 emitting it directly; tell them we're a thunk, if they care. */
10652 cfun->is_thunk = true;
10653 first_function_block_is_cold = false;
10654 /* Make sure unwind info is emitted for the thunk if needed. */
10655 final_start_function (emit_barrier (), asm_out_file, 1);
10657 /* Pad stack IP move with 4 instructions (two NOPs count
10658 as one instruction). */
10659 if (TARGET_PAD_SHORT_FUNCTION)
10661 int i = 8;
10663 while (i--)
10664 fputs ("\tnop\n", asm_out_file);
10667 xops[0] = gen_rtx_REG (Pmode, regno);
10668 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10669 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
10670 output_asm_insn ("%!ret", NULL);
10671 final_end_function ();
10672 init_insn_lengths ();
10673 free_after_compilation (cfun);
10674 set_cfun (NULL);
10675 current_function_decl = NULL;
10678 if (flag_split_stack)
10679 file_end_indicate_split_stack ();
10682 /* Emit code for the SET_GOT patterns. */
10684 const char *
10685 output_set_got (rtx dest, rtx label)
10687 rtx xops[3];
10689 xops[0] = dest;
10691 if (TARGET_VXWORKS_RTP && flag_pic)
10693 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
10694 xops[2] = gen_rtx_MEM (Pmode,
10695 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
10696 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
10698 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
10699 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
10700 an unadorned address. */
10701 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
10702 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
10703 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
10704 return "";
10707 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
10709 if (flag_pic)
10711 char name[32];
10712 get_pc_thunk_name (name, REGNO (dest));
10713 pic_labels_used |= 1 << REGNO (dest);
10715 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10716 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10717 output_asm_insn ("%!call\t%X2", xops);
10719 #if TARGET_MACHO
10720 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10721 This is what will be referenced by the Mach-O PIC subsystem. */
10722 if (machopic_should_output_picbase_label () || !label)
10723 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10725 /* When we are restoring the pic base at the site of a nonlocal label,
10726 and we decided to emit the pic base above, we will still output a
10727 local label used for calculating the correction offset (even though
10728 the offset will be 0 in that case). */
10729 if (label)
10730 targetm.asm_out.internal_label (asm_out_file, "L",
10731 CODE_LABEL_NUMBER (label));
10732 #endif
10734 else
10736 if (TARGET_MACHO)
10737 /* We don't need a pic base, we're not producing pic. */
10738 gcc_unreachable ();
10740 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10741 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10742 targetm.asm_out.internal_label (asm_out_file, "L",
10743 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10746 if (!TARGET_MACHO)
10747 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10749 return "";
10752 /* Generate a "push" pattern for input ARG. */
10754 static rtx
10755 gen_push (rtx arg)
10757 struct machine_function *m = cfun->machine;
10759 if (m->fs.cfa_reg == stack_pointer_rtx)
10760 m->fs.cfa_offset += UNITS_PER_WORD;
10761 m->fs.sp_offset += UNITS_PER_WORD;
10763 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10764 arg = gen_rtx_REG (word_mode, REGNO (arg));
10766 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10767 gen_rtx_PRE_DEC (Pmode,
10768 stack_pointer_rtx)),
10769 arg);
10772 /* Generate a "pop" pattern for input ARG. */
10774 static rtx
10775 gen_pop (rtx arg)
10777 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10778 arg = gen_rtx_REG (word_mode, REGNO (arg));
10780 return gen_rtx_SET (arg,
10781 gen_rtx_MEM (word_mode,
10782 gen_rtx_POST_INC (Pmode,
10783 stack_pointer_rtx)));
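/* Illustration: on a 64-bit target, gen_push and gen_pop above build RTL of
   roughly this shape (a sketch; register numbers elided):

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))    ;; push
       (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))   ;; pop  */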
10786 /* Return >= 0 if there is an unused call-clobbered register available
10787 for the entire function. */
10789 static unsigned int
10790 ix86_select_alt_pic_regnum (void)
10792 if (ix86_use_pseudo_pic_reg ())
10793 return INVALID_REGNUM;
10795 if (crtl->is_leaf
10796 && !crtl->profile
10797 && !ix86_current_function_calls_tls_descriptor)
10799 int i, drap;
10800 /* Can't use the same register for both PIC and DRAP. */
10801 if (crtl->drap_reg)
10802 drap = REGNO (crtl->drap_reg);
10803 else
10804 drap = -1;
10805 for (i = 2; i >= 0; --i)
10806 if (i != drap && !df_regs_ever_live_p (i))
10807 return i;
10810 return INVALID_REGNUM;
10813 /* Return true if REGNO is used by the epilogue. */
10815 bool
10816 ix86_epilogue_uses (int regno)
10818 /* If there are no caller-saved registers, we preserve all registers,
10819 except for MMX and x87 registers which aren't supported when saving
10820 and restoring registers. Don't explicitly save SP register since
10821 it is always preserved. */
10822 return (epilogue_completed
10823 && cfun->machine->no_caller_saved_registers
10824 && !fixed_regs[regno]
10825 && !STACK_REGNO_P (regno)
10826 && !MMX_REGNO_P (regno));
10829 /* Return nonzero if register REGNO can be used as a scratch register
10830 in peephole2. */
10832 static bool
10833 ix86_hard_regno_scratch_ok (unsigned int regno)
10835 /* If there are no caller-saved registers, we can't use any register
10836 as a scratch register after epilogue and use REGNO as scratch
10837 register only if it has been used before to avoid saving and
10838 restoring it. */
10839 return (!cfun->machine->no_caller_saved_registers
10840 || (!epilogue_completed
10841 && df_regs_ever_live_p (regno)));
10844 /* Return true if register class CL should be an additional allocno
10845 class. */
10847 static bool
10848 ix86_additional_allocno_class_p (reg_class_t cl)
10850 return cl == MOD4_SSE_REGS;
10853 /* Return TRUE if we need to save REGNO. */
10855 static bool
10856 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
10858 /* If there are no caller-saved registers, we preserve all registers,
10859 except for MMX and x87 registers which aren't supported when saving
10860 and restoring registers. Don't explicitly save SP register since
10861 it is always preserved. */
10862 if (cfun->machine->no_caller_saved_registers)
10864 /* Don't preserve registers used for function return value. */
10865 rtx reg = crtl->return_rtx;
10866 if (reg)
10868 unsigned int i = REGNO (reg);
10869 unsigned int nregs = REG_NREGS (reg);
10870 while (nregs-- > 0)
10871 if ((i + nregs) == regno)
10872 return false;
10874 reg = crtl->return_bnd;
10875 if (reg)
10877 i = REGNO (reg);
10878 nregs = REG_NREGS (reg);
10879 while (nregs-- > 0)
10880 if ((i + nregs) == regno)
10881 return false;
10885 return (df_regs_ever_live_p (regno)
10886 && !fixed_regs[regno]
10887 && !STACK_REGNO_P (regno)
10888 && !MMX_REGNO_P (regno)
10889 && (regno != HARD_FRAME_POINTER_REGNUM
10890 || !frame_pointer_needed));
10893 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10894 && pic_offset_table_rtx)
10896 if (ix86_use_pseudo_pic_reg ())
10898 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10899 _mcount in prologue. */
10900 if (!TARGET_64BIT && flag_pic && crtl->profile)
10901 return true;
10903 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10904 || crtl->profile
10905 || crtl->calls_eh_return
10906 || crtl->uses_const_pool
10907 || cfun->has_nonlocal_label)
10908 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10911 if (crtl->calls_eh_return && maybe_eh_return)
10913 unsigned i;
10914 for (i = 0; ; i++)
10916 unsigned test = EH_RETURN_DATA_REGNO (i);
10917 if (test == INVALID_REGNUM)
10918 break;
10919 if (test == regno)
10920 return true;
10924 if (ignore_outlined && cfun->machine->call_ms2sysv)
10926 unsigned count = cfun->machine->call_ms2sysv_extra_regs
10927 + xlogue_layout::MIN_REGS;
10928 if (xlogue_layout::is_stub_managed_reg (regno, count))
10929 return false;
10932 if (crtl->drap_reg
10933 && regno == REGNO (crtl->drap_reg)
10934 && !cfun->machine->no_drap_save_restore)
10935 return true;
10937 return (df_regs_ever_live_p (regno)
10938 && !call_used_regs[regno]
10939 && !fixed_regs[regno]
10940 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10943 /* Return the number of saved general purpose registers. */
10945 static int
10946 ix86_nsaved_regs (void)
10948 int nregs = 0;
10949 int regno;
10951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10952 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10953 nregs ++;
10954 return nregs;
10957 /* Return number of saved SSE registers. */
10959 static int
10960 ix86_nsaved_sseregs (void)
10962 int nregs = 0;
10963 int regno;
10965 if (!TARGET_64BIT_MS_ABI)
10966 return 0;
10967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
10969 nregs ++;
10970 return nregs;
10973 /* Given FROM and TO register numbers, say whether this elimination is
10974 allowed. If stack alignment is needed, we can only replace argument
10975 pointer with hard frame pointer, or replace frame pointer with stack
10976 pointer. Otherwise, frame pointer elimination is automatically
10977 handled and all other eliminations are valid. */
10979 static bool
10980 ix86_can_eliminate (const int from, const int to)
10982 if (stack_realign_fp)
10983 return ((from == ARG_POINTER_REGNUM
10984 && to == HARD_FRAME_POINTER_REGNUM)
10985 || (from == FRAME_POINTER_REGNUM
10986 && to == STACK_POINTER_REGNUM));
10987 else
10988 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10991 /* Return the offset between two registers, one to be eliminated, and the other
10992 its replacement, at the start of a routine. */
10994 HOST_WIDE_INT
10995 ix86_initial_elimination_offset (int from, int to)
10997 struct ix86_frame frame = cfun->machine->frame;
10999 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11000 return frame.hard_frame_pointer_offset;
11001 else if (from == FRAME_POINTER_REGNUM
11002 && to == HARD_FRAME_POINTER_REGNUM)
11003 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11004 else
11006 gcc_assert (to == STACK_POINTER_REGNUM);
11008 if (from == ARG_POINTER_REGNUM)
11009 return frame.stack_pointer_offset;
11011 gcc_assert (from == FRAME_POINTER_REGNUM);
11012 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11016 /* In a dynamically-aligned function, we can't know the offset from
11017 stack pointer to frame pointer, so we must ensure that setjmp
11018 eliminates fp against the hard fp (%ebp) rather than trying to
11019 index from %esp up to the top of the frame across a gap that is
11020 of unknown (at compile-time) size. */
11021 static rtx
11022 ix86_builtin_setjmp_frame_value (void)
11024 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11027 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
11028 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11030 static bool warned_once = false;
11031 if (!warned_once)
11033 warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11034 feature);
11035 warned_once = true;
11039 /* When using -fsplit-stack, the allocation routines set a field in
11040 the TCB to the bottom of the stack plus this much space, measured
11041 in bytes. */
11043 #define SPLIT_STACK_AVAILABLE 256
11045 /* Fill the ix86_frame structure describing the frame of the current function. */
11047 static void
11048 ix86_compute_frame_layout (void)
11050 struct ix86_frame *frame = &cfun->machine->frame;
11051 struct machine_function *m = cfun->machine;
11052 unsigned HOST_WIDE_INT stack_alignment_needed;
11053 HOST_WIDE_INT offset;
11054 unsigned HOST_WIDE_INT preferred_alignment;
11055 HOST_WIDE_INT size = get_frame_size ();
11056 HOST_WIDE_INT to_allocate;
11058 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11059 * ms_abi functions that call a sysv function. We now need to prune away
11060 * cases where it should be disabled. */
11061 if (TARGET_64BIT && m->call_ms2sysv)
11063 gcc_assert (TARGET_64BIT_MS_ABI);
11064 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11065 gcc_assert (!TARGET_SEH);
11066 gcc_assert (TARGET_SSE);
11067 gcc_assert (!ix86_using_red_zone ());
11069 if (crtl->calls_eh_return)
11071 gcc_assert (!reload_completed);
11072 m->call_ms2sysv = false;
11073 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11076 else if (ix86_static_chain_on_stack)
11078 gcc_assert (!reload_completed);
11079 m->call_ms2sysv = false;
11080 warn_once_call_ms2sysv_xlogues ("static call chains");
11083 /* Finally, compute which registers the stub will manage. */
11084 else
11086 unsigned count = xlogue_layout::count_stub_managed_regs ();
11087 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11088 m->call_ms2sysv_pad_in = 0;
11092 frame->nregs = ix86_nsaved_regs ();
11093 frame->nsseregs = ix86_nsaved_sseregs ();
11095 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11096 except for function prologues, leaf functions and when the default
11097 incoming stack boundary is overridden at the command line or via the
11098 force_align_arg_pointer attribute. */
11099 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11100 && (!crtl->is_leaf || cfun->calls_alloca != 0
11101 || ix86_current_function_calls_tls_descriptor
11102 || ix86_incoming_stack_boundary < 128))
11104 crtl->preferred_stack_boundary = 128;
11105 crtl->stack_alignment_needed = 128;
11108 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11109 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11111 gcc_assert (!size || stack_alignment_needed);
11112 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11113 gcc_assert (preferred_alignment <= stack_alignment_needed);
11115 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
11116 gcc_assert (TARGET_64BIT || !frame->nsseregs);
11117 if (TARGET_64BIT && m->call_ms2sysv)
11119 gcc_assert (stack_alignment_needed >= 16);
11120 gcc_assert (!frame->nsseregs);
11123 /* For SEH we have to limit the amount of code movement into the prologue.
11124 At present we do this via a BLOCKAGE, at which point there's very little
11125 scheduling that can be done, which means that there's very little point
11126 in doing anything except PUSHs. */
11127 if (TARGET_SEH)
11128 m->use_fast_prologue_epilogue = false;
11129 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11131 int count = frame->nregs;
11132 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11134 /* The fast prologue uses move instead of push to save registers. This
11135 is significantly longer, but also executes faster as modern hardware
11136 can execute the moves in parallel, but can't do that for push/pop.
11138 Be careful about choosing which prologue to emit: when the function takes
11139 many instructions to execute we may use the slow version, as well as when
11140 the function is known to be outside a hot spot (this is known with
11141 feedback only).  Weight the size of the function by the number of registers
11142 to save, as it is cheap to use one or two push instructions but very
11143 slow to use many of them. */
11144 if (count)
11145 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11146 if (node->frequency < NODE_FREQUENCY_NORMAL
11147 || (flag_branch_probabilities
11148 && node->frequency < NODE_FREQUENCY_HOT))
11149 m->use_fast_prologue_epilogue = false;
11150 else
11151 m->use_fast_prologue_epilogue
11152 = !expensive_function_p (count);
11155 frame->save_regs_using_mov
11156 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11157 /* If static stack checking is enabled and done with probes,
11158 the registers need to be saved before allocating the frame. */
11159 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11161 /* Skip return address and error code in exception handler. */
11162 offset = INCOMING_FRAME_SP_OFFSET;
11164 /* Skip pushed static chain. */
11165 if (ix86_static_chain_on_stack)
11166 offset += UNITS_PER_WORD;
11168 /* Skip saved base pointer. */
11169 if (frame_pointer_needed)
11170 offset += UNITS_PER_WORD;
11171 frame->hfp_save_offset = offset;
11173 /* The traditional frame pointer location is at the top of the frame. */
11174 frame->hard_frame_pointer_offset = offset;
11176 /* Register save area */
11177 offset += frame->nregs * UNITS_PER_WORD;
11178 frame->reg_save_offset = offset;
11180 /* On SEH target, registers are pushed just before the frame pointer
11181 location. */
11182 if (TARGET_SEH)
11183 frame->hard_frame_pointer_offset = offset;
11185 /* Calculate the size of the va-arg area (not including padding, if any). */
11186 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11188 if (stack_realign_fp)
11190 /* We may need a 16-byte aligned stack for the remainder of the
11191 register save area, but the stack frame for the local function
11192 may require a greater alignment if using AVX/2/512. In order
11193 to avoid wasting space, we first calculate the space needed for
11194 the rest of the register saves, add that to the stack pointer,
11195 and then realign the stack to the boundary of the start of the
11196 frame for the local function. */
11197 HOST_WIDE_INT space_needed = 0;
11198 HOST_WIDE_INT sse_reg_space_needed = 0;
11200 if (TARGET_64BIT)
11202 if (m->call_ms2sysv)
11204 m->call_ms2sysv_pad_in = 0;
11205 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11208 else if (frame->nsseregs)
11209 /* The only ABI that has saved SSE registers (Win64) also has a
11210 16-byte aligned default stack. However, many programs violate
11211 the ABI, and Wine64 forces stack realignment to compensate. */
11212 space_needed = frame->nsseregs * 16;
11214 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11216 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11217 round anyway to be pedantic. */
11218 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11220 else
11221 space_needed = frame->va_arg_size;
11223 /* Record the allocation size required prior to the realignment AND. */
11224 frame->stack_realign_allocate = space_needed;
11226 /* The re-aligned stack starts at frame->stack_realign_offset. Values
11227 before this point are not directly comparable with values below
11228 this point. Use sp_valid_at to determine if the stack pointer is
11229 valid for a given offset, fp_valid_at for the frame pointer, or
11230 choose_baseaddr to have a base register chosen for you.
11232 Note that the result of (frame->stack_realign_offset
11233 & (stack_alignment_needed - 1)) may not equal zero. */
11234 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11235 frame->stack_realign_offset = offset - space_needed;
11236 frame->sse_reg_save_offset = frame->stack_realign_offset
11237 + sse_reg_space_needed;
11239 else
11241 frame->stack_realign_offset = offset;
11243 if (TARGET_64BIT && m->call_ms2sysv)
11245 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11246 offset += xlogue_layout::get_instance ().get_stack_space_used ();
11249 /* Align and set SSE register save area. */
11250 else if (frame->nsseregs)
11252 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11253 required and the DRAP re-alignment boundary is at least 16 bytes,
11254 then we want the SSE register save area properly aligned. */
11255 if (ix86_incoming_stack_boundary >= 128
11256 || (stack_realign_drap && stack_alignment_needed >= 16))
11257 offset = ROUND_UP (offset, 16);
11258 offset += frame->nsseregs * 16;
11260 frame->sse_reg_save_offset = offset;
11261 offset += frame->va_arg_size;
11264 /* Align start of frame for local function. */
11265 if (m->call_ms2sysv
11266 || frame->va_arg_size != 0
11267 || size != 0
11268 || !crtl->is_leaf
11269 || cfun->calls_alloca
11270 || ix86_current_function_calls_tls_descriptor)
11271 offset = ROUND_UP (offset, stack_alignment_needed);
11273 /* Frame pointer points here. */
11274 frame->frame_pointer_offset = offset;
11276 offset += size;
11278 /* Add the outgoing arguments area.  It can be skipped if we eliminated
11279 all the function calls as dead code.
11280 Skipping is however impossible when the function calls alloca: the alloca
11281 expander assumes that the last crtl->outgoing_args_size bytes
11282 of the stack frame are unused. */
11283 if (ACCUMULATE_OUTGOING_ARGS
11284 && (!crtl->is_leaf || cfun->calls_alloca
11285 || ix86_current_function_calls_tls_descriptor))
11287 offset += crtl->outgoing_args_size;
11288 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11290 else
11291 frame->outgoing_arguments_size = 0;
11293 /* Align stack boundary. Only needed if we're calling another function
11294 or using alloca. */
11295 if (!crtl->is_leaf || cfun->calls_alloca
11296 || ix86_current_function_calls_tls_descriptor)
11297 offset = ROUND_UP (offset, preferred_alignment);
11299 /* We've reached end of stack frame. */
11300 frame->stack_pointer_offset = offset;
11302 /* Size prologue needs to allocate. */
11303 to_allocate = offset - frame->sse_reg_save_offset;
11305 if ((!to_allocate && frame->nregs <= 1)
11306 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
11307 frame->save_regs_using_mov = false;
11309 if (ix86_using_red_zone ()
11310 && crtl->sp_is_unchanging
11311 && crtl->is_leaf
11312 && !ix86_pc_thunk_call_expanded
11313 && !ix86_current_function_calls_tls_descriptor)
11315 frame->red_zone_size = to_allocate;
11316 if (frame->save_regs_using_mov)
11317 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11318 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11319 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11321 else
11322 frame->red_zone_size = 0;
11323 frame->stack_pointer_offset -= frame->red_zone_size;
11325 /* The SEH frame pointer location is near the bottom of the frame.
11326 This is enforced by the fact that the difference between the
11327 stack pointer and the frame pointer is limited to 240 bytes in
11328 the unwind data structure. */
11329 if (TARGET_SEH)
11331 HOST_WIDE_INT diff;
11333 /* If we can leave the frame pointer where it is, do so. Also, returns
11334 the establisher frame for __builtin_frame_address (0). */
11335 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11336 if (diff <= SEH_MAX_FRAME_SIZE
11337 && (diff > 240 || (diff & 15) != 0)
11338 && !crtl->accesses_prior_frames)
11340 /* Ideally we'd determine what portion of the local stack frame
11341 (within the constraint of the lowest 240) is most heavily used.
11342 But without that complication, simply bias the frame pointer
11343 by 128 bytes so as to maximize the amount of the local stack
11344 frame that is addressable with 8-bit offsets. */
11345 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
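/* Worked example of the SEH bias above (numbers are illustrative only): if
   stack_pointer_offset were 1024 and hard_frame_pointer_offset 16, then
   diff = 1008 > 240, so the frame pointer is rebased to 1024 - 128 = 896.
   It then sits 128 bytes above the stack pointer, and signed 8-bit
   displacements from it cover everything from the stack pointer up to 255
   bytes above it.  */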
11350 /* This is semi-inlined memory_address_length, but simplified
11351 since we know that we're always dealing with reg+offset, and
11352 to avoid having to create and discard all that rtl. */
11354 static inline int
11355 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11357 int len = 4;
11359 if (offset == 0)
11361 /* EBP and R13 cannot be encoded without an offset. */
11362 len = (regno == BP_REG || regno == R13_REG);
11364 else if (IN_RANGE (offset, -128, 127))
11365 len = 1;
11367 /* ESP and R12 must be encoded with a SIB byte. */
11368 if (regno == SP_REG || regno == R12_REG)
11369 len++;
11371 return len;
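/* Illustrative values for the encoding-length estimate above:

     choose_baseaddr_len (BP_REG, 0)    == 1   (ebp always needs a disp byte)
     choose_baseaddr_len (SP_REG, 0)    == 1   (no disp, but a SIB byte)
     choose_baseaddr_len (SP_REG, -64)  == 2   (disp8 plus SIB)
     choose_baseaddr_len (BP_REG, 512)  == 4   (disp32)

   Only the displacement/SIB bytes are modelled, not the full insn length.  */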
11374 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11375 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11377 static bool
11378 sp_valid_at (HOST_WIDE_INT cfa_offset)
11380 const struct machine_frame_state &fs = cfun->machine->fs;
11381 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11383 /* Validate that the cfa_offset isn't in a "no-man's land". */
11384 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11385 return false;
11387 return fs.sp_valid;
11390 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11391 the frame save area. The register is saved at CFA - CFA_OFFSET. */
11393 static inline bool
11394 fp_valid_at (HOST_WIDE_INT cfa_offset)
11396 const struct machine_frame_state &fs = cfun->machine->fs;
11397 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11399 /* Validate that the cfa_offset isn't in a "no-man's land". */
11400 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11401 return false;
11403 return fs.fp_valid;
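/* Hypothetical picture of the split frame these two predicates describe:
   with fs.sp_realigned_fp_last == 16 and fs.sp_realigned_offset == 48
   (made-up numbers),

     sp_valid_at (8)   -> false    use the frame pointer below the realignment
     fp_valid_at (64)  -> false    use the realigned stack pointer above it
     sp_valid_at (32)  -> assert   the "no-man's land" in between

   The prologue establishes sp_realigned_offset >= sp_realigned_fp_last;
   offsets that fall strictly between the two trip the asserts above.  */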
11406 /* Choose a base register based upon alignment requested, speed and/or
11407 size. */
11409 static void
11410 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
11411 HOST_WIDE_INT &base_offset,
11412 unsigned int align_requested, unsigned int *align)
11414 const struct machine_function *m = cfun->machine;
11415 unsigned int hfp_align;
11416 unsigned int drap_align;
11417 unsigned int sp_align;
11418 bool hfp_ok = fp_valid_at (cfa_offset);
11419 bool drap_ok = m->fs.drap_valid;
11420 bool sp_ok = sp_valid_at (cfa_offset);
11422 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
11424 /* Filter out any registers that don't meet the requested alignment
11425 criteria. */
11426 if (align_requested)
11428 if (m->fs.realigned)
11429 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
11430 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
11431 notes (which we would need in order to use a realigned stack pointer),
11432 so disable this on SEH targets. */
11433 else if (m->fs.sp_realigned)
11434 sp_align = crtl->stack_alignment_needed;
11436 hfp_ok = hfp_ok && hfp_align >= align_requested;
11437 drap_ok = drap_ok && drap_align >= align_requested;
11438 sp_ok = sp_ok && sp_align >= align_requested;
11441 if (m->use_fast_prologue_epilogue)
11443 /* Choose the base register most likely to allow the most scheduling
11444 opportunities. Generally FP is valid throughout the function,
11445 while DRAP must be reloaded within the epilogue. But choose either
11446 over the SP due to increased encoding size. */
11448 if (hfp_ok)
11450 base_reg = hard_frame_pointer_rtx;
11451 base_offset = m->fs.fp_offset - cfa_offset;
11453 else if (drap_ok)
11455 base_reg = crtl->drap_reg;
11456 base_offset = 0 - cfa_offset;
11458 else if (sp_ok)
11460 base_reg = stack_pointer_rtx;
11461 base_offset = m->fs.sp_offset - cfa_offset;
11464 else
11466 HOST_WIDE_INT toffset;
11467 int len = 16, tlen;
11469 /* Choose the base register with the smallest address encoding.
11470 With a tie, choose FP > DRAP > SP. */
11471 if (sp_ok)
11473 base_reg = stack_pointer_rtx;
11474 base_offset = m->fs.sp_offset - cfa_offset;
11475 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11477 if (drap_ok)
11479 toffset = 0 - cfa_offset;
11480 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11481 if (tlen <= len)
11483 base_reg = crtl->drap_reg;
11484 base_offset = toffset;
11485 len = tlen;
11488 if (hfp_ok)
11490 toffset = m->fs.fp_offset - cfa_offset;
11491 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11492 if (tlen <= len)
11494 base_reg = hard_frame_pointer_rtx;
11495 base_offset = toffset;
11496 len = tlen;
11501 /* Set the align return value. */
11502 if (align)
11504 if (base_reg == stack_pointer_rtx)
11505 *align = sp_align;
11506 else if (base_reg == crtl->drap_reg)
11507 *align = drap_align;
11508 else if (base_reg == hard_frame_pointer_rtx)
11509 *align = hfp_align;
11513 /* Return an RTX that points to CFA_OFFSET within the stack frame and
11514 the alignment of the address. If ALIGN is non-null, it should point to
11515 an alignment value (in bits) that is preferred or zero and will
11516 receive the alignment of the base register that was selected,
11517 irrespective of whether or not CFA_OFFSET is a multiple of that
11518 alignment value.
11520 The valid base registers are taken from CFUN->MACHINE->FS. */
11522 static rtx
11523 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
11525 rtx base_reg = NULL;
11526 HOST_WIDE_INT base_offset = 0;
11528 /* If a specific alignment is requested, try to get a base register
11529 with that alignment first. */
11530 if (align && *align)
11531 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
11533 if (!base_reg)
11534 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
11536 gcc_assert (base_reg != NULL);
11537 return plus_constant (Pmode, base_reg, base_offset);
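/* Typical use, mirroring ix86_emit_save_reg_using_mov below (CFA_OFFSET is
   whatever slot the caller wants to address):

     unsigned int align = GET_MODE_ALIGNMENT (mode);
     rtx addr = choose_baseaddr (cfa_offset, &align);
     rtx mem = gen_frame_mem (mode, addr);
     set_mem_align (mem, MIN (GET_MODE_ALIGNMENT (mode), align));

   Passing a null ALIGN simply asks for the cheapest valid base register.  */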
11540 /* Emit code to save registers in the prologue. */
11542 static void
11543 ix86_emit_save_regs (void)
11545 unsigned int regno;
11546 rtx_insn *insn;
11548 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11549 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11551 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11552 RTX_FRAME_RELATED_P (insn) = 1;
11556 /* Emit a single register save at CFA - CFA_OFFSET. */
11558 static void
11559 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11560 HOST_WIDE_INT cfa_offset)
11562 struct machine_function *m = cfun->machine;
11563 rtx reg = gen_rtx_REG (mode, regno);
11564 rtx mem, addr, base, insn;
11565 unsigned int align = GET_MODE_ALIGNMENT (mode);
11567 addr = choose_baseaddr (cfa_offset, &align);
11568 mem = gen_frame_mem (mode, addr);
11570 /* The location alignment depends upon the base register. */
11571 align = MIN (GET_MODE_ALIGNMENT (mode), align);
11572 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
11573 set_mem_align (mem, align);
11575 insn = emit_insn (gen_rtx_SET (mem, reg));
11576 RTX_FRAME_RELATED_P (insn) = 1;
11578 base = addr;
11579 if (GET_CODE (base) == PLUS)
11580 base = XEXP (base, 0);
11581 gcc_checking_assert (REG_P (base));
11583 /* When saving registers into a re-aligned local stack frame, avoid
11584 any tricky guessing by dwarf2out. */
11585 if (m->fs.realigned)
11587 gcc_checking_assert (stack_realign_drap);
11589 if (regno == REGNO (crtl->drap_reg))
11591 /* A bit of a hack. We force the DRAP register to be saved in
11592 the re-aligned stack frame, which provides us with a copy
11593 of the CFA that will last past the prologue. Install it. */
11594 gcc_checking_assert (cfun->machine->fs.fp_valid);
11595 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11596 cfun->machine->fs.fp_offset - cfa_offset);
11597 mem = gen_rtx_MEM (mode, addr);
11598 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11600 else
11602 /* The frame pointer is a stable reference within the
11603 aligned frame. Use it. */
11604 gcc_checking_assert (cfun->machine->fs.fp_valid);
11605 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11606 cfun->machine->fs.fp_offset - cfa_offset);
11607 mem = gen_rtx_MEM (mode, addr);
11608 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11612 else if (base == stack_pointer_rtx && m->fs.sp_realigned
11613 && cfa_offset >= m->fs.sp_realigned_offset)
11615 gcc_checking_assert (stack_realign_fp);
11616 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11619 /* The memory may not be relative to the current CFA register,
11620 which means that we may need to generate a new pattern for
11621 use by the unwind info. */
11622 else if (base != m->fs.cfa_reg)
11624 addr = plus_constant (Pmode, m->fs.cfa_reg,
11625 m->fs.cfa_offset - cfa_offset);
11626 mem = gen_rtx_MEM (mode, addr);
11627 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11631 /* Emit code to save registers using MOV insns.
11632 First register is stored at CFA - CFA_OFFSET. */
11633 static void
11634 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11636 unsigned int regno;
11638 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11639 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11641 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11642 cfa_offset -= UNITS_PER_WORD;
11646 /* Emit code to save SSE registers using MOV insns.
11647 First register is stored at CFA - CFA_OFFSET. */
11648 static void
11649 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11651 unsigned int regno;
11653 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11654 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11656 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11657 cfa_offset -= GET_MODE_SIZE (V4SFmode);
11661 static GTY(()) rtx queued_cfa_restores;
11663 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
11664 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11665 Don't add the note if the previously saved value will be left untouched
11666 within the stack red zone until return, as unwinders can find the same value
11667 in the register and on the stack. */
11669 static void
11670 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11672 if (!crtl->shrink_wrapped
11673 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11674 return;
11676 if (insn)
11678 add_reg_note (insn, REG_CFA_RESTORE, reg);
11679 RTX_FRAME_RELATED_P (insn) = 1;
11681 else
11682 queued_cfa_restores
11683 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11686 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
11688 static void
11689 ix86_add_queued_cfa_restore_notes (rtx insn)
11691 rtx last;
11692 if (!queued_cfa_restores)
11693 return;
11694 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11696 XEXP (last, 1) = REG_NOTES (insn);
11697 REG_NOTES (insn) = queued_cfa_restores;
11698 queued_cfa_restores = NULL_RTX;
11699 RTX_FRAME_RELATED_P (insn) = 1;
11702 /* Expand prologue or epilogue stack adjustment.
11703 The pattern exists to put a dependency on all ebp-based memory accesses.
11704 STYLE should be negative if instructions should be marked as frame related,
11705 zero if %r11 register is live and cannot be freely used and positive
11706 otherwise. */
11708 static rtx
11709 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11710 int style, bool set_cfa)
11712 struct machine_function *m = cfun->machine;
11713 rtx insn;
11714 bool add_frame_related_expr = false;
11716 if (Pmode == SImode)
11717 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11718 else if (x86_64_immediate_operand (offset, DImode))
11719 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
11720 else
11722 rtx tmp;
11723 /* r11 is used by indirect sibcall return as well, set before the
11724 epilogue and used after the epilogue. */
11725 if (style)
11726 tmp = gen_rtx_REG (DImode, R11_REG);
11727 else
11729 gcc_assert (src != hard_frame_pointer_rtx
11730 && dest != hard_frame_pointer_rtx);
11731 tmp = hard_frame_pointer_rtx;
11733 insn = emit_insn (gen_rtx_SET (tmp, offset));
11734 if (style < 0)
11735 add_frame_related_expr = true;
11737 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11740 insn = emit_insn (insn);
11741 if (style >= 0)
11742 ix86_add_queued_cfa_restore_notes (insn);
11744 if (set_cfa)
11746 rtx r;
11748 gcc_assert (m->fs.cfa_reg == src);
11749 m->fs.cfa_offset += INTVAL (offset);
11750 m->fs.cfa_reg = dest;
11752 r = gen_rtx_PLUS (Pmode, src, offset);
11753 r = gen_rtx_SET (dest, r);
11754 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11755 RTX_FRAME_RELATED_P (insn) = 1;
11757 else if (style < 0)
11759 RTX_FRAME_RELATED_P (insn) = 1;
11760 if (add_frame_related_expr)
11762 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11763 r = gen_rtx_SET (dest, r);
11764 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11768 if (dest == stack_pointer_rtx)
11770 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11771 bool valid = m->fs.sp_valid;
11772 bool realigned = m->fs.sp_realigned;
11774 if (src == hard_frame_pointer_rtx)
11776 valid = m->fs.fp_valid;
11777 realigned = false;
11778 ooffset = m->fs.fp_offset;
11780 else if (src == crtl->drap_reg)
11782 valid = m->fs.drap_valid;
11783 realigned = false;
11784 ooffset = 0;
11786 else
11788 /* Else there are two possibilities: SP itself, which we set
11789 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
11790 taken care of by hand along the eh_return path. */
11791 gcc_checking_assert (src == stack_pointer_rtx
11792 || offset == const0_rtx);
11795 m->fs.sp_offset = ooffset - INTVAL (offset);
11796 m->fs.sp_valid = valid;
11797 m->fs.sp_realigned = realigned;
11799 return insn;
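/* Sketch of the code emitted for a 64-bit adjustment whose constant does not
   fit in a signed 32-bit immediate (the constant below is hypothetical):

     movabsq	$-34359738368, %r11	# style != 0: scratch is %r11
     addq	%r11, %rsp

   When STYLE is zero %r11 is live, so the hard frame pointer serves as the
   temporary instead, which is why the assert insists it is neither SRC nor
   DEST.  */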
11802 /* Find an available register to be used as the dynamic realign argument
11803 pointer register. Such a register will be written in the prologue and
11804 used at the beginning of the body, so it must not be
11805 1. a parameter passing register.
11806 2. the GOT pointer.
11807 We reuse the static-chain register if it is available. Otherwise, we
11808 use DI for i386 and R13 for x86-64. We chose R13 since it has
11809 shorter encoding.
11811 Return: the regno of chosen register. */
11813 static unsigned int
11814 find_drap_reg (void)
11816 tree decl = cfun->decl;
11818 /* Always use callee-saved register if there are no caller-saved
11819 registers. */
11820 if (TARGET_64BIT)
11822 /* Use R13 for a nested function or a function that needs a static chain.
11823 Since a function with a tail call may use any caller-saved
11824 register in the epilogue, DRAP must not use a caller-saved
11825 register in that case. */
11826 if (DECL_STATIC_CHAIN (decl)
11827 || cfun->machine->no_caller_saved_registers
11828 || crtl->tail_call_emit)
11829 return R13_REG;
11831 return R10_REG;
11833 else
11835 /* Use DI for a nested function or a function that needs a static chain.
11836 Since a function with a tail call may use any caller-saved
11837 register in the epilogue, DRAP must not use a caller-saved
11838 register in that case. */
11839 if (DECL_STATIC_CHAIN (decl)
11840 || cfun->machine->no_caller_saved_registers
11841 || crtl->tail_call_emit)
11842 return DI_REG;
11844 /* Reuse static chain register if it isn't used for parameter
11845 passing. */
11846 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11848 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11849 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11850 return CX_REG;
11852 return DI_REG;
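/* Summary of the selection above, read straight off the conditions:

     64-bit, static chain, tail call or no_caller_saved_registers -> R13
     64-bit, otherwise                                            -> R10
     32-bit, static chain, tail call or no_caller_saved_registers -> DI
     32-bit, regparm <= 2 and neither fastcall nor thiscall       -> CX
     32-bit, otherwise                                            -> DI  */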
11856 /* Handle a "force_align_arg_pointer" attribute. */
11858 static tree
11859 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11860 tree, int, bool *no_add_attrs)
11862 if (TREE_CODE (*node) != FUNCTION_TYPE
11863 && TREE_CODE (*node) != METHOD_TYPE
11864 && TREE_CODE (*node) != FIELD_DECL
11865 && TREE_CODE (*node) != TYPE_DECL)
11867 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11868 name);
11869 *no_add_attrs = true;
11872 return NULL_TREE;
11875 /* Return minimum incoming stack alignment. */
11877 static unsigned int
11878 ix86_minimum_incoming_stack_boundary (bool sibcall)
11880 unsigned int incoming_stack_boundary;
11882 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
11883 if (cfun->machine->func_type != TYPE_NORMAL)
11884 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
11885 /* Prefer the one specified at command line. */
11886 else if (ix86_user_incoming_stack_boundary)
11887 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11888 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
11889 if -mstackrealign is used, this isn't a sibcall check, and the
11890 estimated stack alignment is 128 bits. */
11891 else if (!sibcall
11892 && ix86_force_align_arg_pointer
11893 && crtl->stack_alignment_estimated == 128)
11894 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11895 else
11896 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11898 /* Incoming stack alignment can be changed on individual functions
11899 via force_align_arg_pointer attribute. We use the smallest
11900 incoming stack boundary. */
11901 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11902 && lookup_attribute (ix86_force_align_arg_pointer_string,
11903 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11904 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11906 /* The incoming stack frame has to be aligned at least at
11907 parm_stack_boundary. */
11908 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11909 incoming_stack_boundary = crtl->parm_stack_boundary;
11911 /* Stack at entrance of main is aligned by runtime. We use the
11912 smallest incoming stack boundary. */
11913 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
11914 && DECL_NAME (current_function_decl)
11915 && MAIN_NAME_P (DECL_NAME (current_function_decl))
11916 && DECL_FILE_SCOPE_P (current_function_decl))
11917 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
11919 return incoming_stack_boundary;
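/* Worked example of the clamping above (hypothetical function): a 64-bit
   interrupt handler starts at 128; had it carried the
   force_align_arg_pointer attribute the value would be lowered to
   MIN_STACK_BOUNDARY; the result is then raised to crtl->parm_stack_boundary
   if that is larger, and a file-scope main () is finally capped at
   MAIN_STACK_BOUNDARY.  */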
11922 /* Update incoming stack boundary and estimated stack alignment. */
11924 static void
11925 ix86_update_stack_boundary (void)
11927 ix86_incoming_stack_boundary
11928 = ix86_minimum_incoming_stack_boundary (false);
11930 /* x86_64 varargs need 16-byte stack alignment for the register save
11931 area. */
11932 if (TARGET_64BIT
11933 && cfun->stdarg
11934 && crtl->stack_alignment_estimated < 128)
11935 crtl->stack_alignment_estimated = 128;
11937 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
11938 if (ix86_tls_descriptor_calls_expanded_in_cfun
11939 && crtl->preferred_stack_boundary < 128)
11940 crtl->preferred_stack_boundary = 128;
11943 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
11944 needed or an rtx for DRAP otherwise. */
11946 static rtx
11947 ix86_get_drap_rtx (void)
11949 /* We must use DRAP if there are outgoing arguments on stack and
11950 ACCUMULATE_OUTGOING_ARGS is false. */
11951 if (ix86_force_drap
11952 || (cfun->machine->outgoing_args_on_stack
11953 && !ACCUMULATE_OUTGOING_ARGS))
11954 crtl->need_drap = true;
11956 if (stack_realign_drap)
11958 /* Assign DRAP to vDRAP and return vDRAP. */
11959 unsigned int regno = find_drap_reg ();
11960 rtx drap_vreg;
11961 rtx arg_ptr;
11962 rtx_insn *seq, *insn;
11964 arg_ptr = gen_rtx_REG (Pmode, regno);
11965 crtl->drap_reg = arg_ptr;
11967 start_sequence ();
11968 drap_vreg = copy_to_reg (arg_ptr);
11969 seq = get_insns ();
11970 end_sequence ();
11972 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
11973 if (!optimize)
11975 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
11976 RTX_FRAME_RELATED_P (insn) = 1;
11978 return drap_vreg;
11980 else
11981 return NULL;
11984 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
11986 static rtx
11987 ix86_internal_arg_pointer (void)
11989 return virtual_incoming_args_rtx;
11992 struct scratch_reg {
11993 rtx reg;
11994 bool saved;
11997 /* Return a short-lived scratch register for use on function entry.
11998 In 32-bit mode, it is valid only after the registers are saved
11999 in the prologue. This register must be released by means of
12000 release_scratch_register_on_entry once it is dead. */
12002 static void
12003 get_scratch_register_on_entry (struct scratch_reg *sr)
12005 int regno;
12007 sr->saved = false;
12009 if (TARGET_64BIT)
12011 /* We always use R11 in 64-bit mode. */
12012 regno = R11_REG;
12014 else
12016 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12017 bool fastcall_p
12018 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12019 bool thiscall_p
12020 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12021 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12022 int regparm = ix86_function_regparm (fntype, decl);
12023 int drap_regno
12024 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12026 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12027 for the static chain register. */
12028 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12029 && drap_regno != AX_REG)
12030 regno = AX_REG;
12031 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12032 for the static chain register. */
12033 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12034 regno = AX_REG;
12035 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12036 regno = DX_REG;
12037 /* ecx is the static chain register. */
12038 else if (regparm < 3 && !fastcall_p && !thiscall_p
12039 && !static_chain_p
12040 && drap_regno != CX_REG)
12041 regno = CX_REG;
12042 else if (ix86_save_reg (BX_REG, true, false))
12043 regno = BX_REG;
12044 /* esi is the static chain register. */
12045 else if (!(regparm == 3 && static_chain_p)
12046 && ix86_save_reg (SI_REG, true, false))
12047 regno = SI_REG;
12048 else if (ix86_save_reg (DI_REG, true, false))
12049 regno = DI_REG;
12050 else
12052 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12053 sr->saved = true;
12057 sr->reg = gen_rtx_REG (Pmode, regno);
12058 if (sr->saved)
12060 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12061 RTX_FRAME_RELATED_P (insn) = 1;
12065 /* Release a scratch register obtained from the preceding function. */
12067 static void
12068 release_scratch_register_on_entry (struct scratch_reg *sr)
12070 if (sr->saved)
12072 struct machine_function *m = cfun->machine;
12073 rtx x, insn = emit_insn (gen_pop (sr->reg));
12075 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12076 RTX_FRAME_RELATED_P (insn) = 1;
12077 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12078 x = gen_rtx_SET (stack_pointer_rtx, x);
12079 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12080 m->fs.sp_offset -= UNITS_PER_WORD;
12084 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12086 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12088 This differs from the next routine in that it tries hard to prevent
12089 attacks that jump the stack guard. Thus it is never allowed to allocate
12090 more than PROBE_INTERVAL bytes of stack space without a suitable
12091 probe. */
12093 static void
12094 ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size)
12096 struct machine_function *m = cfun->machine;
12098 /* If this function does not statically allocate stack space, then
12099 no probes are needed. */
12100 if (!size)
12102 /* However, the allocation of space via pushes for register
12103 saves could be viewed as allocating space, but without the
12104 need to probe. */
12105 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12106 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12107 else
12108 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12109 return;
12112 /* If we are a noreturn function, then we have to consider the
12113 possibility that we're called via a jump rather than a call.
12115 Thus we don't have the implicit probe generated by saving the
12116 return address into the stack at the call. Thus, the stack
12117 pointer could be anywhere in the guard page. The safe thing
12118 to do is emit a probe now.
12120 ?!? This should be revamped to work like aarch64 and s390 where
12121 we track the offset from the most recent probe. Normally that
12122 offset would be zero. For a noreturn function we would reset
12123 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
12124 we just probe when we cross PROBE_INTERVAL. */
12125 if (TREE_THIS_VOLATILE (cfun->decl))
12127 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12128 -GET_MODE_SIZE (word_mode)));
12129 emit_insn (gen_blockage ());
12132 /* If we allocate less than the size of the guard statically,
12133 then no probing is necessary, but we do need to allocate
12134 the stack. */
12135 if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12137 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12138 GEN_INT (-size), -1,
12139 m->fs.cfa_reg == stack_pointer_rtx);
12140 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12141 return;
12144 /* We're allocating a large enough stack frame that we need to
12145 emit probes. Either emit them inline or in a loop depending
12146 on the size. */
12147 HOST_WIDE_INT probe_interval
12148 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
12149 if (size <= 4 * probe_interval)
12151 HOST_WIDE_INT i;
12152 for (i = probe_interval; i <= size; i += probe_interval)
12154 /* Allocate PROBE_INTERVAL bytes. */
12155 rtx insn
12156 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12157 GEN_INT (-PROBE_INTERVAL), -1,
12158 m->fs.cfa_reg == stack_pointer_rtx);
12159 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12161 /* And probe at *sp. */
12162 emit_stack_probe (stack_pointer_rtx);
12163 emit_insn (gen_blockage ());
12166 /* We need to allocate space for the residual, but we do not need
12167 to probe the residual. */
12168 HOST_WIDE_INT residual = (i - probe_interval - size);
12169 if (residual)
12170 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12171 GEN_INT (residual), -1,
12172 m->fs.cfa_reg == stack_pointer_rtx);
12173 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12175 else
12177 struct scratch_reg sr;
12178 get_scratch_register_on_entry (&sr);
12180 /* Step 1: round SIZE down to a multiple of the interval. */
12181 HOST_WIDE_INT rounded_size = size & -probe_interval;
12183 /* Step 2: compute final value of the loop counter. Use lea if
12184 possible. */
12185 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12186 rtx insn;
12187 if (address_no_seg_operand (addr, Pmode))
12188 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12189 else
12191 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12192 insn = emit_insn (gen_rtx_SET (sr.reg,
12193 gen_rtx_PLUS (Pmode, sr.reg,
12194 stack_pointer_rtx)));
12196 if (m->fs.cfa_reg == stack_pointer_rtx)
12198 add_reg_note (insn, REG_CFA_DEF_CFA,
12199 plus_constant (Pmode, sr.reg,
12200 m->fs.cfa_offset + rounded_size));
12201 RTX_FRAME_RELATED_P (insn) = 1;
12204 /* Step 3: the loop. */
12205 rtx size_rtx = GEN_INT (rounded_size);
12206 insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12207 size_rtx));
12208 if (m->fs.cfa_reg == stack_pointer_rtx)
12210 m->fs.cfa_offset += rounded_size;
12211 add_reg_note (insn, REG_CFA_DEF_CFA,
12212 plus_constant (Pmode, stack_pointer_rtx,
12213 m->fs.cfa_offset));
12214 RTX_FRAME_RELATED_P (insn) = 1;
12216 m->fs.sp_offset += rounded_size;
12217 emit_insn (gen_blockage ());
12219 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12220 is equal to ROUNDED_SIZE. */
12222 if (size != rounded_size)
12223 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12224 GEN_INT (rounded_size - size), -1,
12225 m->fs.cfa_reg == stack_pointer_rtx);
12226 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12228 release_scratch_register_on_entry (&sr);
12231 /* Make sure nothing is scheduled before we are done. */
12232 emit_insn (gen_blockage ());
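/* Worked example, assuming the default 4 KiB guard size and probe interval
   (the PARAM_STACK_CLASH_PROTECTION_* values): a 2 KiB frame is below the
   guard, so it is allocated with no probe at all; a 12 KiB frame (three
   intervals, i.e. at most four) gets three inline allocate-4KiB-then-probe
   pairs and no residual; anything larger rounds the size down to a multiple
   of the interval, runs the register-driven probe loop over the rounded
   part, and allocates the remainder without a probe.  */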
12235 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12237 static void
12238 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12240 /* We skip the probe for the first interval + a small dope of 4 words and
12241 probe that many bytes past the specified size to maintain a protection
12242 area at the bottom of the stack. */
12243 const int dope = 4 * UNITS_PER_WORD;
12244 rtx size_rtx = GEN_INT (size), last;
12246 /* See if we have a constant small number of probes to generate. If so,
12247 that's the easy case. The run-time loop is made up of 9 insns in the
12248 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12249 for n # of intervals. */
12250 if (size <= 4 * PROBE_INTERVAL)
12252 HOST_WIDE_INT i, adjust;
12253 bool first_probe = true;
12255 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12256 values of N from 1 until it exceeds SIZE. If only one probe is
12257 needed, this will not generate any code. Then adjust and probe
12258 to PROBE_INTERVAL + SIZE. */
12259 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12261 if (first_probe)
12263 adjust = 2 * PROBE_INTERVAL + dope;
12264 first_probe = false;
12266 else
12267 adjust = PROBE_INTERVAL;
12269 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12270 plus_constant (Pmode, stack_pointer_rtx,
12271 -adjust)));
12272 emit_stack_probe (stack_pointer_rtx);
12275 if (first_probe)
12276 adjust = size + PROBE_INTERVAL + dope;
12277 else
12278 adjust = size + PROBE_INTERVAL - i;
12280 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12281 plus_constant (Pmode, stack_pointer_rtx,
12282 -adjust)));
12283 emit_stack_probe (stack_pointer_rtx);
12285 /* Adjust back to account for the additional first interval. */
12286 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12287 plus_constant (Pmode, stack_pointer_rtx,
12288 PROBE_INTERVAL + dope)));
12291 /* Otherwise, do the same as above, but in a loop. Note that we must be
12292 extra careful with variables wrapping around because we might be at
12293 the very top (or the very bottom) of the address space and we have
12294 to be able to handle this case properly; in particular, we use an
12295 equality test for the loop condition. */
12296 else
12298 HOST_WIDE_INT rounded_size;
12299 struct scratch_reg sr;
12301 get_scratch_register_on_entry (&sr);
12304 /* Step 1: round SIZE to the previous multiple of the interval. */
12306 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12309 /* Step 2: compute initial and final value of the loop counter. */
12311 /* SP = SP_0 + PROBE_INTERVAL. */
12312 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12313 plus_constant (Pmode, stack_pointer_rtx,
12314 - (PROBE_INTERVAL + dope))));
12316 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
12317 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12318 emit_insn (gen_rtx_SET (sr.reg,
12319 plus_constant (Pmode, stack_pointer_rtx,
12320 -rounded_size)));
12321 else
12323 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12324 emit_insn (gen_rtx_SET (sr.reg,
12325 gen_rtx_PLUS (Pmode, sr.reg,
12326 stack_pointer_rtx)));
12330 /* Step 3: the loop
12334 SP = SP + PROBE_INTERVAL
12335 probe at SP
12337 while (SP != LAST_ADDR)
12339 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12340 values of N from 1 until it is equal to ROUNDED_SIZE. */
12342 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12345 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12346 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12348 if (size != rounded_size)
12350 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12351 plus_constant (Pmode, stack_pointer_rtx,
12352 rounded_size - size)));
12353 emit_stack_probe (stack_pointer_rtx);
12356 /* Adjust back to account for the additional first interval. */
12357 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12358 plus_constant (Pmode, stack_pointer_rtx,
12359 PROBE_INTERVAL + dope)));
12361 release_scratch_register_on_entry (&sr);
12364 /* Even if the stack pointer isn't the CFA register, we need to correctly
12365 describe the adjustments made to it, in particular differentiate the
12366 frame-related ones from the frame-unrelated ones. */
12367 if (size > 0)
12369 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12370 XVECEXP (expr, 0, 0)
12371 = gen_rtx_SET (stack_pointer_rtx,
12372 plus_constant (Pmode, stack_pointer_rtx, -size));
12373 XVECEXP (expr, 0, 1)
12374 = gen_rtx_SET (stack_pointer_rtx,
12375 plus_constant (Pmode, stack_pointer_rtx,
12376 PROBE_INTERVAL + dope + size));
12377 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12378 RTX_FRAME_RELATED_P (last) = 1;
12380 cfun->machine->fs.sp_offset += size;
12383 /* Make sure nothing is scheduled before we are done. */
12384 emit_insn (gen_blockage ());
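/* Worked example (64-bit, assuming the default PROBE_INTERVAL of 4096, so
   dope == 32): for SIZE == 8192 the code above emits, roughly,

     subq	$8224, %rsp	# 2 * PROBE_INTERVAL + dope
     orq	$0, (%rsp)
     subq	$4096, %rsp	# SIZE + PROBE_INTERVAL - 8192
     orq	$0, (%rsp)
     addq	$4128, %rsp	# give back PROBE_INTERVAL + dope

   for a net adjustment of exactly -SIZE, having probed one interval plus the
   dope past the new stack pointer, as described at the top of the
   function.  */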
12387 /* Adjust the stack pointer up to REG while probing it. */
12389 const char *
12390 output_adjust_stack_and_probe (rtx reg)
12392 static int labelno = 0;
12393 char loop_lab[32];
12394 rtx xops[2];
12396 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12398 /* Loop. */
12399 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12401 /* SP = SP + PROBE_INTERVAL. */
12402 xops[0] = stack_pointer_rtx;
12403 xops[1] = GEN_INT (PROBE_INTERVAL);
12404 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12406 /* Probe at SP. */
12407 xops[1] = const0_rtx;
12408 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12410 /* Test if SP == LAST_ADDR. */
12411 xops[0] = stack_pointer_rtx;
12412 xops[1] = reg;
12413 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12415 /* Branch. */
12416 fputs ("\tjne\t", asm_out_file);
12417 assemble_name_raw (asm_out_file, loop_lab);
12418 fputc ('\n', asm_out_file);
12420 return "";
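/* The loop printed above, in AT&T syntax for 64-bit with %r11 as the scratch
   register and the default 4096-byte interval (the label number varies):

   .LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%r11, %rsp
	jne	.LPSRL0
*/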
12423 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12424 inclusive. These are offsets from the current stack pointer. */
12426 static void
12427 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12429 /* See if we have a constant small number of probes to generate. If so,
12430 that's the easy case. The run-time loop is made up of 6 insns in the
12431 generic case while the compile-time loop is made up of n insns for n #
12432 of intervals. */
12433 if (size <= 6 * PROBE_INTERVAL)
12435 HOST_WIDE_INT i;
12437 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12438 it exceeds SIZE. If only one probe is needed, this will not
12439 generate any code. Then probe at FIRST + SIZE. */
12440 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12441 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12442 -(first + i)));
12444 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12445 -(first + size)));
12448 /* Otherwise, do the same as above, but in a loop. Note that we must be
12449 extra careful with variables wrapping around because we might be at
12450 the very top (or the very bottom) of the address space and we have
12451 to be able to handle this case properly; in particular, we use an
12452 equality test for the loop condition. */
12453 else
12455 HOST_WIDE_INT rounded_size, last;
12456 struct scratch_reg sr;
12458 get_scratch_register_on_entry (&sr);
12461 /* Step 1: round SIZE to the previous multiple of the interval. */
12463 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12466 /* Step 2: compute initial and final value of the loop counter. */
12468 /* TEST_OFFSET = FIRST. */
12469 emit_move_insn (sr.reg, GEN_INT (-first));
12471 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12472 last = first + rounded_size;
12475 /* Step 3: the loop
12479 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12480 probe at TEST_ADDR
12482 while (TEST_ADDR != LAST_ADDR)
12484 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12485 until it is equal to ROUNDED_SIZE. */
12487 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12490 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12491 that SIZE is equal to ROUNDED_SIZE. */
12493 if (size != rounded_size)
12494 emit_stack_probe (plus_constant (Pmode,
12495 gen_rtx_PLUS (Pmode,
12496 stack_pointer_rtx,
12497 sr.reg),
12498 rounded_size - size));
12500 release_scratch_register_on_entry (&sr);
12503 /* Make sure nothing is scheduled before we are done. */
12504 emit_insn (gen_blockage ());
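/* Worked example (assuming PROBE_INTERVAL == 4096): for FIRST == 4096 and
   SIZE == 12288 the inline path emits probes at sp-8192, sp-12288 and
   finally sp-16384, i.e. at FIRST + N * PROBE_INTERVAL for N = 1, 2 and then
   at FIRST + SIZE, all without moving the stack pointer.  Sizes above
   6 * PROBE_INTERVAL use the register-driven loop instead.  */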
12507 /* Probe a range of stack addresses from REG to END, inclusive. These are
12508 offsets from the current stack pointer. */
12510 const char *
12511 output_probe_stack_range (rtx reg, rtx end)
12513 static int labelno = 0;
12514 char loop_lab[32];
12515 rtx xops[3];
12517 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12519 /* Loop. */
12520 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12522 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12523 xops[0] = reg;
12524 xops[1] = GEN_INT (PROBE_INTERVAL);
12525 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12527 /* Probe at TEST_ADDR. */
12528 xops[0] = stack_pointer_rtx;
12529 xops[1] = reg;
12530 xops[2] = const0_rtx;
12531 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12533 /* Test if TEST_ADDR == LAST_ADDR. */
12534 xops[0] = reg;
12535 xops[1] = end;
12536 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12538 /* Branch. */
12539 fputs ("\tjne\t", asm_out_file);
12540 assemble_name_raw (asm_out_file, loop_lab);
12541 fputc ('\n', asm_out_file);
12543 return "";
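/* The loop printed above, in AT&T syntax for 64-bit, with %r11 as the test
   register and LAST standing in for the -(FIRST + ROUNDED_SIZE) limit
   (a sketch only; operands vary):

   .LPSRL1:
	subq	$4096, %r11
	orq	$0, (%rsp,%r11)
	cmpq	$LAST, %r11
	jne	.LPSRL1
*/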
12546 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
12547 will guide the prologue/epilogue to be generated in the correct form. */
12549 static void
12550 ix86_finalize_stack_frame_flags (void)
12552 /* Check if stack realignment is really needed after reload, and
12553 store the result in cfun. */
12554 unsigned int incoming_stack_boundary
12555 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12556 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12557 unsigned int stack_alignment
12558 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12559 ? crtl->max_used_stack_slot_alignment
12560 : crtl->stack_alignment_needed);
12561 unsigned int stack_realign
12562 = (incoming_stack_boundary < stack_alignment);
12563 bool recompute_frame_layout_p = false;
12565 if (crtl->stack_realign_finalized)
12567 /* After stack_realign_needed is finalized, we can no longer
12568 change it. */
12569 gcc_assert (crtl->stack_realign_needed == stack_realign);
12570 return;
12573 /* If the only reason for frame_pointer_needed is that we conservatively
12574 assumed stack realignment might be needed or -fno-omit-frame-pointer
12575 is used, but in the end nothing that needed the stack alignment had
12576 been spilled and no stack access is required, clear frame_pointer_needed
12577 and say we don't need stack realignment. */
12578 if ((stack_realign || !flag_omit_frame_pointer)
12579 && frame_pointer_needed
12580 && crtl->is_leaf
12581 && crtl->sp_is_unchanging
12582 && !ix86_current_function_calls_tls_descriptor
12583 && !crtl->accesses_prior_frames
12584 && !cfun->calls_alloca
12585 && !crtl->calls_eh_return
12586 /* See ira_setup_eliminable_regset for the rationale. */
12587 && !(STACK_CHECK_MOVING_SP
12588 && flag_stack_check
12589 && flag_exceptions
12590 && cfun->can_throw_non_call_exceptions)
12591 && !ix86_frame_pointer_required ()
12592 && get_frame_size () == 0
12593 && ix86_nsaved_sseregs () == 0
12594 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12596 HARD_REG_SET set_up_by_prologue, prologue_used;
12597 basic_block bb;
12599 CLEAR_HARD_REG_SET (prologue_used);
12600 CLEAR_HARD_REG_SET (set_up_by_prologue);
12601 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12602 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12603 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12604 HARD_FRAME_POINTER_REGNUM);
12606 /* The preferred stack alignment is the minimum stack alignment. */
12607 if (stack_alignment > crtl->preferred_stack_boundary)
12608 stack_alignment = crtl->preferred_stack_boundary;
12610 bool require_stack_frame = false;
12612 FOR_EACH_BB_FN (bb, cfun)
12614 rtx_insn *insn;
12615 FOR_BB_INSNS (bb, insn)
12616 if (NONDEBUG_INSN_P (insn)
12617 && requires_stack_frame_p (insn, prologue_used,
12618 set_up_by_prologue))
12620 require_stack_frame = true;
12622 if (stack_realign)
12624 /* Find the maximum stack alignment. */
12625 subrtx_iterator::array_type array;
12626 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12627 if (MEM_P (*iter)
12628 && (reg_mentioned_p (stack_pointer_rtx,
12629 *iter)
12630 || reg_mentioned_p (frame_pointer_rtx,
12631 *iter)))
12633 unsigned int alignment = MEM_ALIGN (*iter);
12634 if (alignment > stack_alignment)
12635 stack_alignment = alignment;
12641 if (require_stack_frame)
12643 /* Stack frame is required. If stack alignment needed is less
12644 than incoming stack boundary, don't realign stack. */
12645 stack_realign = incoming_stack_boundary < stack_alignment;
12646 if (!stack_realign)
12648 crtl->max_used_stack_slot_alignment
12649 = incoming_stack_boundary;
12650 crtl->stack_alignment_needed
12651 = incoming_stack_boundary;
12652 /* Also update preferred_stack_boundary for leaf
12653 functions. */
12654 crtl->preferred_stack_boundary
12655 = incoming_stack_boundary;
12658 else
12660 /* If drap has been set, but it actually isn't live at the
12661 start of the function, there is no reason to set it up. */
12662 if (crtl->drap_reg)
12664 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12665 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
12666 REGNO (crtl->drap_reg)))
12668 crtl->drap_reg = NULL_RTX;
12669 crtl->need_drap = false;
12672 else
12673 cfun->machine->no_drap_save_restore = true;
12675 frame_pointer_needed = false;
12676 stack_realign = false;
12677 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12678 crtl->stack_alignment_needed = incoming_stack_boundary;
12679 crtl->stack_alignment_estimated = incoming_stack_boundary;
12680 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12681 crtl->preferred_stack_boundary = incoming_stack_boundary;
12682 df_finish_pass (true);
12683 df_scan_alloc (NULL);
12684 df_scan_blocks ();
12685 df_compute_regs_ever_live (true);
12686 df_analyze ();
12688 if (flag_var_tracking)
12690 /* Since frame pointer is no longer available, replace it with
12691 stack pointer - UNITS_PER_WORD in debug insns. */
12692 df_ref ref, next;
12693 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
12694 ref; ref = next)
12696 next = DF_REF_NEXT_REG (ref);
12697 if (!DF_REF_INSN_INFO (ref))
12698 continue;
12700 /* Make sure the next ref is for a different instruction,
12701 so that we're not affected by the rescan. */
12702 rtx_insn *insn = DF_REF_INSN (ref);
12703 while (next && DF_REF_INSN (next) == insn)
12704 next = DF_REF_NEXT_REG (next);
12706 if (DEBUG_INSN_P (insn))
12708 bool changed = false;
12709 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
12711 rtx *loc = DF_REF_LOC (ref);
12712 if (*loc == hard_frame_pointer_rtx)
12714 *loc = plus_constant (Pmode,
12715 stack_pointer_rtx,
12716 -UNITS_PER_WORD);
12717 changed = true;
12720 if (changed)
12721 df_insn_rescan (insn);
12726 recompute_frame_layout_p = true;
12730 if (crtl->stack_realign_needed != stack_realign)
12731 recompute_frame_layout_p = true;
12732 crtl->stack_realign_needed = stack_realign;
12733 crtl->stack_realign_finalized = true;
12734 if (recompute_frame_layout_p)
12735 ix86_compute_frame_layout ();
12738 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12740 static void
12741 ix86_elim_entry_set_got (rtx reg)
12743 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12744 rtx_insn *c_insn = BB_HEAD (bb);
12745 if (!NONDEBUG_INSN_P (c_insn))
12746 c_insn = next_nonnote_nondebug_insn (c_insn);
12747 if (c_insn && NONJUMP_INSN_P (c_insn))
12749 rtx pat = PATTERN (c_insn);
12750 if (GET_CODE (pat) == PARALLEL)
12752 rtx vec = XVECEXP (pat, 0, 0);
12753 if (GET_CODE (vec) == SET
12754 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12755 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12756 delete_insn (c_insn);
12761 static rtx
12762 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
12764 rtx addr, mem;
12766 if (offset)
12767 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
12768 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
12769 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
12772 static inline rtx
12773 gen_frame_load (rtx reg, rtx frame_reg, int offset)
12775 return gen_frame_set (reg, frame_reg, offset, false);
12778 static inline rtx
12779 gen_frame_store (rtx reg, rtx frame_reg, int offset)
12781 return gen_frame_set (reg, frame_reg, offset, true);
12784 static void
12785 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
12787 struct machine_function *m = cfun->machine;
12788 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
12789 + m->call_ms2sysv_extra_regs;
12790 rtvec v = rtvec_alloc (ncregs + 1);
12791 unsigned int align, i, vi = 0;
12792 rtx_insn *insn;
12793 rtx sym, addr;
12794 rtx rax = gen_rtx_REG (word_mode, AX_REG);
12795 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
12796 HOST_WIDE_INT allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12798 /* AL should only be live with sysv_abi. */
12799 gcc_assert (!ix86_eax_live_at_start_p ());
12801 /* Set up RAX as the stub's base pointer. We use stack_realign_offset,
12802 irrespective of whether we've actually realigned the stack or not. */
12803 align = GET_MODE_ALIGNMENT (V4SFmode);
12804 addr = choose_baseaddr (frame.stack_realign_offset
12805 + xlogue.get_stub_ptr_offset (), &align);
12806 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
12807 emit_insn (gen_rtx_SET (rax, addr));
12809 /* Allocate stack if not already done. */
12810 if (allocate > 0)
12811 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12812 GEN_INT (-allocate), -1, false);
12814 /* Get the stub symbol. */
12815 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
12816 : XLOGUE_STUB_SAVE);
12817 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
12819 for (i = 0; i < ncregs; ++i)
12821 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
12822 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
12823 r.regno);
12824 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
12827 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
12829 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
12830 RTX_FRAME_RELATED_P (insn) = true;
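/* Shape of the insn emitted above (a sketch; the stub symbol and offsets are
   placeholders supplied by xlogue_layout at run time):

     (parallel [(use (symbol_ref <save stub>))
		(set (mem:DI (plus (reg:DI ax) (const_int <-offset>)))
		     (reg:DI <clobbered reg>))
		...])

   The SETs let the unwinder see every store performed by the out-of-line
   stub, with %rax as the common base established just above.  */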
12833 /* Expand the prologue into a bunch of separate insns. */
12835 void
12836 ix86_expand_prologue (void)
12838 struct machine_function *m = cfun->machine;
12839 rtx insn, t;
12840 struct ix86_frame frame;
12841 HOST_WIDE_INT allocate;
12842 bool int_registers_saved;
12843 bool sse_registers_saved;
12844 rtx static_chain = NULL_RTX;
12846 if (ix86_function_naked (current_function_decl))
12847 return;
12849 ix86_finalize_stack_frame_flags ();
12851 /* DRAP should not coexist with stack_realign_fp */
12852 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12854 memset (&m->fs, 0, sizeof (m->fs));
12856 /* Initialize CFA state for before the prologue. */
12857 m->fs.cfa_reg = stack_pointer_rtx;
12858 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12860 /* Track the SP offset to the CFA. We continue tracking this after we've
12861 swapped the CFA register away from SP. In the case of re-alignment
12862 this is fudged; we're interested in offsets within the local frame. */
12863 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12864 m->fs.sp_valid = true;
12865 m->fs.sp_realigned = false;
12867 frame = m->frame;
12869 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12871 /* We should have already generated an error for any use of
12872 ms_hook on a nested function. */
12873 gcc_checking_assert (!ix86_static_chain_on_stack);
12875 /* Check if profiling is active and we shall use the profiling-before-
12876 prologue variant. If so, issue a sorry. */
12877 if (crtl->profile && flag_fentry != 0)
12878 sorry ("ms_hook_prologue attribute isn%'t compatible "
12879 "with -mfentry for 32-bit");
12881 /* In ix86_asm_output_function_label we emitted:
12882 8b ff movl.s %edi,%edi
12883 55 push %ebp
12884 8b ec movl.s %esp,%ebp
12886 This matches the hookable function prologue in Win32 API
12887 functions in Microsoft Windows XP Service Pack 2 and newer.
12888 Wine uses this to enable Windows apps to hook the Win32 API
12889 functions provided by Wine.
12891 What that means is that we've already set up the frame pointer. */
12893 if (frame_pointer_needed
12894 && !(crtl->drap_reg && crtl->stack_realign_needed))
12896 rtx push, mov;
12898 /* We've decided to use the frame pointer already set up.
12899 Describe this to the unwinder by pretending that both
12900 push and mov insns happen right here.
12902 Putting the unwind info here at the end of the ms_hook
12903 is done so that we can make absolutely certain we get
12904 the required byte sequence at the start of the function,
12905 rather than relying on an assembler that can produce
12906 the exact encoding required.
12908 However it does mean (in the unpatched case) that we have
12909 a 1 insn window where the asynchronous unwind info is
12910 incorrect. However, if we placed the unwind info at
12911 its correct location we would have incorrect unwind info
12912 in the patched case. Which is probably all moot since
12913 I don't expect Wine generates dwarf2 unwind info for the
12914 system libraries that use this feature. */
12916 insn = emit_insn (gen_blockage ());
12918 push = gen_push (hard_frame_pointer_rtx);
12919 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12920 stack_pointer_rtx);
12921 RTX_FRAME_RELATED_P (push) = 1;
12922 RTX_FRAME_RELATED_P (mov) = 1;
12924 RTX_FRAME_RELATED_P (insn) = 1;
12925 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12926 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12928 /* Note that gen_push incremented m->fs.cfa_offset, even
12929 though we didn't emit the push insn here. */
12930 m->fs.cfa_reg = hard_frame_pointer_rtx;
12931 m->fs.fp_offset = m->fs.cfa_offset;
12932 m->fs.fp_valid = true;
12934 else
12936 /* The frame pointer is not needed so pop %ebp again.
12937 This leaves us with a pristine state. */
12938 emit_insn (gen_pop (hard_frame_pointer_rtx));
12942 /* The first insn of a function that accepts its static chain on the
12943 stack is to push the register that would be filled in by a direct
12944 call. This insn will be skipped by the trampoline. */
12945 else if (ix86_static_chain_on_stack)
12947 static_chain = ix86_static_chain (cfun->decl, false);
12948 insn = emit_insn (gen_push (static_chain));
12949 emit_insn (gen_blockage ());
12951 /* We don't want to interpret this push insn as a register save,
12952 only as a stack adjustment. The real copy of the register as
12953 a save will be done later, if needed. */
12954 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12955 t = gen_rtx_SET (stack_pointer_rtx, t);
12956 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12957 RTX_FRAME_RELATED_P (insn) = 1;
12960 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
12961 DRAP is needed and stack realignment is really needed after reload. */
12962 if (stack_realign_drap)
12964 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12966 /* Can't use DRAP in interrupt function. */
12967 if (cfun->machine->func_type != TYPE_NORMAL)
12968 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
12969 "in interrupt service routine. This may be worked "
12970 "around by avoiding functions with aggregate return.");
12972 /* Only need to push parameter pointer reg if it is caller saved. */
12973 if (!call_used_regs[REGNO (crtl->drap_reg)])
12975 /* Push arg pointer reg */
12976 insn = emit_insn (gen_push (crtl->drap_reg));
12977 RTX_FRAME_RELATED_P (insn) = 1;
12980 /* Grab the argument pointer. */
12981 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12982 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12983 RTX_FRAME_RELATED_P (insn) = 1;
12984 m->fs.cfa_reg = crtl->drap_reg;
12985 m->fs.cfa_offset = 0;
12987 /* Align the stack. */
12988 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12989 stack_pointer_rtx,
12990 GEN_INT (-align_bytes)));
12991 RTX_FRAME_RELATED_P (insn) = 1;
12993 /* Replicate the return address on the stack so that return
12994 address can be reached via (argp - 1) slot. This is needed
12995 to implement macro RETURN_ADDR_RTX and intrinsic function
12996 expand_builtin_return_addr etc. */
12997 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12998 t = gen_frame_mem (word_mode, t);
12999 insn = emit_insn (gen_push (t));
13000 RTX_FRAME_RELATED_P (insn) = 1;
13002 /* For the purposes of frame and register save area addressing,
13003 we've started over with a new frame. */
13004 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13005 m->fs.realigned = true;
13007 if (static_chain)
13009 /* Replicate static chain on the stack so that static chain
13010 can be reached via (argp - 2) slot. This is needed for
13011 nested function with stack realignment. */
13012 insn = emit_insn (gen_push (static_chain));
13013 RTX_FRAME_RELATED_P (insn) = 1;
13017 int_registers_saved = (frame.nregs == 0);
13018 sse_registers_saved = (frame.nsseregs == 0);
13020 if (frame_pointer_needed && !m->fs.fp_valid)
13022 /* Note: AT&T enter does NOT have reversed args. Enter is probably
13023 slower on all targets. Also sdb didn't like it. */
13024 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13025 RTX_FRAME_RELATED_P (insn) = 1;
13027 /* Push registers now, before setting the frame pointer
13028 on SEH target. */
13029 if (!int_registers_saved
13030 && TARGET_SEH
13031 && !frame.save_regs_using_mov)
13033 ix86_emit_save_regs ();
13034 int_registers_saved = true;
13035 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13038 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13040 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13041 RTX_FRAME_RELATED_P (insn) = 1;
13043 if (m->fs.cfa_reg == stack_pointer_rtx)
13044 m->fs.cfa_reg = hard_frame_pointer_rtx;
13045 m->fs.fp_offset = m->fs.sp_offset;
13046 m->fs.fp_valid = true;
13050 if (!int_registers_saved)
13052 /* If saving registers via PUSH, do so now. */
13053 if (!frame.save_regs_using_mov)
13055 ix86_emit_save_regs ();
13056 int_registers_saved = true;
13057 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13060 /* When using the red zone we may start register saving before allocating
13061 the stack frame, saving one cycle of the prologue. However, avoid
13062 doing this if we have to probe the stack; at least on x86_64 the
13063 stack probe can turn into a call that clobbers a red zone location. */
13064 else if (ix86_using_red_zone ()
13065 && (! TARGET_STACK_PROBE
13066 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13068 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13069 int_registers_saved = true;
13073 if (stack_realign_fp)
13075 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13076 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13078 /* Record last valid frame pointer offset. */
13079 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13081 /* The computation of the size of the re-aligned stack frame means
13082 that we must allocate the size of the register save area before
13083 performing the actual alignment. Otherwise we cannot guarantee
13084 that there's enough storage above the realignment point. */
13085 allocate = frame.reg_save_offset - m->fs.sp_offset
13086 + frame.stack_realign_allocate;
13087 if (allocate)
13088 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13089 GEN_INT (-allocate), -1, false);
13091 /* Align the stack. */
13092 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13093 stack_pointer_rtx,
13094 GEN_INT (-align_bytes)));
13095 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13096 m->fs.sp_realigned_offset = m->fs.sp_offset
13097 - frame.stack_realign_allocate;
13098 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13099 Beyond this point, stack access should be done via choose_baseaddr or
13100 by using sp_valid_at and fp_valid_at to determine the correct base
13101 register. Henceforth, any CFA offset should be thought of as logical
13102 and not physical. */
13103 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13104 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13105 m->fs.sp_realigned = true;
13107 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13108 is needed to describe where a register is saved using a realigned
13109 stack pointer, so we need to invalidate the stack pointer for that
13110 target. */
13111 if (TARGET_SEH)
13112 m->fs.sp_valid = false;
13115 if (m->call_ms2sysv)
13116 ix86_emit_outlined_ms2sysv_save (frame);
13118 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13120 if (flag_stack_usage_info)
13122 /* We start to count from ARG_POINTER. */
13123 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13125 /* If it was realigned, take into account the fake frame. */
13126 if (stack_realign_drap)
13128 if (ix86_static_chain_on_stack)
13129 stack_size += UNITS_PER_WORD;
13131 if (!call_used_regs[REGNO (crtl->drap_reg)])
13132 stack_size += UNITS_PER_WORD;
13134 /* This over-estimates by 1 minimal-stack-alignment-unit but
13135 mitigates that by counting in the new return address slot. */
13136 current_function_dynamic_stack_size
13137 += crtl->stack_alignment_needed / BITS_PER_UNIT;
13140 current_function_static_stack_size = stack_size;
13143 /* On SEH target with very large frame size, allocate an area to save
13144 SSE registers (as the very large allocation won't be described). */
13145 if (TARGET_SEH
13146 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13147 && !sse_registers_saved)
13149 HOST_WIDE_INT sse_size =
13150 frame.sse_reg_save_offset - frame.reg_save_offset;
13152 gcc_assert (int_registers_saved);
13154 /* No need to do stack checking as the area will be immediately
13155 written. */
13156 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13157 GEN_INT (-sse_size), -1,
13158 m->fs.cfa_reg == stack_pointer_rtx);
13159 allocate -= sse_size;
13160 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13161 sse_registers_saved = true;
13164 /* The stack has already been decremented by the instruction calling us
13165 so probe if the size is non-negative to preserve the protection area. */
13166 if (allocate >= 0
13167 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13168 || flag_stack_clash_protection))
13170 /* We expect the GP registers to be saved when probes are used. */
13171 gcc_assert (int_registers_saved);
13173 if (flag_stack_clash_protection)
13175 ix86_adjust_stack_and_probe_stack_clash (allocate);
13176 allocate = 0;
13178 else if (STACK_CHECK_MOVING_SP)
13180 if (!(crtl->is_leaf && !cfun->calls_alloca
13181 && allocate <= PROBE_INTERVAL))
13183 ix86_adjust_stack_and_probe (allocate);
13184 allocate = 0;
13187 else
13189 HOST_WIDE_INT size = allocate;
13191 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13192 size = 0x80000000 - get_stack_check_protect () - 1;
13194 if (TARGET_STACK_PROBE)
13196 if (crtl->is_leaf && !cfun->calls_alloca)
13198 if (size > PROBE_INTERVAL)
13199 ix86_emit_probe_stack_range (0, size);
13201 else
13202 ix86_emit_probe_stack_range (0,
13203 size + get_stack_check_protect ());
13205 else
13207 if (crtl->is_leaf && !cfun->calls_alloca)
13209 if (size > PROBE_INTERVAL
13210 && size > get_stack_check_protect ())
13211 ix86_emit_probe_stack_range (get_stack_check_protect (),
13212 size - get_stack_check_protect ());
13214 else
13215 ix86_emit_probe_stack_range (get_stack_check_protect (), size);
13220 if (allocate == 0)
13222 else if (!ix86_target_stack_probe ()
13223 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13225 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13226 GEN_INT (-allocate), -1,
13227 m->fs.cfa_reg == stack_pointer_rtx);
13229 else
13231 rtx eax = gen_rtx_REG (Pmode, AX_REG);
13232 rtx r10 = NULL;
13233 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13234 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13235 bool eax_live = ix86_eax_live_at_start_p ();
13236 bool r10_live = false;
13238 if (TARGET_64BIT)
13239 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13241 if (eax_live)
13243 insn = emit_insn (gen_push (eax));
13244 allocate -= UNITS_PER_WORD;
13245 /* Note that SEH directives need to continue tracking the stack
13246 pointer even after the frame pointer has been set up. */
13247 if (sp_is_cfa_reg || TARGET_SEH)
13249 if (sp_is_cfa_reg)
13250 m->fs.cfa_offset += UNITS_PER_WORD;
13251 RTX_FRAME_RELATED_P (insn) = 1;
13252 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13253 gen_rtx_SET (stack_pointer_rtx,
13254 plus_constant (Pmode, stack_pointer_rtx,
13255 -UNITS_PER_WORD)));
13259 if (r10_live)
13261 r10 = gen_rtx_REG (Pmode, R10_REG);
13262 insn = emit_insn (gen_push (r10));
13263 allocate -= UNITS_PER_WORD;
13264 if (sp_is_cfa_reg || TARGET_SEH)
13266 if (sp_is_cfa_reg)
13267 m->fs.cfa_offset += UNITS_PER_WORD;
13268 RTX_FRAME_RELATED_P (insn) = 1;
13269 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13270 gen_rtx_SET (stack_pointer_rtx,
13271 plus_constant (Pmode, stack_pointer_rtx,
13272 -UNITS_PER_WORD)));
13276 emit_move_insn (eax, GEN_INT (allocate));
13277 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
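/* The worker only probes the area being allocated (e.g. via a call to
   ___chkstk_ms on MS targets); it does not move the stack pointer itself,
   which is why the explicit subtraction below still performs the actual
   allocation, reusing the count left in AX.  */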
13279 /* Use the fact that AX still contains ALLOCATE. */
13280 adjust_stack_insn = (Pmode == DImode
13281 ? gen_pro_epilogue_adjust_stack_di_sub
13282 : gen_pro_epilogue_adjust_stack_si_sub);
13284 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13285 stack_pointer_rtx, eax));
13287 if (sp_is_cfa_reg || TARGET_SEH)
13289 if (sp_is_cfa_reg)
13290 m->fs.cfa_offset += allocate;
13291 RTX_FRAME_RELATED_P (insn) = 1;
13292 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13293 gen_rtx_SET (stack_pointer_rtx,
13294 plus_constant (Pmode, stack_pointer_rtx,
13295 -allocate)));
13297 m->fs.sp_offset += allocate;
13299 /* Use stack_pointer_rtx for relative addressing so that code
13300 works for realigned stack, too. */
13301 if (r10_live && eax_live)
13303 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13304 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13305 gen_frame_mem (word_mode, t));
13306 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13307 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13308 gen_frame_mem (word_mode, t));
13310 else if (eax_live || r10_live)
13312 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13313 emit_move_insn (gen_rtx_REG (word_mode,
13314 (eax_live ? AX_REG : R10_REG)),
13315 gen_frame_mem (word_mode, t));
13318 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13320 /* If we haven't already set up the frame pointer, do so now. */
13321 if (frame_pointer_needed && !m->fs.fp_valid)
13323 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13324 GEN_INT (frame.stack_pointer_offset
13325 - frame.hard_frame_pointer_offset));
13326 insn = emit_insn (insn);
13327 RTX_FRAME_RELATED_P (insn) = 1;
13328 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13330 if (m->fs.cfa_reg == stack_pointer_rtx)
13331 m->fs.cfa_reg = hard_frame_pointer_rtx;
13332 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13333 m->fs.fp_valid = true;
13336 if (!int_registers_saved)
13337 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13338 if (!sse_registers_saved)
13339 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13341 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13342 in PROLOGUE. */
13343 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13345 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13346 insn = emit_insn (gen_set_got (pic));
13347 RTX_FRAME_RELATED_P (insn) = 1;
13348 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13349 emit_insn (gen_prologue_use (pic));
13350 /* Delete an already emitted SET_GOT if it exists and is allocated to
13351 REAL_PIC_OFFSET_TABLE_REGNUM. */
13352 ix86_elim_entry_set_got (pic);
13355 if (crtl->drap_reg && !crtl->stack_realign_needed)
13357 /* vDRAP is set up, but after reload it turns out stack realignment
13358 isn't necessary; here we emit the prologue to set up DRAP
13359 without the stack realignment adjustment. */
13360 t = choose_baseaddr (0, NULL);
13361 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13364 /* Prevent instructions from being scheduled into register save push
13365 sequence when access to the redzone area is done through frame pointer.
13366 The offset between the frame pointer and the stack pointer is calculated
13367 relative to the value of the stack pointer at the end of the function
13368 prologue, and moving instructions that access redzone area via frame
13369 pointer inside push sequence violates this assumption. */
13370 if (frame_pointer_needed && frame.red_zone_size)
13371 emit_insn (gen_memory_blockage ());
13373 /* SEH requires that the prologue end within 256 bytes of the start of
13374 the function. Prevent instruction schedules that would extend that.
13375 Further, prevent alloca modifications to the stack pointer from being
13376 combined with prologue modifications. */
13377 if (TARGET_SEH)
13378 emit_insn (gen_prologue_use (stack_pointer_rtx));
13381 /* Emit code to restore REG using a POP insn. */
13383 static void
13384 ix86_emit_restore_reg_using_pop (rtx reg)
13386 struct machine_function *m = cfun->machine;
13387 rtx_insn *insn = emit_insn (gen_pop (reg));
13389 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13390 m->fs.sp_offset -= UNITS_PER_WORD;
13392 if (m->fs.cfa_reg == crtl->drap_reg
13393 && REGNO (reg) == REGNO (crtl->drap_reg))
13395 /* Previously we'd represented the CFA as an expression
13396 like *(%ebp - 8). We've just popped that value from
13397 the stack, which means we need to reset the CFA to
13398 the drap register. This will remain until we restore
13399 the stack pointer. */
13400 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13401 RTX_FRAME_RELATED_P (insn) = 1;
13403 /* This means that the DRAP register is valid for addressing too. */
13404 m->fs.drap_valid = true;
13405 return;
13408 if (m->fs.cfa_reg == stack_pointer_rtx)
13410 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13411 x = gen_rtx_SET (stack_pointer_rtx, x);
13412 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13413 RTX_FRAME_RELATED_P (insn) = 1;
13415 m->fs.cfa_offset -= UNITS_PER_WORD;
13418 /* When the frame pointer is the CFA, and we pop it, we are
13419 swapping back to the stack pointer as the CFA. This happens
13420 for stack frames that don't allocate other data, so we assume
13421 the stack pointer is now pointing at the return address, i.e.
13422 the function entry state, which makes the offset one word. */
13423 if (reg == hard_frame_pointer_rtx)
13425 m->fs.fp_valid = false;
13426 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13428 m->fs.cfa_reg = stack_pointer_rtx;
13429 m->fs.cfa_offset -= UNITS_PER_WORD;
13431 add_reg_note (insn, REG_CFA_DEF_CFA,
13432 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13433 GEN_INT (m->fs.cfa_offset)));
13434 RTX_FRAME_RELATED_P (insn) = 1;
13439 /* Emit code to restore saved registers using POP insns. */
13441 static void
13442 ix86_emit_restore_regs_using_pop (void)
13444 unsigned int regno;
13446 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13447 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
13448 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13451 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
13452 omits the emit and only attaches the notes. */
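/* A leave instruction is equivalent to "mov %ebp, %esp" followed by
   "pop %ebp" (movq/popq in 64-bit mode), which is why the state below is
   updated to mark the stack pointer valid again and the frame pointer
   invalid.  */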
13454 static void
13455 ix86_emit_leave (rtx_insn *insn)
13457 struct machine_function *m = cfun->machine;
13458 if (!insn)
13459 insn = emit_insn (ix86_gen_leave ());
13461 ix86_add_queued_cfa_restore_notes (insn);
13463 gcc_assert (m->fs.fp_valid);
13464 m->fs.sp_valid = true;
13465 m->fs.sp_realigned = false;
13466 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13467 m->fs.fp_valid = false;
13469 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13471 m->fs.cfa_reg = stack_pointer_rtx;
13472 m->fs.cfa_offset = m->fs.sp_offset;
13474 add_reg_note (insn, REG_CFA_DEF_CFA,
13475 plus_constant (Pmode, stack_pointer_rtx,
13476 m->fs.sp_offset));
13477 RTX_FRAME_RELATED_P (insn) = 1;
13479 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13480 m->fs.fp_offset);
13483 /* Emit code to restore saved registers using MOV insns.
13484 First register is restored from CFA - CFA_OFFSET. */
13485 static void
13486 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13487 bool maybe_eh_return)
13489 struct machine_function *m = cfun->machine;
13490 unsigned int regno;
13492 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13493 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13495 rtx reg = gen_rtx_REG (word_mode, regno);
13496 rtx mem;
13497 rtx_insn *insn;
13499 mem = choose_baseaddr (cfa_offset, NULL);
13500 mem = gen_frame_mem (word_mode, mem);
13501 insn = emit_move_insn (reg, mem);
13503 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13505 /* Previously we'd represented the CFA as an expression
13506 like *(%ebp - 8). We've just popped that value from
13507 the stack, which means we need to reset the CFA to
13508 the drap register. This will remain until we restore
13509 the stack pointer. */
13510 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13511 RTX_FRAME_RELATED_P (insn) = 1;
13513 /* This means that the DRAP register is valid for addressing. */
13514 m->fs.drap_valid = true;
13516 else
13517 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13519 cfa_offset -= UNITS_PER_WORD;
13523 /* Emit code to restore saved registers using MOV insns.
13524 First register is restored from CFA - CFA_OFFSET. */
13525 static void
13526 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13527 bool maybe_eh_return)
13529 unsigned int regno;
13531 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13532 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
13534 rtx reg = gen_rtx_REG (V4SFmode, regno);
13535 rtx mem;
13536 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
13538 mem = choose_baseaddr (cfa_offset, &align);
13539 mem = gen_rtx_MEM (V4SFmode, mem);
13542 /* The location alignment depends upon the base register. */
13542 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
13543 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
13544 set_mem_align (mem, align);
13545 emit_insn (gen_rtx_SET (reg, mem));
13547 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13549 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13553 static void
13554 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
13555 bool use_call, int style)
13557 struct machine_function *m = cfun->machine;
13558 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13559 + m->call_ms2sysv_extra_regs;
13560 rtvec v;
13561 unsigned int elems_needed, align, i, vi = 0;
13562 rtx_insn *insn;
13563 rtx sym, tmp;
13564 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
13565 rtx r10 = NULL_RTX;
13566 const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13567 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
13568 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
13569 rtx rsi_frame_load = NULL_RTX;
13570 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
13571 enum xlogue_stub stub;
13573 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
13575 /* If using a realigned stack, we should never start with padding. */
13576 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
13578 /* Set up RSI as the stub's base pointer. */
13579 align = GET_MODE_ALIGNMENT (V4SFmode);
13580 tmp = choose_baseaddr (rsi_offset, &align);
13581 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13582 emit_insn (gen_rtx_SET (rsi, tmp));
13584 /* Get a symbol for the stub. */
13585 if (frame_pointer_needed)
13586 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
13587 : XLOGUE_STUB_RESTORE_HFP_TAIL;
13588 else
13589 stub = use_call ? XLOGUE_STUB_RESTORE
13590 : XLOGUE_STUB_RESTORE_TAIL;
13591 sym = xlogue.get_stub_rtx (stub);
13593 elems_needed = ncregs;
13594 if (use_call)
13595 elems_needed += 1;
13596 else
13597 elems_needed += frame_pointer_needed ? 5 : 3;
13598 v = rtvec_alloc (elems_needed);
13600 /* We call the epilogue stub when we need to pop incoming args or we are
13601 doing a sibling call as the tail. Otherwise, we emit a jmp to the
13602 epilogue stub, and the stub itself is the tail call. */
13603 if (use_call)
13604 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13605 else
13607 RTVEC_ELT (v, vi++) = ret_rtx;
13608 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13609 if (frame_pointer_needed)
13611 rtx rbp = gen_rtx_REG (DImode, BP_REG);
13612 gcc_assert (m->fs.fp_valid);
13613 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
13615 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
13616 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
13617 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
13618 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
13619 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
13621 else
13623 /* If no hard frame pointer, we set R10 to the SP restore value. */
13624 gcc_assert (!m->fs.fp_valid);
13625 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13626 gcc_assert (m->fs.sp_valid);
13628 r10 = gen_rtx_REG (DImode, R10_REG);
13629 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
13630 emit_insn (gen_rtx_SET (r10, tmp));
13632 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
13636 /* Generate frame load insns and restore notes. */
13637 for (i = 0; i < ncregs; ++i)
13639 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13640 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
13641 rtx reg, frame_load;
13643 reg = gen_rtx_REG (mode, r.regno);
13644 frame_load = gen_frame_load (reg, rsi, r.offset);
13646 /* Save RSI frame load insn & note to add last. */
13647 if (r.regno == SI_REG)
13649 gcc_assert (!rsi_frame_load);
13650 rsi_frame_load = frame_load;
13651 rsi_restore_offset = r.offset;
13653 else
13655 RTVEC_ELT (v, vi++) = frame_load;
13656 ix86_add_cfa_restore_note (NULL, reg, r.offset);
13660 /* Add RSI frame load & restore note at the end. */
13661 gcc_assert (rsi_frame_load);
13662 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
13663 RTVEC_ELT (v, vi++) = rsi_frame_load;
13664 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
13665 rsi_restore_offset);
13667 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
13668 if (!use_call && !frame_pointer_needed)
13670 gcc_assert (m->fs.sp_valid);
13671 gcc_assert (!m->fs.sp_realigned);
13673 /* At this point, R10 should point to frame.stack_realign_offset. */
13674 if (m->fs.cfa_reg == stack_pointer_rtx)
13675 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
13676 m->fs.sp_offset = frame.stack_realign_offset;
13679 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
13680 tmp = gen_rtx_PARALLEL (VOIDmode, v);
13681 if (use_call)
13682 insn = emit_insn (tmp);
13683 else
13685 insn = emit_jump_insn (tmp);
13686 JUMP_LABEL (insn) = ret_rtx;
13688 if (frame_pointer_needed)
13689 ix86_emit_leave (insn);
13690 else
13692 /* Need CFA adjust note. */
13693 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
13694 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
13698 RTX_FRAME_RELATED_P (insn) = true;
13699 ix86_add_queued_cfa_restore_notes (insn);
13701 /* If we're not doing a tail-call, we need to adjust the stack. */
13702 if (use_call && m->fs.sp_valid)
13704 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
13705 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13706 GEN_INT (dealloc), style,
13707 m->fs.cfa_reg == stack_pointer_rtx);
13711 /* Restore function stack, frame, and registers. */
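/* STYLE selects the epilogue variant, matching the checks below: 0 for a
   sibcall epilogue (no return instruction is emitted), 2 for an
   eh_return_internal epilogue, and 1 for a normal return.  */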
13713 void
13714 ix86_expand_epilogue (int style)
13716 struct machine_function *m = cfun->machine;
13717 struct machine_frame_state frame_state_save = m->fs;
13718 struct ix86_frame frame;
13719 bool restore_regs_via_mov;
13720 bool using_drap;
13721 bool restore_stub_is_tail = false;
13723 if (ix86_function_naked (current_function_decl))
13725 /* The program should not reach this point. */
13726 emit_insn (gen_ud2 ());
13727 return;
13730 ix86_finalize_stack_frame_flags ();
13731 frame = m->frame;
13733 m->fs.sp_realigned = stack_realign_fp;
13734 m->fs.sp_valid = stack_realign_fp
13735 || !frame_pointer_needed
13736 || crtl->sp_is_unchanging;
13737 gcc_assert (!m->fs.sp_valid
13738 || m->fs.sp_offset == frame.stack_pointer_offset);
13740 /* The FP must be valid if the frame pointer is present. */
13741 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13742 gcc_assert (!m->fs.fp_valid
13743 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13745 /* We must have *some* valid pointer to the stack frame. */
13746 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13748 /* The DRAP is never valid at this point. */
13749 gcc_assert (!m->fs.drap_valid);
13751 /* See the comment about red zone and frame
13752 pointer usage in ix86_expand_prologue. */
13753 if (frame_pointer_needed && frame.red_zone_size)
13754 emit_insn (gen_memory_blockage ());
13756 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13757 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13759 /* Determine the CFA offset of the end of the red-zone. */
13760 m->fs.red_zone_offset = 0;
13761 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13763 /* The red-zone begins below the return address and error code in
13764 the exception handler. */
13765 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
13767 /* When the register save area is in the aligned portion of
13768 the stack, determine the maximum runtime displacement that
13769 matches up with the aligned frame. */
13770 if (stack_realign_drap)
13771 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13772 + UNITS_PER_WORD);
13775 /* Special care must be taken for the normal return case of a function
13776 using eh_return: the eax and edx registers are marked as saved, but
13777 not restored along this path. Adjust the save location to match. */
13778 if (crtl->calls_eh_return && style != 2)
13779 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
13781 /* EH_RETURN requires the use of moves to function properly. */
13782 if (crtl->calls_eh_return)
13783 restore_regs_via_mov = true;
13784 /* SEH requires the use of pops to identify the epilogue. */
13785 else if (TARGET_SEH)
13786 restore_regs_via_mov = false;
13787 /* If we're only restoring one register and sp cannot be used, then
13788 use a move instruction to restore the register, since it's
13789 less work than reloading sp and popping the register. */
13790 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
13791 restore_regs_via_mov = true;
13792 else if (TARGET_EPILOGUE_USING_MOVE
13793 && cfun->machine->use_fast_prologue_epilogue
13794 && (frame.nregs > 1
13795 || m->fs.sp_offset != frame.reg_save_offset))
13796 restore_regs_via_mov = true;
13797 else if (frame_pointer_needed
13798 && !frame.nregs
13799 && m->fs.sp_offset != frame.reg_save_offset)
13800 restore_regs_via_mov = true;
13801 else if (frame_pointer_needed
13802 && TARGET_USE_LEAVE
13803 && cfun->machine->use_fast_prologue_epilogue
13804 && frame.nregs == 1)
13805 restore_regs_via_mov = true;
13806 else
13807 restore_regs_via_mov = false;
13809 if (restore_regs_via_mov || frame.nsseregs)
13811 /* Ensure that the entire register save area is addressable via
13812 the stack pointer, if we will restore SSE regs via sp. */
13813 if (TARGET_64BIT
13814 && m->fs.sp_offset > 0x7fffffff
13815 && sp_valid_at (frame.stack_realign_offset + 1)
13816 && (frame.nsseregs + frame.nregs) != 0)
13818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13819 GEN_INT (m->fs.sp_offset
13820 - frame.sse_reg_save_offset),
13821 style,
13822 m->fs.cfa_reg == stack_pointer_rtx);
13826 /* If there are any SSE registers to restore, then we have to do it
13827 via moves, since there's obviously no pop for SSE regs. */
13828 if (frame.nsseregs)
13829 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13830 style == 2);
13832 if (m->call_ms2sysv)
13834 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
13836 /* We cannot use a tail-call for the stub if:
13837 1. We have to pop incoming args,
13838 2. We have additional int regs to restore,
13839 3. A sibling call will be the tail-call, or
13840 4. We are emitting an eh_return_internal epilogue.
13842 TODO: Item 4 has not yet been tested!
13844 If any of the above are true, we will call the stub rather than
13845 jump to it. */
13846 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
13847 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
13850 /* If using an out-of-line stub that is a tail call, then... */
13851 if (m->call_ms2sysv && restore_stub_is_tail)
13853 /* TODO: paranoid tests. (remove eventually) */
13854 gcc_assert (m->fs.sp_valid);
13855 gcc_assert (!m->fs.sp_realigned);
13856 gcc_assert (!m->fs.fp_valid);
13857 gcc_assert (!m->fs.realigned);
13858 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
13859 gcc_assert (!crtl->drap_reg);
13860 gcc_assert (!frame.nregs);
13862 else if (restore_regs_via_mov)
13864 rtx t;
13866 if (frame.nregs)
13867 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13869 /* eh_return epilogues need %ecx added to the stack pointer. */
13870 if (style == 2)
13872 rtx sa = EH_RETURN_STACKADJ_RTX;
13873 rtx_insn *insn;
13875 /* %ecx can't be used for both DRAP register and eh_return. */
13876 if (crtl->drap_reg)
13877 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
13879 /* regparm nested functions don't work with eh_return. */
13880 gcc_assert (!ix86_static_chain_on_stack);
13882 if (frame_pointer_needed)
13884 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13885 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13886 emit_insn (gen_rtx_SET (sa, t));
13888 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13889 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13891 /* Note that we use SA as a temporary CFA, as the return
13892 address is at the proper place relative to it. We
13893 pretend this happens at the FP restore insn because
13894 prior to this insn the FP would be stored at the wrong
13895 offset relative to SA, and after this insn we have no
13896 other reasonable register to use for the CFA. We don't
13897 bother resetting the CFA to the SP for the duration of
13898 the return insn. */
13899 add_reg_note (insn, REG_CFA_DEF_CFA,
13900 plus_constant (Pmode, sa, UNITS_PER_WORD));
13901 ix86_add_queued_cfa_restore_notes (insn);
13902 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13903 RTX_FRAME_RELATED_P (insn) = 1;
13905 m->fs.cfa_reg = sa;
13906 m->fs.cfa_offset = UNITS_PER_WORD;
13907 m->fs.fp_valid = false;
13909 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13910 const0_rtx, style, false);
13912 else
13914 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13915 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13916 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13917 ix86_add_queued_cfa_restore_notes (insn);
13919 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13920 if (m->fs.cfa_offset != UNITS_PER_WORD)
13922 m->fs.cfa_offset = UNITS_PER_WORD;
13923 add_reg_note (insn, REG_CFA_DEF_CFA,
13924 plus_constant (Pmode, stack_pointer_rtx,
13925 UNITS_PER_WORD));
13926 RTX_FRAME_RELATED_P (insn) = 1;
13929 m->fs.sp_offset = UNITS_PER_WORD;
13930 m->fs.sp_valid = true;
13931 m->fs.sp_realigned = false;
13934 else
13936 /* SEH requires that the function end with (1) a stack adjustment
13937 if necessary, (2) a sequence of pops, and (3) a return or
13938 jump instruction. Prevent insns from the function body from
13939 being scheduled into this sequence. */
13940 if (TARGET_SEH)
13942 /* Prevent a catch region from being adjacent to the standard
13943 epilogue sequence. Unfortunately, crtl->uses_eh_lsda and
13944 several other flags that would be interesting to test are
13945 not yet set up. */
13946 if (flag_non_call_exceptions)
13947 emit_insn (gen_nops (const1_rtx));
13948 else
13949 emit_insn (gen_blockage ());
13952 /* First step is to deallocate the stack frame so that we can
13953 pop the registers. If the stack pointer was realigned, it needs
13954 to be restored now. Also do it on SEH target for very large
13955 frames as the emitted instructions aren't allowed by the ABI
13956 in epilogues. */
13957 if (!m->fs.sp_valid || m->fs.sp_realigned
13958 || (TARGET_SEH
13959 && (m->fs.sp_offset - frame.reg_save_offset
13960 >= SEH_MAX_FRAME_SIZE)))
13962 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13963 GEN_INT (m->fs.fp_offset
13964 - frame.reg_save_offset),
13965 style, false);
13967 else if (m->fs.sp_offset != frame.reg_save_offset)
13969 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13970 GEN_INT (m->fs.sp_offset
13971 - frame.reg_save_offset),
13972 style,
13973 m->fs.cfa_reg == stack_pointer_rtx);
13976 ix86_emit_restore_regs_using_pop ();
13979 /* If we used a frame pointer and haven't already got rid of it,
13980 then do so now. */
13981 if (m->fs.fp_valid)
13983 /* If the stack pointer is valid and pointing at the frame
13984 pointer store address, then we only need a pop. */
13985 if (sp_valid_at (frame.hfp_save_offset)
13986 && m->fs.sp_offset == frame.hfp_save_offset)
13987 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13988 /* Leave results in shorter dependency chains on CPUs that are
13989 able to grok it fast. */
13990 else if (TARGET_USE_LEAVE
13991 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13992 || !cfun->machine->use_fast_prologue_epilogue)
13993 ix86_emit_leave (NULL);
13994 else
13996 pro_epilogue_adjust_stack (stack_pointer_rtx,
13997 hard_frame_pointer_rtx,
13998 const0_rtx, style, !using_drap);
13999 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14003 if (using_drap)
14005 int param_ptr_offset = UNITS_PER_WORD;
14006 rtx_insn *insn;
14008 gcc_assert (stack_realign_drap);
14010 if (ix86_static_chain_on_stack)
14011 param_ptr_offset += UNITS_PER_WORD;
14012 if (!call_used_regs[REGNO (crtl->drap_reg)])
14013 param_ptr_offset += UNITS_PER_WORD;
14015 insn = emit_insn (gen_rtx_SET
14016 (stack_pointer_rtx,
14017 gen_rtx_PLUS (Pmode,
14018 crtl->drap_reg,
14019 GEN_INT (-param_ptr_offset))));
14020 m->fs.cfa_reg = stack_pointer_rtx;
14021 m->fs.cfa_offset = param_ptr_offset;
14022 m->fs.sp_offset = param_ptr_offset;
14023 m->fs.realigned = false;
14025 add_reg_note (insn, REG_CFA_DEF_CFA,
14026 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14027 GEN_INT (param_ptr_offset)));
14028 RTX_FRAME_RELATED_P (insn) = 1;
14030 if (!call_used_regs[REGNO (crtl->drap_reg)])
14031 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14034 /* At this point the stack pointer must be valid, and we must have
14035 restored all of the registers. We may not have deallocated the
14036 entire stack frame. We've delayed this until now because it may
14037 be possible to merge the local stack deallocation with the
14038 deallocation forced by ix86_static_chain_on_stack. */
14039 gcc_assert (m->fs.sp_valid);
14040 gcc_assert (!m->fs.sp_realigned);
14041 gcc_assert (!m->fs.fp_valid);
14042 gcc_assert (!m->fs.realigned);
14043 if (m->fs.sp_offset != UNITS_PER_WORD)
14045 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14046 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14047 style, true);
14049 else
14050 ix86_add_queued_cfa_restore_notes (get_last_insn ());
14052 /* Sibcall epilogues don't want a return instruction. */
14053 if (style == 0)
14055 m->fs = frame_state_save;
14056 return;
14059 if (cfun->machine->func_type != TYPE_NORMAL)
14060 emit_jump_insn (gen_interrupt_return ());
14061 else if (crtl->args.pops_args && crtl->args.size)
14063 rtx popc = GEN_INT (crtl->args.pops_args);
14065 /* i386 can only pop 64K bytes. If asked to pop more, pop return
14066 address, do explicit add, and jump indirectly to the caller. */
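/* In effect, the large-pop path below expands to
     pop %ecx ; add $pops_args, %esp ; jmp *%ecx
   while the common case is a single "ret $pops_args".  */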
14068 if (crtl->args.pops_args >= 65536)
14070 rtx ecx = gen_rtx_REG (SImode, CX_REG);
14071 rtx_insn *insn;
14073 /* There is no "pascal" calling convention in any 64bit ABI. */
14074 gcc_assert (!TARGET_64BIT);
14076 insn = emit_insn (gen_pop (ecx));
14077 m->fs.cfa_offset -= UNITS_PER_WORD;
14078 m->fs.sp_offset -= UNITS_PER_WORD;
14080 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14081 x = gen_rtx_SET (stack_pointer_rtx, x);
14082 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14083 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14084 RTX_FRAME_RELATED_P (insn) = 1;
14086 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14087 popc, -1, true);
14088 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14090 else
14091 emit_jump_insn (gen_simple_return_pop_internal (popc));
14093 else if (!m->call_ms2sysv || !restore_stub_is_tail)
14094 emit_jump_insn (gen_simple_return_internal ());
14096 /* Restore the state back to the state from the prologue,
14097 so that it's correct for the next epilogue. */
14098 m->fs = frame_state_save;
14101 /* Reset from the function's potential modifications. */
14103 static void
14104 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14106 if (pic_offset_table_rtx
14107 && !ix86_use_pseudo_pic_reg ())
14108 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14110 if (TARGET_MACHO)
14112 rtx_insn *insn = get_last_insn ();
14113 rtx_insn *deleted_debug_label = NULL;
14115 /* Mach-O doesn't support labels at the end of objects, so if
14116 it looks like we might want one, take special action.
14117 First, collect any sequence of deleted debug labels. */
14118 while (insn
14119 && NOTE_P (insn)
14120 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14122 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14123 notes; only set their CODE_LABEL_NUMBER to -1,
14124 otherwise there would be code generation differences
14125 between -g and -g0. */
14126 if (NOTE_P (insn) && NOTE_KIND (insn)
14127 == NOTE_INSN_DELETED_DEBUG_LABEL)
14128 deleted_debug_label = insn;
14129 insn = PREV_INSN (insn);
14132 /* If we have:
14133 label:
14134 barrier
14135 then this needs to be detected, so skip past the barrier. */
14137 if (insn && BARRIER_P (insn))
14138 insn = PREV_INSN (insn);
14140 /* Up to now we've only seen notes or barriers. */
14141 if (insn)
14143 if (LABEL_P (insn)
14144 || (NOTE_P (insn)
14145 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14146 /* Trailing label. */
14147 fputs ("\tnop\n", file);
14148 else if (cfun && ! cfun->is_thunk)
14150 /* See if we have a completely empty function body, skipping
14151 the special case of the picbase thunk emitted as asm. */
14152 while (insn && ! INSN_P (insn))
14153 insn = PREV_INSN (insn);
14154 /* If we don't find any insns, we've got an empty function body;
14155 i.e. completely empty, without a return or branch. This is
14156 taken as the case where a function body has been removed
14157 because it contains an inline __builtin_unreachable(). GCC
14158 declares that reaching __builtin_unreachable() means UB so
14159 we're not obliged to do anything special; however, we want
14160 non-zero-sized function bodies. To meet this, and help the
14161 user out, let's trap the case. */
14162 if (insn == NULL)
14163 fputs ("\tud2\n", file);
14166 else if (deleted_debug_label)
14167 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14168 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14169 CODE_LABEL_NUMBER (insn) = -1;
14173 /* Return a scratch register to use in the split stack prologue. The
14174 split stack prologue is used for -fsplit-stack. It is the first
14175 instructions in the function, even before the regular prologue.
14176 The scratch register can be any caller-saved register which is not
14177 used for parameters or for the static chain. */
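/* Concretely this is %r11 for 64-bit code; for 32-bit code it is %eax,
   %ecx or %edx depending on the calling convention, the static chain and
   regparm, and sorry () is issued when no suitable register is left.  */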
14179 static unsigned int
14180 split_stack_prologue_scratch_regno (void)
14182 if (TARGET_64BIT)
14183 return R11_REG;
14184 else
14186 bool is_fastcall, is_thiscall;
14187 int regparm;
14189 is_fastcall = (lookup_attribute ("fastcall",
14190 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14191 != NULL);
14192 is_thiscall = (lookup_attribute ("thiscall",
14193 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14194 != NULL);
14195 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14197 if (is_fastcall)
14199 if (DECL_STATIC_CHAIN (cfun->decl))
14201 sorry ("-fsplit-stack does not support fastcall with "
14202 "nested function");
14203 return INVALID_REGNUM;
14205 return AX_REG;
14207 else if (is_thiscall)
14209 if (!DECL_STATIC_CHAIN (cfun->decl))
14210 return DX_REG;
14211 return AX_REG;
14213 else if (regparm < 3)
14215 if (!DECL_STATIC_CHAIN (cfun->decl))
14216 return CX_REG;
14217 else
14219 if (regparm >= 2)
14221 sorry ("-fsplit-stack does not support 2 register "
14222 "parameters for a nested function");
14223 return INVALID_REGNUM;
14225 return DX_REG;
14228 else
14230 /* FIXME: We could make this work by pushing a register
14231 around the addition and comparison. */
14232 sorry ("-fsplit-stack does not support 3 register parameters");
14233 return INVALID_REGNUM;
14238 /* A SYMBOL_REF for the function which allocates new stack space for
14239 -fsplit-stack. */
14241 static GTY(()) rtx split_stack_fn;
14243 /* A SYMBOL_REF for the variant of __morestack used with the large
14244 code model. */
14246 static GTY(()) rtx split_stack_fn_large;
14248 /* Return location of the stack guard value in the TLS block. */
14251 ix86_split_stack_guard (void)
14253 int offset;
14254 addr_space_t as = DEFAULT_TLS_SEG_REG;
14255 rtx r;
14257 gcc_assert (flag_split_stack);
14259 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14260 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14261 #else
14262 gcc_unreachable ();
14263 #endif
14265 r = GEN_INT (offset);
14266 r = gen_const_mem (Pmode, r);
14267 set_mem_addr_space (r, as);
14269 return r;
14272 /* Handle -fsplit-stack. These are the first instructions in the
14273 function, even before the regular prologue. */
14275 void
14276 ix86_expand_split_stack_prologue (void)
14278 struct ix86_frame frame;
14279 HOST_WIDE_INT allocate;
14280 unsigned HOST_WIDE_INT args_size;
14281 rtx_code_label *label;
14282 rtx limit, current, allocate_rtx, call_insn, call_fusage;
14283 rtx scratch_reg = NULL_RTX;
14284 rtx_code_label *varargs_label = NULL;
14285 rtx fn;
14287 gcc_assert (flag_split_stack && reload_completed);
14289 ix86_finalize_stack_frame_flags ();
14290 frame = cfun->machine->frame;
14291 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
14293 /* This is the label we will branch to if we have enough stack
14294 space. We expect the basic block reordering pass to reverse this
14295 branch if optimizing, so that we branch in the unlikely case. */
14296 label = gen_label_rtx ();
14298 /* We need to compare the stack pointer minus the frame size with
14299 the stack boundary in the TCB. The stack boundary always gives
14300 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
14301 can compare directly. Otherwise we need to do an addition. */
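/* For illustration, on x86-64 the generated check is roughly
     lea -ALLOCATE(%rsp), %r11
     cmp %fs:<split-stack guard offset>, %r11
     jae .Lenough_stack
   with the lea omitted and %rsp compared directly when ALLOCATE is below
   SPLIT_STACK_AVAILABLE; the label name is illustrative.  */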
14303 limit = ix86_split_stack_guard ();
14305 if (allocate < SPLIT_STACK_AVAILABLE)
14306 current = stack_pointer_rtx;
14307 else
14309 unsigned int scratch_regno;
14310 rtx offset;
14312 /* We need a scratch register to hold the stack pointer minus
14313 the required frame size. Since this is the very start of the
14314 function, the scratch register can be any caller-saved
14315 register which is not used for parameters. */
14316 offset = GEN_INT (- allocate);
14317 scratch_regno = split_stack_prologue_scratch_regno ();
14318 if (scratch_regno == INVALID_REGNUM)
14319 return;
14320 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14321 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
14323 /* We don't use ix86_gen_add3 in this case because it will
14324 want to split to lea, but when not optimizing the insn
14325 will not be split after this point. */
14326 emit_insn (gen_rtx_SET (scratch_reg,
14327 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14328 offset)));
14330 else
14332 emit_move_insn (scratch_reg, offset);
14333 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
14334 stack_pointer_rtx));
14336 current = scratch_reg;
14339 ix86_expand_branch (GEU, current, limit, label);
14340 rtx_insn *jump_insn = get_last_insn ();
14341 JUMP_LABEL (jump_insn) = label;
14343 /* Mark the jump as very likely to be taken. */
14344 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
14346 if (split_stack_fn == NULL_RTX)
14348 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
14349 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
14351 fn = split_stack_fn;
14353 /* Get more stack space. We pass in the desired stack space and the
14354 size of the arguments to copy to the new stack. In 32-bit mode
14355 we push the parameters; __morestack will return on a new stack
14356 anyhow. In 64-bit mode we pass the parameters in r10 and
14357 r11. */
14358 allocate_rtx = GEN_INT (allocate);
14359 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
14360 call_fusage = NULL_RTX;
14361 rtx pop = NULL_RTX;
14362 if (TARGET_64BIT)
14364 rtx reg10, reg11;
14366 reg10 = gen_rtx_REG (Pmode, R10_REG);
14367 reg11 = gen_rtx_REG (Pmode, R11_REG);
14369 /* If this function uses a static chain, it will be in %r10.
14370 Preserve it across the call to __morestack. */
14371 if (DECL_STATIC_CHAIN (cfun->decl))
14373 rtx rax;
14375 rax = gen_rtx_REG (word_mode, AX_REG);
14376 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
14377 use_reg (&call_fusage, rax);
14380 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
14381 && !TARGET_PECOFF)
14383 HOST_WIDE_INT argval;
14385 gcc_assert (Pmode == DImode);
14386 /* When using the large model we need to load the address
14387 into a register, and we've run out of registers. So we
14388 switch to a different calling convention, and we call a
14389 different function: __morestack_large_model. We pass the
14390 argument size in the upper 32 bits of r10 and pass the
14391 frame size in the lower 32 bits. */
14392 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
14393 gcc_assert ((args_size & 0xffffffff) == args_size);
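/* For example, with args_size == 0x20 and allocate == 0x1000, %r10 is
   loaded below with 0x0000002000001000: the low half carries the frame
   size and the high half the argument size.  */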
14395 if (split_stack_fn_large == NULL_RTX)
14397 split_stack_fn_large =
14398 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
14399 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
14401 if (ix86_cmodel == CM_LARGE_PIC)
14403 rtx_code_label *label;
14404 rtx x;
14406 label = gen_label_rtx ();
14407 emit_label (label);
14408 LABEL_PRESERVE_P (label) = 1;
14409 emit_insn (gen_set_rip_rex64 (reg10, label));
14410 emit_insn (gen_set_got_offset_rex64 (reg11, label));
14411 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
14412 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
14413 UNSPEC_GOT);
14414 x = gen_rtx_CONST (Pmode, x);
14415 emit_move_insn (reg11, x);
14416 x = gen_rtx_PLUS (Pmode, reg10, reg11);
14417 x = gen_const_mem (Pmode, x);
14418 emit_move_insn (reg11, x);
14420 else
14421 emit_move_insn (reg11, split_stack_fn_large);
14423 fn = reg11;
14425 argval = ((args_size << 16) << 16) + allocate;
14426 emit_move_insn (reg10, GEN_INT (argval));
14428 else
14430 emit_move_insn (reg10, allocate_rtx);
14431 emit_move_insn (reg11, GEN_INT (args_size));
14432 use_reg (&call_fusage, reg11);
14435 use_reg (&call_fusage, reg10);
14437 else
14439 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
14440 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
14441 insn = emit_insn (gen_push (allocate_rtx));
14442 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
14443 pop = GEN_INT (2 * UNITS_PER_WORD);
14445 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
14446 GEN_INT (UNITS_PER_WORD), constm1_rtx,
14447 pop, false);
14448 add_function_usage_to (call_insn, call_fusage);
14449 if (!TARGET_64BIT)
14450 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
14451 /* Indicate that this function can't jump to non-local gotos. */
14452 make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
14454 /* In order to make call/return prediction work right, we now need
14455 to execute a return instruction. See
14456 libgcc/config/i386/morestack.S for the details on how this works.
14458 For flow purposes gcc must not see this as a return
14459 instruction--we need control flow to continue at the subsequent
14460 label. Therefore, we use an unspec. */
14461 gcc_assert (crtl->args.pops_args < 65536);
14462 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
14464 /* If we are in 64-bit mode and this function uses a static chain,
14465 we saved %r10 in %rax before calling __morestack. */
14466 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
14467 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14468 gen_rtx_REG (word_mode, AX_REG));
14470 /* If this function calls va_start, we need to store a pointer to
14471 the arguments on the old stack, because they may not have been
14472 all copied to the new stack. At this point the old stack can be
14473 found at the frame pointer value used by __morestack, because
14474 __morestack has set that up before calling back to us. Here we
14475 store that pointer in a scratch register, and in
14476 ix86_expand_prologue we store the scratch register in a stack
14477 slot. */
14478 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14480 unsigned int scratch_regno;
14481 rtx frame_reg;
14482 int words;
14484 scratch_regno = split_stack_prologue_scratch_regno ();
14485 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
14486 frame_reg = gen_rtx_REG (Pmode, BP_REG);
14488 /* 64-bit:
14489 fp -> old fp value
14490 return address within this function
14491 return address of caller of this function
14492 stack arguments
14493 So we add three words to get to the stack arguments.
14495 32-bit:
14496 fp -> old fp value
14497 return address within this function
14498 first argument to __morestack
14499 second argument to __morestack
14500 return address of caller of this function
14501 stack arguments
14502 So we add five words to get to the stack arguments.
14504 words = TARGET_64BIT ? 3 : 5;
14505 emit_insn (gen_rtx_SET (scratch_reg,
14506 gen_rtx_PLUS (Pmode, frame_reg,
14507 GEN_INT (words * UNITS_PER_WORD))));
14509 varargs_label = gen_label_rtx ();
14510 emit_jump_insn (gen_jump (varargs_label));
14511 JUMP_LABEL (get_last_insn ()) = varargs_label;
14513 emit_barrier ();
14516 emit_label (label);
14517 LABEL_NUSES (label) = 1;
14519 /* If this function calls va_start, we now have to set the scratch
14520 register for the case where we do not call __morestack. In this
14521 case we need to set it based on the stack pointer. */
14522 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14524 emit_insn (gen_rtx_SET (scratch_reg,
14525 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14526 GEN_INT (UNITS_PER_WORD))));
14528 emit_label (varargs_label);
14529 LABEL_NUSES (varargs_label) = 1;
14533 /* We may have to tell the dataflow pass that the split stack prologue
14534 is initializing a scratch register. */
14536 static void
14537 ix86_live_on_entry (bitmap regs)
14539 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14541 gcc_assert (flag_split_stack);
14542 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14546 /* Extract the parts of an RTL expression that is a valid memory address
14547 for an instruction. Return 0 if the structure of the address is
14548 grossly off. Return -1 if the address contains ASHIFT, so it is not
14549 strictly valid, but is still used for computing the length of a lea instruction. */
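/* For example, the 32-bit address 12(%ebx,%ecx,4), i.e.
     (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %ecx, scale = 4 and disp = 12.  */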
14552 ix86_decompose_address (rtx addr, struct ix86_address *out)
14554 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14555 rtx base_reg, index_reg;
14556 HOST_WIDE_INT scale = 1;
14557 rtx scale_rtx = NULL_RTX;
14558 rtx tmp;
14559 int retval = 1;
14560 addr_space_t seg = ADDR_SPACE_GENERIC;
14562 /* Allow zero-extended SImode addresses;
14563 they will be emitted with the addr32 prefix. */
14564 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14566 if (GET_CODE (addr) == ZERO_EXTEND
14567 && GET_MODE (XEXP (addr, 0)) == SImode)
14569 addr = XEXP (addr, 0);
14570 if (CONST_INT_P (addr))
14571 return 0;
14573 else if (GET_CODE (addr) == AND
14574 && const_32bit_mask (XEXP (addr, 1), DImode))
14576 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
14577 if (addr == NULL_RTX)
14578 return 0;
14580 if (CONST_INT_P (addr))
14581 return 0;
14585 /* Allow SImode subregs of DImode addresses;
14586 they will be emitted with the addr32 prefix. */
14587 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14589 if (SUBREG_P (addr)
14590 && GET_MODE (SUBREG_REG (addr)) == DImode)
14592 addr = SUBREG_REG (addr);
14593 if (CONST_INT_P (addr))
14594 return 0;
14598 if (REG_P (addr))
14599 base = addr;
14600 else if (SUBREG_P (addr))
14602 if (REG_P (SUBREG_REG (addr)))
14603 base = addr;
14604 else
14605 return 0;
14607 else if (GET_CODE (addr) == PLUS)
14609 rtx addends[4], op;
14610 int n = 0, i;
14612 op = addr;
14615 if (n >= 4)
14616 return 0;
14617 addends[n++] = XEXP (op, 1);
14618 op = XEXP (op, 0);
14620 while (GET_CODE (op) == PLUS);
14621 if (n >= 4)
14622 return 0;
14623 addends[n] = op;
14625 for (i = n; i >= 0; --i)
14627 op = addends[i];
14628 switch (GET_CODE (op))
14630 case MULT:
14631 if (index)
14632 return 0;
14633 index = XEXP (op, 0);
14634 scale_rtx = XEXP (op, 1);
14635 break;
14637 case ASHIFT:
14638 if (index)
14639 return 0;
14640 index = XEXP (op, 0);
14641 tmp = XEXP (op, 1);
14642 if (!CONST_INT_P (tmp))
14643 return 0;
14644 scale = INTVAL (tmp);
14645 if ((unsigned HOST_WIDE_INT) scale > 3)
14646 return 0;
14647 scale = 1 << scale;
14648 break;
14650 case ZERO_EXTEND:
14651 op = XEXP (op, 0);
14652 if (GET_CODE (op) != UNSPEC)
14653 return 0;
14654 /* FALLTHRU */
14656 case UNSPEC:
14657 if (XINT (op, 1) == UNSPEC_TP
14658 && TARGET_TLS_DIRECT_SEG_REFS
14659 && seg == ADDR_SPACE_GENERIC)
14660 seg = DEFAULT_TLS_SEG_REG;
14661 else
14662 return 0;
14663 break;
14665 case SUBREG:
14666 if (!REG_P (SUBREG_REG (op)))
14667 return 0;
14668 /* FALLTHRU */
14670 case REG:
14671 if (!base)
14672 base = op;
14673 else if (!index)
14674 index = op;
14675 else
14676 return 0;
14677 break;
14679 case CONST:
14680 case CONST_INT:
14681 case SYMBOL_REF:
14682 case LABEL_REF:
14683 if (disp)
14684 return 0;
14685 disp = op;
14686 break;
14688 default:
14689 return 0;
14693 else if (GET_CODE (addr) == MULT)
14695 index = XEXP (addr, 0); /* index*scale */
14696 scale_rtx = XEXP (addr, 1);
14698 else if (GET_CODE (addr) == ASHIFT)
14700 /* We're called for lea too, which implements ashift on occasion. */
14701 index = XEXP (addr, 0);
14702 tmp = XEXP (addr, 1);
14703 if (!CONST_INT_P (tmp))
14704 return 0;
14705 scale = INTVAL (tmp);
14706 if ((unsigned HOST_WIDE_INT) scale > 3)
14707 return 0;
14708 scale = 1 << scale;
14709 retval = -1;
14711 else
14712 disp = addr; /* displacement */
14714 if (index)
14716 if (REG_P (index))
14718 else if (SUBREG_P (index)
14719 && REG_P (SUBREG_REG (index)))
14721 else
14722 return 0;
14725 /* Extract the integral value of scale. */
14726 if (scale_rtx)
14728 if (!CONST_INT_P (scale_rtx))
14729 return 0;
14730 scale = INTVAL (scale_rtx);
14733 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14734 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14736 /* Avoid useless 0 displacement. */
14737 if (disp == const0_rtx && (base || index))
14738 disp = NULL_RTX;
14740 /* Allow arg pointer and stack pointer as index if there is no scaling. */
14741 if (base_reg && index_reg && scale == 1
14742 && (REGNO (index_reg) == ARG_POINTER_REGNUM
14743 || REGNO (index_reg) == FRAME_POINTER_REGNUM
14744 || REGNO (index_reg) == SP_REG))
14746 std::swap (base, index);
14747 std::swap (base_reg, index_reg);
14750 /* Special case: %ebp cannot be encoded as a base without a displacement.
14751 Similarly %r13. */
14752 if (!disp && base_reg
14753 && (REGNO (base_reg) == ARG_POINTER_REGNUM
14754 || REGNO (base_reg) == FRAME_POINTER_REGNUM
14755 || REGNO (base_reg) == BP_REG
14756 || REGNO (base_reg) == R13_REG))
14757 disp = const0_rtx;
14759 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
14760 Avoid this by transforming to [%esi+0].
14761 Reload calls address legitimization without cfun defined, so we need
14762 to test cfun for being non-NULL. */
14763 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14764 && base_reg && !index_reg && !disp
14765 && REGNO (base_reg) == SI_REG)
14766 disp = const0_rtx;
14768 /* Special case: encode reg+reg instead of reg*2. */
14769 if (!base && index && scale == 2)
14770 base = index, base_reg = index_reg, scale = 1;
14772 /* Special case: scaling cannot be encoded without base or displacement. */
14773 if (!base && !disp && index && scale != 1)
14774 disp = const0_rtx;
14776 out->base = base;
14777 out->index = index;
14778 out->disp = disp;
14779 out->scale = scale;
14780 out->seg = seg;
14782 return retval;
14785 /* Return cost of the memory address x.
14786 For i386, it is better to use a complex address than let gcc copy
14787 the address into a reg and make a new pseudo. But not if the address
14788 requires two regs - that would mean more pseudos with longer
14789 lifetimes. */
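/* For example, before register allocation (plus (reg 90) (const_int 12))
   costs 2 and a base + index*scale + disp address built from two pseudos
   costs 3, while the same addresses using hard registers after reload stay
   at the base cost of 1 (plus the K6 penalty below where it applies); the
   register numbers are illustrative.  */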
14790 static int
14791 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14793 struct ix86_address parts;
14794 int cost = 1;
14795 int ok = ix86_decompose_address (x, &parts);
14797 gcc_assert (ok);
14799 if (parts.base && SUBREG_P (parts.base))
14800 parts.base = SUBREG_REG (parts.base);
14801 if (parts.index && SUBREG_P (parts.index))
14802 parts.index = SUBREG_REG (parts.index);
14804 /* Attempt to minimize number of registers in the address by increasing
14805 address cost for each used register. We don't increase address cost
14806 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14807 is not invariant itself it most likely means that base or index is not
14808 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14809 which is not profitable for x86. */
14810 if (parts.base
14811 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14812 && (current_pass->type == GIMPLE_PASS
14813 || !pic_offset_table_rtx
14814 || !REG_P (parts.base)
14815 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
14816 cost++;
14818 if (parts.index
14819 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14820 && (current_pass->type == GIMPLE_PASS
14821 || !pic_offset_table_rtx
14822 || !REG_P (parts.index)
14823 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14824 cost++;
14826 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
14827 since its predecode logic can't detect the length of instructions
14828 and they degenerate to vector decoding. Increase cost of such
14829 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14830 to split such addresses or even refuse such addresses at all.
14832 Following addressing modes are affected:
14833 [base+scale*index]
14834 [scale*index+disp]
14835 [base+index]
14837 The first and last case may be avoidable by explicitly coding the zero into the
14838 memory address, but I don't have an AMD-K6 machine handy to check this
14839 theory. */
14841 if (TARGET_K6
14842 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14843 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14844 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14845 cost += 10;
14847 return cost;
14850 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14851 this is used to form addresses to local data when -fPIC is in
14852 use. */
14854 static bool
14855 darwin_local_data_pic (rtx disp)
14857 return (GET_CODE (disp) == UNSPEC
14858 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14861 /* True if operand X should be loaded from GOT. */
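/* When this holds for a call to an external function foo (e.g. under
   -fno-plt -fno-pic), the call is typically emitted as an indirect call
   through foo's GOT slot, such as "call *foo@GOTPCREL(%rip)" on x86-64,
   rather than through the PLT.  */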
14863 bool
14864 ix86_force_load_from_GOT_p (rtx x)
14866 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
14867 && !TARGET_PECOFF && !TARGET_MACHO
14868 && !flag_plt && !flag_pic
14869 && ix86_cmodel != CM_LARGE
14870 && GET_CODE (x) == SYMBOL_REF
14871 && SYMBOL_REF_FUNCTION_P (x)
14872 && !SYMBOL_REF_LOCAL_P (x));
14875 /* Determine if a given RTX is a valid constant. We already know this
14876 satisfies CONSTANT_P. */
14878 static bool
14879 ix86_legitimate_constant_p (machine_mode mode, rtx x)
14881 /* Pointer bounds constants are not valid. */
14882 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14883 return false;
14885 switch (GET_CODE (x))
14887 case CONST:
14888 x = XEXP (x, 0);
14890 if (GET_CODE (x) == PLUS)
14892 if (!CONST_INT_P (XEXP (x, 1)))
14893 return false;
14894 x = XEXP (x, 0);
14897 if (TARGET_MACHO && darwin_local_data_pic (x))
14898 return true;
14900 /* Only some unspecs are valid as "constants". */
14901 if (GET_CODE (x) == UNSPEC)
14902 switch (XINT (x, 1))
14904 case UNSPEC_GOT:
14905 case UNSPEC_GOTOFF:
14906 case UNSPEC_PLTOFF:
14907 return TARGET_64BIT;
14908 case UNSPEC_TPOFF:
14909 case UNSPEC_NTPOFF:
14910 x = XVECEXP (x, 0, 0);
14911 return (GET_CODE (x) == SYMBOL_REF
14912 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14913 case UNSPEC_DTPOFF:
14914 x = XVECEXP (x, 0, 0);
14915 return (GET_CODE (x) == SYMBOL_REF
14916 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14917 default:
14918 return false;
14921 /* We must have drilled down to a symbol. */
14922 if (GET_CODE (x) == LABEL_REF)
14923 return true;
14924 if (GET_CODE (x) != SYMBOL_REF)
14925 return false;
14926 /* FALLTHRU */
14928 case SYMBOL_REF:
14929 /* TLS symbols are never valid. */
14930 if (SYMBOL_REF_TLS_MODEL (x))
14931 return false;
14933 /* DLLIMPORT symbols are never valid. */
14934 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14935 && SYMBOL_REF_DLLIMPORT_P (x))
14936 return false;
14938 #if TARGET_MACHO
14939 /* mdynamic-no-pic */
14940 if (MACHO_DYNAMIC_NO_PIC_P)
14941 return machopic_symbol_defined_p (x);
14942 #endif
14944 /* External function address should be loaded
14945 via the GOT slot to avoid PLT. */
14946 if (ix86_force_load_from_GOT_p (x))
14947 return false;
14949 break;
14951 CASE_CONST_SCALAR_INT:
14952 switch (mode)
14954 case E_TImode:
14955 if (TARGET_64BIT)
14956 return true;
14957 /* FALLTHRU */
14958 case E_OImode:
14959 case E_XImode:
14960 if (!standard_sse_constant_p (x, mode))
14961 return false;
14962 default:
14963 break;
14965 break;
14967 case CONST_VECTOR:
14968 if (!standard_sse_constant_p (x, mode))
14969 return false;
14971 default:
14972 break;
14975 /* Otherwise we handle everything else in the move patterns. */
14976 return true;
14979 /* Determine if it's legal to put X into the constant pool. This
14980 is not possible for the address of thread-local symbols, which
14981 is checked above. */
14983 static bool
14984 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14986 /* We can put any immediate constant in memory. */
14987 switch (GET_CODE (x))
14989 CASE_CONST_ANY:
14990 return false;
14992 default:
14993 break;
14996 return !ix86_legitimate_constant_p (mode, x);
14999 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
15000 otherwise zero. */
15002 static bool
15003 is_imported_p (rtx x)
15005 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15006 || GET_CODE (x) != SYMBOL_REF)
15007 return false;
15009 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15013 /* Nonzero if the constant value X is a legitimate general operand
15014 when generating PIC code. It is given that flag_pic is on and
15015 that X satisfies CONSTANT_P. */
15017 bool
15018 legitimate_pic_operand_p (rtx x)
15020 rtx inner;
15022 switch (GET_CODE (x))
15024 case CONST:
15025 inner = XEXP (x, 0);
15026 if (GET_CODE (inner) == PLUS
15027 && CONST_INT_P (XEXP (inner, 1)))
15028 inner = XEXP (inner, 0);
15030 /* Only some unspecs are valid as "constants". */
15031 if (GET_CODE (inner) == UNSPEC)
15032 switch (XINT (inner, 1))
15034 case UNSPEC_GOT:
15035 case UNSPEC_GOTOFF:
15036 case UNSPEC_PLTOFF:
15037 return TARGET_64BIT;
15038 case UNSPEC_TPOFF:
15039 x = XVECEXP (inner, 0, 0);
15040 return (GET_CODE (x) == SYMBOL_REF
15041 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15042 case UNSPEC_MACHOPIC_OFFSET:
15043 return legitimate_pic_address_disp_p (x);
15044 default:
15045 return false;
15047 /* FALLTHRU */
15049 case SYMBOL_REF:
15050 case LABEL_REF:
15051 return legitimate_pic_address_disp_p (x);
15053 default:
15054 return true;
15058 /* Determine if a given CONST RTX is a valid memory displacement
15059 in PIC mode. */
15061 bool
15062 legitimate_pic_address_disp_p (rtx disp)
15064 bool saw_plus;
15066 /* In 64bit mode we can allow direct addresses of symbols and labels
15067 when they are not dynamic symbols. */
15068 if (TARGET_64BIT)
15070 rtx op0 = disp, op1;
15072 switch (GET_CODE (disp))
15074 case LABEL_REF:
15075 return true;
15077 case CONST:
15078 if (GET_CODE (XEXP (disp, 0)) != PLUS)
15079 break;
15080 op0 = XEXP (XEXP (disp, 0), 0);
15081 op1 = XEXP (XEXP (disp, 0), 1);
15082 if (!CONST_INT_P (op1))
15083 break;
15084 if (GET_CODE (op0) == UNSPEC
15085 && (XINT (op0, 1) == UNSPEC_DTPOFF
15086 || XINT (op0, 1) == UNSPEC_NTPOFF)
15087 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15088 return true;
15089 if (INTVAL (op1) >= 16*1024*1024
15090 || INTVAL (op1) < -16*1024*1024)
15091 break;
15092 if (GET_CODE (op0) == LABEL_REF)
15093 return true;
15094 if (GET_CODE (op0) == CONST
15095 && GET_CODE (XEXP (op0, 0)) == UNSPEC
15096 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15097 return true;
15098 if (GET_CODE (op0) == UNSPEC
15099 && XINT (op0, 1) == UNSPEC_PCREL)
15100 return true;
15101 if (GET_CODE (op0) != SYMBOL_REF)
15102 break;
15103 /* FALLTHRU */
15105 case SYMBOL_REF:
15106 /* TLS references should always be enclosed in UNSPEC.
15107 A dllimported symbol must always be resolved. */
15108 if (SYMBOL_REF_TLS_MODEL (op0)
15109 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15110 return false;
15112 if (TARGET_PECOFF)
15114 if (is_imported_p (op0))
15115 return true;
15117 if (SYMBOL_REF_FAR_ADDR_P (op0)
15118 || !SYMBOL_REF_LOCAL_P (op0))
15119 break;
15121 /* Function symbols need to be resolved only for the
15122 large model.
15123 For the small model we don't need to resolve anything
15124 here. */
15125 if ((ix86_cmodel != CM_LARGE_PIC
15126 && SYMBOL_REF_FUNCTION_P (op0))
15127 || ix86_cmodel == CM_SMALL_PIC)
15128 return true;
15129 /* Non-external symbols don't need to be resolved for the
15130 large and medium models. */
15131 if ((ix86_cmodel == CM_LARGE_PIC
15132 || ix86_cmodel == CM_MEDIUM_PIC)
15133 && !SYMBOL_REF_EXTERNAL_P (op0))
15134 return true;
15136 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15137 && (SYMBOL_REF_LOCAL_P (op0)
15138 || (HAVE_LD_PIE_COPYRELOC
15139 && flag_pie
15140 && !SYMBOL_REF_WEAK (op0)
15141 && !SYMBOL_REF_FUNCTION_P (op0)))
15142 && ix86_cmodel != CM_LARGE_PIC)
15143 return true;
15144 break;
15146 default:
15147 break;
15150 if (GET_CODE (disp) != CONST)
15151 return false;
15152 disp = XEXP (disp, 0);
15154 if (TARGET_64BIT)
15156 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
15157 into GOT tables. We should not need these anyway. */
15158 if (GET_CODE (disp) != UNSPEC
15159 || (XINT (disp, 1) != UNSPEC_GOTPCREL
15160 && XINT (disp, 1) != UNSPEC_GOTOFF
15161 && XINT (disp, 1) != UNSPEC_PCREL
15162 && XINT (disp, 1) != UNSPEC_PLTOFF))
15163 return false;
15165 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15166 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15167 return false;
15168 return true;
15171 saw_plus = false;
15172 if (GET_CODE (disp) == PLUS)
15174 if (!CONST_INT_P (XEXP (disp, 1)))
15175 return false;
15176 disp = XEXP (disp, 0);
15177 saw_plus = true;
15180 if (TARGET_MACHO && darwin_local_data_pic (disp))
15181 return true;
15183 if (GET_CODE (disp) != UNSPEC)
15184 return false;
15186 switch (XINT (disp, 1))
15188 case UNSPEC_GOT:
15189 if (saw_plus)
15190 return false;
15191 /* We need to check for both symbols and labels because VxWorks loads
15192 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
15193 details. */
15194 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15195 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15196 case UNSPEC_GOTOFF:
15197 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15198 While the ABI also specifies a 32bit relocation, we don't produce it in
15199 the small PIC model at all. */
15200 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15201 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15202 && !TARGET_64BIT)
15203 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15204 return false;
15205 case UNSPEC_GOTTPOFF:
15206 case UNSPEC_GOTNTPOFF:
15207 case UNSPEC_INDNTPOFF:
15208 if (saw_plus)
15209 return false;
15210 disp = XVECEXP (disp, 0, 0);
15211 return (GET_CODE (disp) == SYMBOL_REF
15212 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15213 case UNSPEC_NTPOFF:
15214 disp = XVECEXP (disp, 0, 0);
15215 return (GET_CODE (disp) == SYMBOL_REF
15216 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15217 case UNSPEC_DTPOFF:
15218 disp = XVECEXP (disp, 0, 0);
15219 return (GET_CODE (disp) == SYMBOL_REF
15220 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15223 return false;
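/* Illustrative sketch (not part of the original code): in 32-bit PIC code
   the displacements accepted above look like
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   optionally plus a constant offset, which prints as foo@GOTOFF.  A bare
   (symbol_ref "foo") is rejected here, since symbolic references must go
   through the PIC register.  */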
15226 /* Determine if op is a suitable RTX for an address register.
15227 Return naked register if a register or a register subreg is
15228 found, otherwise return NULL_RTX. */
15230 static rtx
15231 ix86_validate_address_register (rtx op)
15233 machine_mode mode = GET_MODE (op);
15235 /* Only SImode or DImode registers can form the address. */
15236 if (mode != SImode && mode != DImode)
15237 return NULL_RTX;
15239 if (REG_P (op))
15240 return op;
15241 else if (SUBREG_P (op))
15243 rtx reg = SUBREG_REG (op);
15245 if (!REG_P (reg))
15246 return NULL_RTX;
15248 mode = GET_MODE (reg);
15250 /* Don't allow SUBREGs that span more than a word. It can
15251 lead to spill failures when the register is one word out
15252 of a two word structure. */
15253 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15254 return NULL_RTX;
15256 /* Allow only SUBREGs of non-eliminable hard registers. */
15257 if (register_no_elim_operand (reg, mode))
15258 return reg;
15261 /* Op is not a register. */
15262 return NULL_RTX;
15265 /* Recognizes RTL expressions that are valid memory addresses for an
15266 instruction. The MODE argument is the machine mode for the MEM
15267 expression that wants to use this address.
15269 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
15270 convert common non-canonical forms to canonical form so that they will
15271 be recognized. */
15273 static bool
15274 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
15276 struct ix86_address parts;
15277 rtx base, index, disp;
15278 HOST_WIDE_INT scale;
15279 addr_space_t seg;
15281 if (ix86_decompose_address (addr, &parts) <= 0)
15282 /* Decomposition failed. */
15283 return false;
15285 base = parts.base;
15286 index = parts.index;
15287 disp = parts.disp;
15288 scale = parts.scale;
15289 seg = parts.seg;
15291 /* Validate base register. */
15292 if (base)
15294 rtx reg = ix86_validate_address_register (base);
15296 if (reg == NULL_RTX)
15297 return false;
15299 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
15300 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
15301 /* Base is not valid. */
15302 return false;
15305 /* Validate index register. */
15306 if (index)
15308 rtx reg = ix86_validate_address_register (index);
15310 if (reg == NULL_RTX)
15311 return false;
15313 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
15314 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
15315 /* Index is not valid. */
15316 return false;
15319 /* Index and base should have the same mode. */
15320 if (base && index
15321 && GET_MODE (base) != GET_MODE (index))
15322 return false;
15324 /* Address override works only on the (%reg) part of %fs:(%reg). */
15325 if (seg != ADDR_SPACE_GENERIC
15326 && ((base && GET_MODE (base) != word_mode)
15327 || (index && GET_MODE (index) != word_mode)))
15328 return false;
15330 /* Validate scale factor. */
15331 if (scale != 1)
15333 if (!index)
15334 /* Scale without index. */
15335 return false;
15337 if (scale != 2 && scale != 4 && scale != 8)
15338 /* Scale is not a valid multiplier. */
15339 return false;
15342 /* Validate displacement. */
15343 if (disp)
15345 if (GET_CODE (disp) == CONST
15346 && GET_CODE (XEXP (disp, 0)) == UNSPEC
15347 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
15348 switch (XINT (XEXP (disp, 0), 1))
15350 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
15351 when used. While the ABI also specifies 32bit relocations, we
15352 don't produce them at all and use IP-relative addressing instead.
15353 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
15354 should be loaded via GOT. */
15355 case UNSPEC_GOT:
15356 if (!TARGET_64BIT
15357 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15358 goto is_legitimate_pic;
15359 /* FALLTHRU */
15360 case UNSPEC_GOTOFF:
15361 gcc_assert (flag_pic);
15362 if (!TARGET_64BIT)
15363 goto is_legitimate_pic;
15365 /* 64bit address unspec. */
15366 return false;
15368 case UNSPEC_GOTPCREL:
15369 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
15370 goto is_legitimate_pic;
15371 /* FALLTHRU */
15372 case UNSPEC_PCREL:
15373 gcc_assert (flag_pic);
15374 goto is_legitimate_pic;
15376 case UNSPEC_GOTTPOFF:
15377 case UNSPEC_GOTNTPOFF:
15378 case UNSPEC_INDNTPOFF:
15379 case UNSPEC_NTPOFF:
15380 case UNSPEC_DTPOFF:
15381 break;
15383 default:
15384 /* Invalid address unspec. */
15385 return false;
15388 else if (SYMBOLIC_CONST (disp)
15389 && (flag_pic
15390 || (TARGET_MACHO
15391 #if TARGET_MACHO
15392 && MACHOPIC_INDIRECT
15393 && !machopic_operand_p (disp)
15394 #endif
15398 is_legitimate_pic:
15399 if (TARGET_64BIT && (index || base))
15401 /* foo@dtpoff(%rX) is ok. */
15402 if (GET_CODE (disp) != CONST
15403 || GET_CODE (XEXP (disp, 0)) != PLUS
15404 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
15405 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
15406 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
15407 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
15408 /* Non-constant pic memory reference. */
15409 return false;
15411 else if ((!TARGET_MACHO || flag_pic)
15412 && ! legitimate_pic_address_disp_p (disp))
15413 /* Displacement is an invalid pic construct. */
15414 return false;
15415 #if TARGET_MACHO
15416 else if (MACHO_DYNAMIC_NO_PIC_P
15417 && !ix86_legitimate_constant_p (Pmode, disp))
15418 /* Displacement must be referenced via non_lazy_pointer. */
15419 return false;
15420 #endif
15422 /* This code used to verify that a symbolic pic displacement
15423 includes the pic_offset_table_rtx register.
15425 While this is a good idea, unfortunately these constructs may
15426 be created by the "adds using lea" optimization for incorrect
15427 code like:
15429 int a;
15430 int foo(int i)
15432 return *(&a+i);
15435 This code is nonsensical, but results in addressing the
15436 GOT table with a pic_offset_table_rtx base. We can't
15437 just refuse it easily, since it gets matched by the
15438 "addsi3" pattern, which later gets split to an lea when the
15439 output register differs from the input. While this
15440 could be handled by a separate addsi pattern for this case
15441 that never results in an lea, disabling this test seems to be
15442 the easier and correct fix for the crash. */
15444 else if (GET_CODE (disp) != LABEL_REF
15445 && !CONST_INT_P (disp)
15446 && (GET_CODE (disp) != CONST
15447 || !ix86_legitimate_constant_p (Pmode, disp))
15448 && (GET_CODE (disp) != SYMBOL_REF
15449 || !ix86_legitimate_constant_p (Pmode, disp)))
15450 /* Displacement is not constant. */
15451 return false;
15452 else if (TARGET_64BIT
15453 && !x86_64_immediate_operand (disp, VOIDmode))
15454 /* Displacement is out of range. */
15455 return false;
15456 /* In x32 mode, constant addresses are sign extended to 64bit, so
15457 we have to prevent addresses from 0x80000000 to 0xffffffff. */
15458 else if (TARGET_X32 && !(index || base)
15459 && CONST_INT_P (disp)
15460 && val_signbit_known_set_p (SImode, INTVAL (disp)))
15461 return false;
15464 /* Everything looks valid. */
15465 return true;
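/* Illustrative sketch (not part of the original code): the checks above
   accept an address such as
       (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
   i.e. (%ebx,%ecx,4), but reject a scale of 3, a scale without an index
   register, or a 64-bit displacement that does not fit in a sign-extended
   32-bit immediate.  */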
15468 /* Determine if a given RTX is a valid constant address. */
15470 bool
15471 constant_address_p (rtx x)
15473 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
15476 /* Return a unique alias set for the GOT. */
15478 static alias_set_type
15479 ix86_GOT_alias_set (void)
15481 static alias_set_type set = -1;
15482 if (set == -1)
15483 set = new_alias_set ();
15484 return set;
15487 /* Return a legitimate reference for ORIG (an address) using the
15488 register REG. If REG is 0, a new pseudo is generated.
15490 There are two types of references that must be handled:
15492 1. Global data references must load the address from the GOT, via
15493 the PIC reg. An insn is emitted to do this load, and the reg is
15494 returned.
15496 2. Static data references, constant pool addresses, and code labels
15497 compute the address as an offset from the GOT, whose base is in
15498 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
15499 differentiate them from global data objects. The returned
15500 address is the PIC reg + an unspec constant.
15502 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
15503 reg also appears in the address. */
15505 static rtx
15506 legitimize_pic_address (rtx orig, rtx reg)
15508 rtx addr = orig;
15509 rtx new_rtx = orig;
15511 #if TARGET_MACHO
15512 if (TARGET_MACHO && !TARGET_64BIT)
15514 if (reg == 0)
15515 reg = gen_reg_rtx (Pmode);
15516 /* Use the generic Mach-O PIC machinery. */
15517 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
15519 #endif
15521 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15523 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15524 if (tmp)
15525 return tmp;
15528 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
15529 new_rtx = addr;
15530 else if ((!TARGET_64BIT
15531 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
15532 && !TARGET_PECOFF
15533 && gotoff_operand (addr, Pmode))
15535 /* This symbol may be referenced via a displacement
15536 from the PIC base address (@GOTOFF). */
15537 if (GET_CODE (addr) == CONST)
15538 addr = XEXP (addr, 0);
15540 if (GET_CODE (addr) == PLUS)
15542 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15543 UNSPEC_GOTOFF);
15544 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15546 else
15547 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15549 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15551 if (TARGET_64BIT)
15552 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15554 if (reg != 0)
15556 gcc_assert (REG_P (reg));
15557 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
15558 new_rtx, reg, 1, OPTAB_DIRECT);
15560 else
15561 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15563 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15564 /* We can't use @GOTOFF for text labels
15565 on VxWorks, see gotoff_operand. */
15566 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15568 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15569 if (tmp)
15570 return tmp;
15572 /* For x64 PE-COFF there is no GOT table,
15573 so we use the address directly. */
15574 if (TARGET_64BIT && TARGET_PECOFF)
15576 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15577 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15579 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15581 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
15582 UNSPEC_GOTPCREL);
15583 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15584 new_rtx = gen_const_mem (Pmode, new_rtx);
15585 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15587 else
15589 /* This symbol must be referenced via a load
15590 from the Global Offset Table (@GOT). */
15591 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15592 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15593 if (TARGET_64BIT)
15594 new_rtx = force_reg (Pmode, new_rtx);
15595 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15596 new_rtx = gen_const_mem (Pmode, new_rtx);
15597 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15600 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
15602 else
15604 if (CONST_INT_P (addr)
15605 && !x86_64_immediate_operand (addr, VOIDmode))
15606 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
15607 else if (GET_CODE (addr) == CONST)
15609 addr = XEXP (addr, 0);
15611 /* We must match stuff we generate before. Assume the only
15612 unspecs that can get here are ours. Not that we could do
15613 anything with them anyway.... */
15614 if (GET_CODE (addr) == UNSPEC
15615 || (GET_CODE (addr) == PLUS
15616 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15617 return orig;
15618 gcc_assert (GET_CODE (addr) == PLUS);
15621 if (GET_CODE (addr) == PLUS)
15623 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15625 /* Check first to see if this is a constant
15626 offset from a @GOTOFF symbol reference. */
15627 if (!TARGET_PECOFF
15628 && gotoff_operand (op0, Pmode)
15629 && CONST_INT_P (op1))
15631 if (!TARGET_64BIT)
15633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15634 UNSPEC_GOTOFF);
15635 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15636 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15638 if (reg != 0)
15640 gcc_assert (REG_P (reg));
15641 new_rtx = expand_simple_binop (Pmode, PLUS,
15642 pic_offset_table_rtx,
15643 new_rtx, reg, 1,
15644 OPTAB_DIRECT);
15646 else
15647 new_rtx
15648 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15650 else
15652 if (INTVAL (op1) < -16*1024*1024
15653 || INTVAL (op1) >= 16*1024*1024)
15655 if (!x86_64_immediate_operand (op1, Pmode))
15656 op1 = force_reg (Pmode, op1);
15658 new_rtx
15659 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
15663 else
15665 rtx base = legitimize_pic_address (op0, reg);
15666 machine_mode mode = GET_MODE (base);
15667 new_rtx
15668 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15670 if (CONST_INT_P (new_rtx))
15672 if (INTVAL (new_rtx) < -16*1024*1024
15673 || INTVAL (new_rtx) >= 16*1024*1024)
15675 if (!x86_64_immediate_operand (new_rtx, mode))
15676 new_rtx = force_reg (mode, new_rtx);
15678 new_rtx
15679 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15681 else
15682 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15684 else
15686 /* For %rip addressing, we have to use
15687 just disp32, with neither base nor index. */
15688 if (TARGET_64BIT
15689 && (GET_CODE (base) == SYMBOL_REF
15690 || GET_CODE (base) == LABEL_REF))
15691 base = force_reg (mode, base);
15692 if (GET_CODE (new_rtx) == PLUS
15693 && CONSTANT_P (XEXP (new_rtx, 1)))
15695 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15696 new_rtx = XEXP (new_rtx, 1);
15698 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15703 return new_rtx;
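/* Illustrative sketch (not part of the original code): in 32-bit PIC code a
   reference to local data "foo" is rewritten from (symbol_ref "foo") into
       (plus pic_offset_table_rtx
             (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   i.e. foo@GOTOFF(%ebx), while a global symbol becomes a load from its GOT
   slot, (mem (plus pic_offset_table_rtx (const (unspec [...] UNSPEC_GOT)))).  */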
15706 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15708 static rtx
15709 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15711 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
15713 if (GET_MODE (tp) != tp_mode)
15715 gcc_assert (GET_MODE (tp) == SImode);
15716 gcc_assert (tp_mode == DImode);
15718 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15721 if (to_reg)
15722 tp = copy_to_mode_reg (tp_mode, tp);
15724 return tp;
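/* Illustrative sketch (not part of the original code): the thread pointer is
   modelled as (unspec [(const_int 0)] UNSPEC_TP); materializing it in a
   register corresponds to something like "movq %fs:0, %reg" on x86-64
   (%gs-based on 32-bit Linux), and on x32 it is zero-extended from SImode
   to DImode as shown above.  */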
15727 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15729 static GTY(()) rtx ix86_tls_symbol;
15731 static rtx
15732 ix86_tls_get_addr (void)
15734 if (!ix86_tls_symbol)
15736 const char *sym
15737 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15738 ? "___tls_get_addr" : "__tls_get_addr");
15740 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15743 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15745 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15746 UNSPEC_PLTOFF);
15747 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15748 gen_rtx_CONST (Pmode, unspec));
15751 return ix86_tls_symbol;
15754 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15756 static GTY(()) rtx ix86_tls_module_base_symbol;
15759 ix86_tls_module_base (void)
15761 if (!ix86_tls_module_base_symbol)
15763 ix86_tls_module_base_symbol
15764 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15766 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15767 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15770 return ix86_tls_module_base_symbol;
15773 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15774 false if we expect this to be used for a memory address and true if
15775 we expect to load the address into a register. */
15777 static rtx
15778 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15780 rtx dest, base, off;
15781 rtx pic = NULL_RTX, tp = NULL_RTX;
15782 machine_mode tp_mode = Pmode;
15783 int type;
15785 /* Fall back to the global dynamic model if the toolchain cannot support local
15786 dynamic. */
15787 if (TARGET_SUN_TLS && !TARGET_64BIT
15788 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15789 && model == TLS_MODEL_LOCAL_DYNAMIC)
15790 model = TLS_MODEL_GLOBAL_DYNAMIC;
15792 switch (model)
15794 case TLS_MODEL_GLOBAL_DYNAMIC:
15795 dest = gen_reg_rtx (Pmode);
15797 if (!TARGET_64BIT)
15799 if (flag_pic && !TARGET_PECOFF)
15800 pic = pic_offset_table_rtx;
15801 else
15803 pic = gen_reg_rtx (Pmode);
15804 emit_insn (gen_set_got (pic));
15808 if (TARGET_GNU2_TLS)
15810 if (TARGET_64BIT)
15811 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15812 else
15813 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15815 tp = get_thread_pointer (Pmode, true);
15816 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15818 if (GET_MODE (x) != Pmode)
15819 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15821 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15823 else
15825 rtx caddr = ix86_tls_get_addr ();
15827 if (TARGET_64BIT)
15829 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15830 rtx_insn *insns;
15832 start_sequence ();
15833 emit_call_insn
15834 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15835 insns = get_insns ();
15836 end_sequence ();
15838 if (GET_MODE (x) != Pmode)
15839 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15841 RTL_CONST_CALL_P (insns) = 1;
15842 emit_libcall_block (insns, dest, rax, x);
15844 else
15845 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15847 break;
15849 case TLS_MODEL_LOCAL_DYNAMIC:
15850 base = gen_reg_rtx (Pmode);
15852 if (!TARGET_64BIT)
15854 if (flag_pic)
15855 pic = pic_offset_table_rtx;
15856 else
15858 pic = gen_reg_rtx (Pmode);
15859 emit_insn (gen_set_got (pic));
15863 if (TARGET_GNU2_TLS)
15865 rtx tmp = ix86_tls_module_base ();
15867 if (TARGET_64BIT)
15868 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15869 else
15870 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15872 tp = get_thread_pointer (Pmode, true);
15873 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15874 gen_rtx_MINUS (Pmode, tmp, tp));
15876 else
15878 rtx caddr = ix86_tls_get_addr ();
15880 if (TARGET_64BIT)
15882 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15883 rtx_insn *insns;
15884 rtx eqv;
15886 start_sequence ();
15887 emit_call_insn
15888 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15889 insns = get_insns ();
15890 end_sequence ();
15892 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15893 share the LD_BASE result with other LD model accesses. */
15894 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15895 UNSPEC_TLS_LD_BASE);
15897 RTL_CONST_CALL_P (insns) = 1;
15898 emit_libcall_block (insns, base, rax, eqv);
15900 else
15901 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
15904 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15905 off = gen_rtx_CONST (Pmode, off);
15907 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15909 if (TARGET_GNU2_TLS)
15911 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15913 if (GET_MODE (x) != Pmode)
15914 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15916 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15918 break;
15920 case TLS_MODEL_INITIAL_EXEC:
15921 if (TARGET_64BIT)
15923 if (TARGET_SUN_TLS && !TARGET_X32)
15925 /* The Sun linker took the AMD64 TLS spec literally
15926 and can only handle %rax as destination of the
15927 initial executable code sequence. */
15929 dest = gen_reg_rtx (DImode);
15930 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15931 return dest;
15934 /* Generate DImode references to avoid %fs:(%reg32)
15935 problems and a linker IE->LE relaxation bug. */
15936 tp_mode = DImode;
15937 pic = NULL;
15938 type = UNSPEC_GOTNTPOFF;
15940 else if (flag_pic)
15942 pic = pic_offset_table_rtx;
15943 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15945 else if (!TARGET_ANY_GNU_TLS)
15947 pic = gen_reg_rtx (Pmode);
15948 emit_insn (gen_set_got (pic));
15949 type = UNSPEC_GOTTPOFF;
15951 else
15953 pic = NULL;
15954 type = UNSPEC_INDNTPOFF;
15957 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15958 off = gen_rtx_CONST (tp_mode, off);
15959 if (pic)
15960 off = gen_rtx_PLUS (tp_mode, pic, off);
15961 off = gen_const_mem (tp_mode, off);
15962 set_mem_alias_set (off, ix86_GOT_alias_set ());
15964 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15966 base = get_thread_pointer (tp_mode,
15967 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15968 off = force_reg (tp_mode, off);
15969 dest = gen_rtx_PLUS (tp_mode, base, off);
15970 if (tp_mode != Pmode)
15971 dest = convert_to_mode (Pmode, dest, 1);
15973 else
15975 base = get_thread_pointer (Pmode, true);
15976 dest = gen_reg_rtx (Pmode);
15977 emit_insn (ix86_gen_sub3 (dest, base, off));
15979 break;
15981 case TLS_MODEL_LOCAL_EXEC:
15982 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15983 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15984 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15985 off = gen_rtx_CONST (Pmode, off);
15987 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15989 base = get_thread_pointer (Pmode,
15990 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15991 return gen_rtx_PLUS (Pmode, base, off);
15993 else
15995 base = get_thread_pointer (Pmode, true);
15996 dest = gen_reg_rtx (Pmode);
15997 emit_insn (ix86_gen_sub3 (dest, base, off));
15999 break;
16001 default:
16002 gcc_unreachable ();
16005 return dest;
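/* Illustrative sketch (not part of the original code): for the local-exec
   model with GNU TLS on x86-64 the code above builds
       (plus (unspec [(const_int 0)] UNSPEC_TP)
             (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))
   which typically ends up as
       movq  %fs:0, %rax
       leaq  x@tpoff(%rax), %rax
   or is folded directly into a %fs-relative memory operand.  */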
16008 /* Return true if OP refers to a TLS address. */
16009 bool
16010 ix86_tls_address_pattern_p (rtx op)
16012 subrtx_var_iterator::array_type array;
16013 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16015 rtx op = *iter;
16016 if (MEM_P (op))
16018 rtx *x = &XEXP (op, 0);
16019 while (GET_CODE (*x) == PLUS)
16021 int i;
16022 for (i = 0; i < 2; i++)
16024 rtx u = XEXP (*x, i);
16025 if (GET_CODE (u) == ZERO_EXTEND)
16026 u = XEXP (u, 0);
16027 if (GET_CODE (u) == UNSPEC
16028 && XINT (u, 1) == UNSPEC_TP)
16029 return true;
16031 x = &XEXP (*x, 0);
16034 iter.skip_subrtxes ();
16038 return false;
16041 /* Rewrite *LOC so that it refers to a default TLS address space. */
16042 void
16043 ix86_rewrite_tls_address_1 (rtx *loc)
16045 subrtx_ptr_iterator::array_type array;
16046 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16048 rtx *loc = *iter;
16049 if (MEM_P (*loc))
16051 rtx addr = XEXP (*loc, 0);
16052 rtx *x = &addr;
16053 while (GET_CODE (*x) == PLUS)
16055 int i;
16056 for (i = 0; i < 2; i++)
16058 rtx u = XEXP (*x, i);
16059 if (GET_CODE (u) == ZERO_EXTEND)
16060 u = XEXP (u, 0);
16061 if (GET_CODE (u) == UNSPEC
16062 && XINT (u, 1) == UNSPEC_TP)
16064 addr_space_t as = DEFAULT_TLS_SEG_REG;
16066 *x = XEXP (*x, 1 - i);
16068 *loc = replace_equiv_address_nv (*loc, addr, true);
16069 set_mem_addr_space (*loc, as);
16070 return;
16073 x = &XEXP (*x, 0);
16076 iter.skip_subrtxes ();
16081 /* Rewrite an instruction pattern involving a TLS address
16082 so that it refers to a default TLS address space. */
16084 ix86_rewrite_tls_address (rtx pattern)
16086 pattern = copy_insn (pattern);
16087 ix86_rewrite_tls_address_1 (&pattern);
16088 return pattern;
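/* Illustrative sketch (not part of the original code): together the two
   helpers above turn an address such as
       (plus (unspec [(const_int 0)] UNSPEC_TP) (reg R))
   inside a MEM into a plain (reg R) address whose MEM is tagged with the
   DEFAULT_TLS_SEG_REG address space, so the %fs/%gs segment override is
   carried by the address space instead of an explicit unspec.  */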
16091 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16092 to symbol DECL if BEIMPORT is true. Otherwise create or return the
16093 unique refptr-DECL symbol corresponding to symbol DECL. */
16095 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16097 static inline hashval_t hash (tree_map *m) { return m->hash; }
16098 static inline bool
16099 equal (tree_map *a, tree_map *b)
16101 return a->base.from == b->base.from;
16104 static int
16105 keep_cache_entry (tree_map *&m)
16107 return ggc_marked_p (m->base.from);
16111 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16113 static tree
16114 get_dllimport_decl (tree decl, bool beimport)
16116 struct tree_map *h, in;
16117 const char *name;
16118 const char *prefix;
16119 size_t namelen, prefixlen;
16120 char *imp_name;
16121 tree to;
16122 rtx rtl;
16124 if (!dllimport_map)
16125 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16127 in.hash = htab_hash_pointer (decl);
16128 in.base.from = decl;
16129 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16130 h = *loc;
16131 if (h)
16132 return h->to;
16134 *loc = h = ggc_alloc<tree_map> ();
16135 h->hash = in.hash;
16136 h->base.from = decl;
16137 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16138 VAR_DECL, NULL, ptr_type_node);
16139 DECL_ARTIFICIAL (to) = 1;
16140 DECL_IGNORED_P (to) = 1;
16141 DECL_EXTERNAL (to) = 1;
16142 TREE_READONLY (to) = 1;
16144 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16145 name = targetm.strip_name_encoding (name);
16146 if (beimport)
16147 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16148 ? "*__imp_" : "*__imp__";
16149 else
16150 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16151 namelen = strlen (name);
16152 prefixlen = strlen (prefix);
16153 imp_name = (char *) alloca (namelen + prefixlen + 1);
16154 memcpy (imp_name, prefix, prefixlen);
16155 memcpy (imp_name + prefixlen, name, namelen + 1);
16157 name = ggc_alloc_string (imp_name, namelen + prefixlen);
16158 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16159 SET_SYMBOL_REF_DECL (rtl, to);
16160 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16161 if (!beimport)
16163 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16164 #ifdef SUB_TARGET_RECORD_STUB
16165 SUB_TARGET_RECORD_STUB (name);
16166 #endif
16169 rtl = gen_const_mem (Pmode, rtl);
16170 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16172 SET_DECL_RTL (to, rtl);
16173 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16175 return to;
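/* Illustrative sketch (not part of the original code): for a dllimported
   declaration "foo" this builds an artificial VAR_DECL whose RTL is
       (mem (symbol_ref "*__imp_foo"))
   ("*__imp__foo" when user labels carry a leading underscore), so references
   to foo become an indirect load through the import address table; the
   refptr variant builds ".refptr.foo"-style stub symbols instead.  */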
16178 /* Expand SYMBOL into its corresponding far-address symbol.
16179 WANT_REG is true if we require the result be a register. */
16181 static rtx
16182 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16184 tree imp_decl;
16185 rtx x;
16187 gcc_assert (SYMBOL_REF_DECL (symbol));
16188 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16190 x = DECL_RTL (imp_decl);
16191 if (want_reg)
16192 x = force_reg (Pmode, x);
16193 return x;
16196 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
16197 true if we require the result be a register. */
16199 static rtx
16200 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16202 tree imp_decl;
16203 rtx x;
16205 gcc_assert (SYMBOL_REF_DECL (symbol));
16206 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16208 x = DECL_RTL (imp_decl);
16209 if (want_reg)
16210 x = force_reg (Pmode, x);
16211 return x;
16214 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
16215 is true if we require the result be a register. */
16217 static rtx
16218 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16220 if (!TARGET_PECOFF)
16221 return NULL_RTX;
16223 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16225 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
16226 return legitimize_dllimport_symbol (addr, inreg);
16227 if (GET_CODE (addr) == CONST
16228 && GET_CODE (XEXP (addr, 0)) == PLUS
16229 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16230 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
16232 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
16233 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16237 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
16238 return NULL_RTX;
16239 if (GET_CODE (addr) == SYMBOL_REF
16240 && !is_imported_p (addr)
16241 && SYMBOL_REF_EXTERNAL_P (addr)
16242 && SYMBOL_REF_DECL (addr))
16243 return legitimize_pe_coff_extern_decl (addr, inreg);
16245 if (GET_CODE (addr) == CONST
16246 && GET_CODE (XEXP (addr, 0)) == PLUS
16247 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
16248 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
16249 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
16250 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
16252 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
16253 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
16255 return NULL_RTX;
16258 /* Try machine-dependent ways of modifying an illegitimate address
16259 to be legitimate. If we find one, return the new, valid address.
16260 This macro is used in only one place: `memory_address' in explow.c.
16262 OLDX is the address as it was before break_out_memory_refs was called.
16263 In some cases it is useful to look at this to decide what needs to be done.
16265 It is always safe for this macro to do nothing. It exists to recognize
16266 opportunities to optimize the output.
16268 For the 80386, we handle X+REG by loading X into a register R and
16269 using R+REG. R will go in a general reg and indexing will be used.
16270 However, if REG is a broken-out memory address or multiplication,
16271 nothing needs to be done because REG can certainly go in a general reg.
16273 When -fpic is used, special handling is needed for symbolic references.
16274 See comments by legitimize_pic_address in i386.c for details. */
16276 static rtx
16277 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
16279 bool changed = false;
16280 unsigned log;
16282 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
16283 if (log)
16284 return legitimize_tls_address (x, (enum tls_model) log, false);
16285 if (GET_CODE (x) == CONST
16286 && GET_CODE (XEXP (x, 0)) == PLUS
16287 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
16288 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
16290 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
16291 (enum tls_model) log, false);
16292 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
16295 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16297 rtx tmp = legitimize_pe_coff_symbol (x, true);
16298 if (tmp)
16299 return tmp;
16302 if (flag_pic && SYMBOLIC_CONST (x))
16303 return legitimize_pic_address (x, 0);
16305 #if TARGET_MACHO
16306 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
16307 return machopic_indirect_data_reference (x, 0);
16308 #endif
16310 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
16311 if (GET_CODE (x) == ASHIFT
16312 && CONST_INT_P (XEXP (x, 1))
16313 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
16315 changed = true;
16316 log = INTVAL (XEXP (x, 1));
16317 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
16318 GEN_INT (1 << log));
16321 if (GET_CODE (x) == PLUS)
16323 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
16325 if (GET_CODE (XEXP (x, 0)) == ASHIFT
16326 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16327 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
16329 changed = true;
16330 log = INTVAL (XEXP (XEXP (x, 0), 1));
16331 XEXP (x, 0) = gen_rtx_MULT (Pmode,
16332 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
16333 GEN_INT (1 << log));
16336 if (GET_CODE (XEXP (x, 1)) == ASHIFT
16337 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
16338 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
16340 changed = true;
16341 log = INTVAL (XEXP (XEXP (x, 1), 1));
16342 XEXP (x, 1) = gen_rtx_MULT (Pmode,
16343 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
16344 GEN_INT (1 << log));
16347 /* Put multiply first if it isn't already. */
16348 if (GET_CODE (XEXP (x, 1)) == MULT)
16350 std::swap (XEXP (x, 0), XEXP (x, 1));
16351 changed = true;
16354 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
16355 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
16356 created by virtual register instantiation, register elimination, and
16357 similar optimizations. */
16358 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
16360 changed = true;
16361 x = gen_rtx_PLUS (Pmode,
16362 gen_rtx_PLUS (Pmode, XEXP (x, 0),
16363 XEXP (XEXP (x, 1), 0)),
16364 XEXP (XEXP (x, 1), 1));
16367 /* Canonicalize
16368 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
16369 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
16370 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
16371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16372 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
16373 && CONSTANT_P (XEXP (x, 1)))
16375 rtx constant;
16376 rtx other = NULL_RTX;
16378 if (CONST_INT_P (XEXP (x, 1)))
16380 constant = XEXP (x, 1);
16381 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
16383 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
16385 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
16386 other = XEXP (x, 1);
16388 else
16389 constant = 0;
16391 if (constant)
16393 changed = true;
16394 x = gen_rtx_PLUS (Pmode,
16395 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
16396 XEXP (XEXP (XEXP (x, 0), 1), 0)),
16397 plus_constant (Pmode, other,
16398 INTVAL (constant)));
16402 if (changed && ix86_legitimate_address_p (mode, x, false))
16403 return x;
16405 if (GET_CODE (XEXP (x, 0)) == MULT)
16407 changed = true;
16408 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
16411 if (GET_CODE (XEXP (x, 1)) == MULT)
16413 changed = true;
16414 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
16417 if (changed
16418 && REG_P (XEXP (x, 1))
16419 && REG_P (XEXP (x, 0)))
16420 return x;
16422 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
16424 changed = true;
16425 x = legitimize_pic_address (x, 0);
16428 if (changed && ix86_legitimate_address_p (mode, x, false))
16429 return x;
16431 if (REG_P (XEXP (x, 0)))
16433 rtx temp = gen_reg_rtx (Pmode);
16434 rtx val = force_operand (XEXP (x, 1), temp);
16435 if (val != temp)
16437 val = convert_to_mode (Pmode, val, 1);
16438 emit_move_insn (temp, val);
16441 XEXP (x, 1) = temp;
16442 return x;
16445 else if (REG_P (XEXP (x, 1)))
16447 rtx temp = gen_reg_rtx (Pmode);
16448 rtx val = force_operand (XEXP (x, 0), temp);
16449 if (val != temp)
16451 val = convert_to_mode (Pmode, val, 1);
16452 emit_move_insn (temp, val);
16455 XEXP (x, 0) = temp;
16456 return x;
16460 return x;
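/* Illustrative sketch (not part of the original code): one of the
   canonicalizations above rewrites
       (plus (ashift (reg R) (const_int 2)) (reg B))
   into
       (plus (mult (reg R) (const_int 4)) (reg B))
   which ix86_decompose_address recognizes as the base + index*scale form
   behind an lea or a memory operand like (%ebx,%ecx,4).  */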
16463 /* Print an integer constant expression in assembler syntax. Addition
16464 and subtraction are the only arithmetic that may appear in these
16465 expressions. FILE is the stdio stream to write to, X is the rtx, and
16466 CODE is the operand print code from the output string. */
16468 static void
16469 output_pic_addr_const (FILE *file, rtx x, int code)
16471 char buf[256];
16473 switch (GET_CODE (x))
16475 case PC:
16476 gcc_assert (flag_pic);
16477 putc ('.', file);
16478 break;
16480 case SYMBOL_REF:
16481 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
16482 output_addr_const (file, x);
16483 else
16485 const char *name = XSTR (x, 0);
16487 /* Mark the decl as referenced so that cgraph will
16488 output the function. */
16489 if (SYMBOL_REF_DECL (x))
16490 mark_decl_referenced (SYMBOL_REF_DECL (x));
16492 #if TARGET_MACHO
16493 if (MACHOPIC_INDIRECT
16494 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
16495 name = machopic_indirection_name (x, /*stub_p=*/true);
16496 #endif
16497 assemble_name (file, name);
16499 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
16500 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
16501 fputs ("@PLT", file);
16502 break;
16504 case LABEL_REF:
16505 x = XEXP (x, 0);
16506 /* FALLTHRU */
16507 case CODE_LABEL:
16508 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
16509 assemble_name (asm_out_file, buf);
16510 break;
16512 case CONST_INT:
16513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16514 break;
16516 case CONST:
16517 /* This used to output parentheses around the expression,
16518 but that does not work on the 386 (either ATT or BSD assembler). */
16519 output_pic_addr_const (file, XEXP (x, 0), code);
16520 break;
16522 case CONST_DOUBLE:
16523 /* We can't handle floating point constants;
16524 TARGET_PRINT_OPERAND must handle them. */
16525 output_operand_lossage ("floating constant misused");
16526 break;
16528 case PLUS:
16529 /* Some assemblers need integer constants to appear first. */
16530 if (CONST_INT_P (XEXP (x, 0)))
16532 output_pic_addr_const (file, XEXP (x, 0), code);
16533 putc ('+', file);
16534 output_pic_addr_const (file, XEXP (x, 1), code);
16536 else
16538 gcc_assert (CONST_INT_P (XEXP (x, 1)));
16539 output_pic_addr_const (file, XEXP (x, 1), code);
16540 putc ('+', file);
16541 output_pic_addr_const (file, XEXP (x, 0), code);
16543 break;
16545 case MINUS:
16546 if (!TARGET_MACHO)
16547 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
16548 output_pic_addr_const (file, XEXP (x, 0), code);
16549 putc ('-', file);
16550 output_pic_addr_const (file, XEXP (x, 1), code);
16551 if (!TARGET_MACHO)
16552 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
16553 break;
16555 case UNSPEC:
16556 gcc_assert (XVECLEN (x, 0) == 1);
16557 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
16558 switch (XINT (x, 1))
16560 case UNSPEC_GOT:
16561 fputs ("@GOT", file);
16562 break;
16563 case UNSPEC_GOTOFF:
16564 fputs ("@GOTOFF", file);
16565 break;
16566 case UNSPEC_PLTOFF:
16567 fputs ("@PLTOFF", file);
16568 break;
16569 case UNSPEC_PCREL:
16570 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16571 "(%rip)" : "[rip]", file);
16572 break;
16573 case UNSPEC_GOTPCREL:
16574 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16575 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
16576 break;
16577 case UNSPEC_GOTTPOFF:
16578 /* FIXME: This might be @TPOFF in Sun ld too. */
16579 fputs ("@gottpoff", file);
16580 break;
16581 case UNSPEC_TPOFF:
16582 fputs ("@tpoff", file);
16583 break;
16584 case UNSPEC_NTPOFF:
16585 if (TARGET_64BIT)
16586 fputs ("@tpoff", file);
16587 else
16588 fputs ("@ntpoff", file);
16589 break;
16590 case UNSPEC_DTPOFF:
16591 fputs ("@dtpoff", file);
16592 break;
16593 case UNSPEC_GOTNTPOFF:
16594 if (TARGET_64BIT)
16595 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16596 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16597 else
16598 fputs ("@gotntpoff", file);
16599 break;
16600 case UNSPEC_INDNTPOFF:
16601 fputs ("@indntpoff", file);
16602 break;
16603 #if TARGET_MACHO
16604 case UNSPEC_MACHOPIC_OFFSET:
16605 putc ('-', file);
16606 machopic_output_function_base_name (file);
16607 break;
16608 #endif
16609 default:
16610 output_operand_lossage ("invalid UNSPEC as operand");
16611 break;
16613 break;
16615 default:
16616 output_operand_lossage ("invalid expression as operand");
16620 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16621 We need to emit DTP-relative relocations. */
16623 static void ATTRIBUTE_UNUSED
16624 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16626 fputs (ASM_LONG, file);
16627 output_addr_const (file, x);
16628 fputs ("@dtpoff", file);
16629 switch (size)
16631 case 4:
16632 break;
16633 case 8:
16634 fputs (", 0", file);
16635 break;
16636 default:
16637 gcc_unreachable ();
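/* Illustrative sketch (not part of the original code): for a TLS variable
   "x" the routine above emits "\t.long\tx@dtpoff" for a 4-byte entry and
   "\t.long\tx@dtpoff, 0" for an 8-byte one, the upper half of the 64-bit
   DTP-relative value being simply zero.  */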
16641 /* Return true if X is a representation of the PIC register. This copes
16642 with calls from ix86_find_base_term, where the register might have
16643 been replaced by a cselib value. */
16645 static bool
16646 ix86_pic_register_p (rtx x)
16648 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16649 return (pic_offset_table_rtx
16650 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16651 else if (!REG_P (x))
16652 return false;
16653 else if (pic_offset_table_rtx)
16655 if (REGNO (x) == REGNO (pic_offset_table_rtx))
16656 return true;
16657 if (HARD_REGISTER_P (x)
16658 && !HARD_REGISTER_P (pic_offset_table_rtx)
16659 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
16660 return true;
16661 return false;
16663 else
16664 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16667 /* Helper function for ix86_delegitimize_address.
16668 Attempt to delegitimize TLS local-exec accesses. */
16670 static rtx
16671 ix86_delegitimize_tls_address (rtx orig_x)
16673 rtx x = orig_x, unspec;
16674 struct ix86_address addr;
16676 if (!TARGET_TLS_DIRECT_SEG_REFS)
16677 return orig_x;
16678 if (MEM_P (x))
16679 x = XEXP (x, 0);
16680 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
16681 return orig_x;
16682 if (ix86_decompose_address (x, &addr) == 0
16683 || addr.seg != DEFAULT_TLS_SEG_REG
16684 || addr.disp == NULL_RTX
16685 || GET_CODE (addr.disp) != CONST)
16686 return orig_x;
16687 unspec = XEXP (addr.disp, 0);
16688 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16689 unspec = XEXP (unspec, 0);
16690 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16691 return orig_x;
16692 x = XVECEXP (unspec, 0, 0);
16693 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16694 if (unspec != XEXP (addr.disp, 0))
16695 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16696 if (addr.index)
16698 rtx idx = addr.index;
16699 if (addr.scale != 1)
16700 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16701 x = gen_rtx_PLUS (Pmode, idx, x);
16703 if (addr.base)
16704 x = gen_rtx_PLUS (Pmode, addr.base, x);
16705 if (MEM_P (orig_x))
16706 x = replace_equiv_address_nv (orig_x, x);
16707 return x;
16710 /* In the name of slightly smaller debug output, and to cater to
16711 general assembler lossage, recognize PIC+GOTOFF and turn it back
16712 into a direct symbol reference.
16714 On Darwin, this is necessary to avoid a crash, because Darwin
16715 has a different PIC label for each routine but the DWARF debugging
16716 information is not associated with any particular routine, so it's
16717 necessary to remove references to the PIC label from RTL stored by
16718 the DWARF output code.
16720 This helper is used in the normal ix86_delegitimize_address
16721 entrypoint (e.g. used in the target delegitimization hook) and
16722 in ix86_find_base_term. As compile time memory optimization, we
16723 avoid allocating rtxes that will not change anything on the outcome
16724 of the callers (find_base_value and find_base_term). */
16726 static inline rtx
16727 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
16729 rtx orig_x = delegitimize_mem_from_attrs (x);
16730 /* addend is NULL or some rtx if x is something+GOTOFF where
16731 something doesn't include the PIC register. */
16732 rtx addend = NULL_RTX;
16733 /* reg_addend is NULL or a multiple of some register. */
16734 rtx reg_addend = NULL_RTX;
16735 /* const_addend is NULL or a const_int. */
16736 rtx const_addend = NULL_RTX;
16737 /* This is the result, or NULL. */
16738 rtx result = NULL_RTX;
16740 x = orig_x;
16742 if (MEM_P (x))
16743 x = XEXP (x, 0);
16745 if (TARGET_64BIT)
16747 if (GET_CODE (x) == CONST
16748 && GET_CODE (XEXP (x, 0)) == PLUS
16749 && GET_MODE (XEXP (x, 0)) == Pmode
16750 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16752 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16754 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
16755 base. A CONST can't be arg_pointer_rtx based. */
16756 if (base_term_p && MEM_P (orig_x))
16757 return orig_x;
16758 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16759 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16760 if (MEM_P (orig_x))
16761 x = replace_equiv_address_nv (orig_x, x);
16762 return x;
16765 if (GET_CODE (x) == CONST
16766 && GET_CODE (XEXP (x, 0)) == UNSPEC
16767 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16768 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16769 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16771 x = XVECEXP (XEXP (x, 0), 0, 0);
16772 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16774 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
16775 if (x == NULL_RTX)
16776 return orig_x;
16778 return x;
16781 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16782 return ix86_delegitimize_tls_address (orig_x);
16784 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16785 and -mcmodel=medium -fpic. */
16788 if (GET_CODE (x) != PLUS
16789 || GET_CODE (XEXP (x, 1)) != CONST)
16790 return ix86_delegitimize_tls_address (orig_x);
16792 if (ix86_pic_register_p (XEXP (x, 0)))
16793 /* %ebx + GOT/GOTOFF */
16795 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16797 /* %ebx + %reg * scale + GOT/GOTOFF */
16798 reg_addend = XEXP (x, 0);
16799 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16800 reg_addend = XEXP (reg_addend, 1);
16801 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16802 reg_addend = XEXP (reg_addend, 0);
16803 else
16805 reg_addend = NULL_RTX;
16806 addend = XEXP (x, 0);
16809 else
16810 addend = XEXP (x, 0);
16812 x = XEXP (XEXP (x, 1), 0);
16813 if (GET_CODE (x) == PLUS
16814 && CONST_INT_P (XEXP (x, 1)))
16816 const_addend = XEXP (x, 1);
16817 x = XEXP (x, 0);
16820 if (GET_CODE (x) == UNSPEC
16821 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16822 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16823 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16824 && !MEM_P (orig_x) && !addend)))
16825 result = XVECEXP (x, 0, 0);
16827 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16828 && !MEM_P (orig_x))
16829 result = XVECEXP (x, 0, 0);
16831 if (! result)
16832 return ix86_delegitimize_tls_address (orig_x);
16834 /* For (PLUS something CONST_INT) both find_base_{value,term} just
16835 recurse on the first operand. */
16836 if (const_addend && !base_term_p)
16837 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16838 if (reg_addend)
16839 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16840 if (addend)
16842 /* If the rest of original X doesn't involve the PIC register, add
16843 addend and subtract pic_offset_table_rtx. This can happen e.g.
16844 for code like:
16845 leal (%ebx, %ecx, 4), %ecx
16847 movl foo@GOTOFF(%ecx), %edx
16848 in which case we return (%ecx - %ebx) + foo
16849 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16850 and reload has completed. Don't do the latter for debug,
16851 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
16852 if (pic_offset_table_rtx
16853 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16854 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16855 pic_offset_table_rtx),
16856 result);
16857 else if (base_term_p
16858 && pic_offset_table_rtx
16859 && !TARGET_MACHO
16860 && !TARGET_VXWORKS_RTP)
16862 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16863 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16864 result = gen_rtx_PLUS (Pmode, tmp, result);
16866 else
16867 return orig_x;
16869 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16871 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
16872 if (result == NULL_RTX)
16873 return orig_x;
16875 return result;
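/* Illustrative sketch (not part of the original code): in 32-bit PIC code an
   address computation such as
       (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   delegitimizes back to (symbol_ref "foo"), and a GOT slot load
       (mem (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))
   likewise yields the bare symbol, so debug output and base-term analysis see
   the underlying symbol rather than the PIC machinery.  */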
16878 /* The normal instantiation of the above template. */
16880 static rtx
16881 ix86_delegitimize_address (rtx x)
16883 return ix86_delegitimize_address_1 (x, false);
16886 /* If X is a machine specific address (i.e. a symbol or label being
16887 referenced as a displacement from the GOT implemented using an
16888 UNSPEC), then return the base term. Otherwise return X. */
16891 ix86_find_base_term (rtx x)
16893 rtx term;
16895 if (TARGET_64BIT)
16897 if (GET_CODE (x) != CONST)
16898 return x;
16899 term = XEXP (x, 0);
16900 if (GET_CODE (term) == PLUS
16901 && CONST_INT_P (XEXP (term, 1)))
16902 term = XEXP (term, 0);
16903 if (GET_CODE (term) != UNSPEC
16904 || (XINT (term, 1) != UNSPEC_GOTPCREL
16905 && XINT (term, 1) != UNSPEC_PCREL))
16906 return x;
16908 return XVECEXP (term, 0, 0);
16911 return ix86_delegitimize_address_1 (x, true);
16914 /* Return true if X shouldn't be emitted into the debug info.
16915 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
16916 symbol easily into the .debug_info section, so we do not
16917 delegitimize it, but instead assemble it as @gotoff.
16918 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
16919 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
16921 static bool
16922 ix86_const_not_ok_for_debug_p (rtx x)
16924 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
16925 return true;
16927 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
16928 return true;
16930 return false;
16933 static void
16934 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16935 bool fp, FILE *file)
16937 const char *suffix;
16939 if (mode == CCFPmode)
16941 code = ix86_fp_compare_code_to_integer (code);
16942 mode = CCmode;
16944 if (reverse)
16945 code = reverse_condition (code);
16947 switch (code)
16949 case EQ:
16950 gcc_assert (mode != CCGZmode);
16951 switch (mode)
16953 case E_CCAmode:
16954 suffix = "a";
16955 break;
16956 case E_CCCmode:
16957 suffix = "c";
16958 break;
16959 case E_CCOmode:
16960 suffix = "o";
16961 break;
16962 case E_CCPmode:
16963 suffix = "p";
16964 break;
16965 case E_CCSmode:
16966 suffix = "s";
16967 break;
16968 default:
16969 suffix = "e";
16970 break;
16972 break;
16973 case NE:
16974 gcc_assert (mode != CCGZmode);
16975 switch (mode)
16977 case E_CCAmode:
16978 suffix = "na";
16979 break;
16980 case E_CCCmode:
16981 suffix = "nc";
16982 break;
16983 case E_CCOmode:
16984 suffix = "no";
16985 break;
16986 case E_CCPmode:
16987 suffix = "np";
16988 break;
16989 case E_CCSmode:
16990 suffix = "ns";
16991 break;
16992 default:
16993 suffix = "ne";
16994 break;
16996 break;
16997 case GT:
16998 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16999 suffix = "g";
17000 break;
17001 case GTU:
17002 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17003 Those same assemblers have the same but opposite lossage on cmov. */
17004 if (mode == CCmode)
17005 suffix = fp ? "nbe" : "a";
17006 else
17007 gcc_unreachable ();
17008 break;
17009 case LT:
17010 switch (mode)
17012 case E_CCNOmode:
17013 case E_CCGOCmode:
17014 suffix = "s";
17015 break;
17017 case E_CCmode:
17018 case E_CCGCmode:
17019 case E_CCGZmode:
17020 suffix = "l";
17021 break;
17023 default:
17024 gcc_unreachable ();
17026 break;
17027 case LTU:
17028 if (mode == CCmode || mode == CCGZmode)
17029 suffix = "b";
17030 else if (mode == CCCmode)
17031 suffix = fp ? "b" : "c";
17032 else
17033 gcc_unreachable ();
17034 break;
17035 case GE:
17036 switch (mode)
17038 case E_CCNOmode:
17039 case E_CCGOCmode:
17040 suffix = "ns";
17041 break;
17043 case E_CCmode:
17044 case E_CCGCmode:
17045 case E_CCGZmode:
17046 suffix = "ge";
17047 break;
17049 default:
17050 gcc_unreachable ();
17052 break;
17053 case GEU:
17054 if (mode == CCmode || mode == CCGZmode)
17055 suffix = "nb";
17056 else if (mode == CCCmode)
17057 suffix = fp ? "nb" : "nc";
17058 else
17059 gcc_unreachable ();
17060 break;
17061 case LE:
17062 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17063 suffix = "le";
17064 break;
17065 case LEU:
17066 if (mode == CCmode)
17067 suffix = "be";
17068 else
17069 gcc_unreachable ();
17070 break;
17071 case UNORDERED:
17072 suffix = fp ? "u" : "p";
17073 break;
17074 case ORDERED:
17075 suffix = fp ? "nu" : "np";
17076 break;
17077 default:
17078 gcc_unreachable ();
17080 fputs (suffix, file);
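/* A few illustrative mappings of the switch above (examples only, not
   exhaustive): (GTU, CCmode, !reverse, !fp) yields "a", so a set pattern
   using the %C operand code would print "seta"; (LTU, CCCmode) yields
   "c" ("b" in the fcmov case); EQ with REVERSE set falls back to "ne".  */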
17083 /* Print the name of register X to FILE based on its machine mode and number.
17084 If CODE is 'w', pretend the mode is HImode.
17085 If CODE is 'b', pretend the mode is QImode.
17086 If CODE is 'k', pretend the mode is SImode.
17087 If CODE is 'q', pretend the mode is DImode.
17088 If CODE is 'x', pretend the mode is V4SFmode.
17089 If CODE is 't', pretend the mode is V8SFmode.
17090 If CODE is 'g', pretend the mode is V16SFmode.
17091 If CODE is 'h', pretend the reg is the 'high' byte register.
17092 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
17093 If CODE is 'd', duplicate the operand for AVX instruction.
17096 void
17097 print_reg (rtx x, int code, FILE *file)
17099 const char *reg;
17100 int msize;
17101 unsigned int regno;
17102 bool duplicated;
17104 if (ASSEMBLER_DIALECT == ASM_ATT)
17105 putc ('%', file);
17107 if (x == pc_rtx)
17109 gcc_assert (TARGET_64BIT);
17110 fputs ("rip", file);
17111 return;
17114 if (code == 'y' && STACK_TOP_P (x))
17116 fputs ("st(0)", file);
17117 return;
17120 if (code == 'w')
17121 msize = 2;
17122 else if (code == 'b')
17123 msize = 1;
17124 else if (code == 'k')
17125 msize = 4;
17126 else if (code == 'q')
17127 msize = 8;
17128 else if (code == 'h')
17129 msize = 0;
17130 else if (code == 'x')
17131 msize = 16;
17132 else if (code == 't')
17133 msize = 32;
17134 else if (code == 'g')
17135 msize = 64;
17136 else
17137 msize = GET_MODE_SIZE (GET_MODE (x));
17139 regno = REGNO (x);
17141 if (regno == ARG_POINTER_REGNUM
17142 || regno == FRAME_POINTER_REGNUM
17143 || regno == FPSR_REG
17144 || regno == FPCR_REG)
17146 output_operand_lossage
17147 ("invalid use of register '%s'", reg_names[regno]);
17148 return;
17150 else if (regno == FLAGS_REG)
17152 output_operand_lossage ("invalid use of asm flag output");
17153 return;
17156 duplicated = code == 'd' && TARGET_AVX;
17158 switch (msize)
17160 case 16:
17161 case 12:
17162 case 8:
17163 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17164 warning (0, "unsupported size for integer register");
17165 /* FALLTHRU */
17166 case 4:
17167 if (LEGACY_INT_REGNO_P (regno))
17168 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17169 /* FALLTHRU */
17170 case 2:
17171 normal:
17172 reg = hi_reg_name[regno];
17173 break;
17174 case 1:
17175 if (regno >= ARRAY_SIZE (qi_reg_name))
17176 goto normal;
17177 if (!ANY_QI_REGNO_P (regno))
17178 error ("unsupported size for integer register");
17179 reg = qi_reg_name[regno];
17180 break;
17181 case 0:
17182 if (regno >= ARRAY_SIZE (qi_high_reg_name))
17183 goto normal;
17184 reg = qi_high_reg_name[regno];
17185 break;
17186 case 32:
17187 case 64:
17188 if (SSE_REGNO_P (regno))
17190 gcc_assert (!duplicated);
17191 putc (msize == 32 ? 'y' : 'z', file);
17192 reg = hi_reg_name[regno] + 1;
17193 break;
17195 goto normal;
17196 default:
17197 gcc_unreachable ();
17200 fputs (reg, file);
17202 /* Irritatingly, AMD extended registers use a
17203 different naming convention: "r%d[bwd]". */
17204 if (REX_INT_REGNO_P (regno))
17206 gcc_assert (TARGET_64BIT);
17207 switch (msize)
17209 case 0:
17210 error ("extended registers have no high halves");
17211 break;
17212 case 1:
17213 putc ('b', file);
17214 break;
17215 case 2:
17216 putc ('w', file);
17217 break;
17218 case 4:
17219 putc ('d', file);
17220 break;
17221 case 8:
17222 /* no suffix */
17223 break;
17224 default:
17225 error ("unsupported operand size for extended register");
17226 break;
17228 return;
17231 if (duplicated)
17233 if (ASSEMBLER_DIALECT == ASM_ATT)
17234 fprintf (file, ", %%%s", reg);
17235 else
17236 fprintf (file, ", %s", reg);
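/* Some illustrative outputs of print_reg in AT&T syntax, assuming the
   operand is register AX_REG: code 'q' prints "%rax" (64-bit only),
   'k' prints "%eax", 'w' prints "%ax", 'b' prints "%al" and 'h' prints
   "%ah"; for an SSE register, 't' prints "%ymm0" and 'g' prints "%zmm0".  */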
17240 /* Meaning of CODE:
17241 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
17242 C -- print opcode suffix for set/cmov insn.
17243 c -- like C, but print reversed condition
17244 F,f -- likewise, but for floating-point.
17245 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
17246 otherwise nothing
17247 R -- print embedded rounding and sae.
17248 r -- print only sae.
17249 z -- print the opcode suffix for the size of the current operand.
17250 Z -- likewise, with special suffixes for x87 instructions.
17251 * -- print a star (in certain assembler syntax)
17252 A -- print an absolute memory reference.
17253 E -- print address with DImode register names if TARGET_64BIT.
17254 w -- print the operand as if it's a "word" (HImode) even if it isn't.
17255 s -- print a shift double count, followed by the assembler's argument
17256 delimiter.
17257 b -- print the QImode name of the register for the indicated operand.
17258 %b0 would print %al if operands[0] is reg 0.
17259 w -- likewise, print the HImode name of the register.
17260 k -- likewise, print the SImode name of the register.
17261 q -- likewise, print the DImode name of the register.
17262 x -- likewise, print the V4SFmode name of the register.
17263 t -- likewise, print the V8SFmode name of the register.
17264 g -- likewise, print the V16SFmode name of the register.
17265 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
17266 y -- print "st(0)" instead of "st" as a register.
17267 d -- print duplicated register operand for AVX instruction.
17268 D -- print condition for SSE cmp instruction.
17269 P -- if PIC, print an @PLT suffix.
17270 p -- print raw symbol name.
17271 X -- don't print any sort of PIC '@' suffix for a symbol.
17272 & -- print some in-use local-dynamic symbol name.
17273 H -- print a memory address offset by 8; used for sse high-parts
17274 Y -- print condition for XOP pcom* instruction.
17275 + -- print a branch hint as 'cs' or 'ds' prefix
17276 ; -- print a semicolon (after prefixes due to bug in older gas).
17277 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
17278 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
17279 ! -- print MPX (bnd) or NOTRACK prefix for jxx/call/ret instructions if required.
17282 void
17283 ix86_print_operand (FILE *file, rtx x, int code)
17285 if (code)
17287 switch (code)
17289 case 'A':
17290 switch (ASSEMBLER_DIALECT)
17292 case ASM_ATT:
17293 putc ('*', file);
17294 break;
17296 case ASM_INTEL:
17297 /* Intel syntax. For absolute addresses, registers should not
17298 be surrounded by braces. */
17299 if (!REG_P (x))
17301 putc ('[', file);
17302 ix86_print_operand (file, x, 0);
17303 putc (']', file);
17304 return;
17306 break;
17308 default:
17309 gcc_unreachable ();
17312 ix86_print_operand (file, x, 0);
17313 return;
17315 case 'E':
17316 /* Wrap address in an UNSPEC to declare special handling. */
17317 if (TARGET_64BIT)
17318 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
17320 output_address (VOIDmode, x);
17321 return;
17323 case 'L':
17324 if (ASSEMBLER_DIALECT == ASM_ATT)
17325 putc ('l', file);
17326 return;
17328 case 'W':
17329 if (ASSEMBLER_DIALECT == ASM_ATT)
17330 putc ('w', file);
17331 return;
17333 case 'B':
17334 if (ASSEMBLER_DIALECT == ASM_ATT)
17335 putc ('b', file);
17336 return;
17338 case 'Q':
17339 if (ASSEMBLER_DIALECT == ASM_ATT)
17340 putc ('l', file);
17341 return;
17343 case 'S':
17344 if (ASSEMBLER_DIALECT == ASM_ATT)
17345 putc ('s', file);
17346 return;
17348 case 'T':
17349 if (ASSEMBLER_DIALECT == ASM_ATT)
17350 putc ('t', file);
17351 return;
17353 case 'O':
17354 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17355 if (ASSEMBLER_DIALECT != ASM_ATT)
17356 return;
17358 switch (GET_MODE_SIZE (GET_MODE (x)))
17360 case 2:
17361 putc ('w', file);
17362 break;
17364 case 4:
17365 putc ('l', file);
17366 break;
17368 case 8:
17369 putc ('q', file);
17370 break;
17372 default:
17373 output_operand_lossage ("invalid operand size for operand "
17374 "code 'O'");
17375 return;
17378 putc ('.', file);
17379 #endif
17380 return;
17382 case 'z':
17383 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17385 /* Opcodes don't get size suffixes if using Intel opcodes. */
17386 if (ASSEMBLER_DIALECT == ASM_INTEL)
17387 return;
17389 switch (GET_MODE_SIZE (GET_MODE (x)))
17391 case 1:
17392 putc ('b', file);
17393 return;
17395 case 2:
17396 putc ('w', file);
17397 return;
17399 case 4:
17400 putc ('l', file);
17401 return;
17403 case 8:
17404 putc ('q', file);
17405 return;
17407 default:
17408 output_operand_lossage ("invalid operand size for operand "
17409 "code 'z'");
17410 return;
17414 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17415 warning (0, "non-integer operand used with operand code 'z'");
17416 /* FALLTHRU */
17418 case 'Z':
17419 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
17420 if (ASSEMBLER_DIALECT == ASM_INTEL)
17421 return;
17423 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
17425 switch (GET_MODE_SIZE (GET_MODE (x)))
17427 case 2:
17428 #ifdef HAVE_AS_IX86_FILDS
17429 putc ('s', file);
17430 #endif
17431 return;
17433 case 4:
17434 putc ('l', file);
17435 return;
17437 case 8:
17438 #ifdef HAVE_AS_IX86_FILDQ
17439 putc ('q', file);
17440 #else
17441 fputs ("ll", file);
17442 #endif
17443 return;
17445 default:
17446 break;
17449 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
17451 /* 387 opcodes don't get size suffixes
17452 if the operands are registers. */
17453 if (STACK_REG_P (x))
17454 return;
17456 switch (GET_MODE_SIZE (GET_MODE (x)))
17458 case 4:
17459 putc ('s', file);
17460 return;
17462 case 8:
17463 putc ('l', file);
17464 return;
17466 case 12:
17467 case 16:
17468 putc ('t', file);
17469 return;
17471 default:
17472 break;
17475 else
17477 output_operand_lossage ("invalid operand type used with "
17478 "operand code 'Z'");
17479 return;
17482 output_operand_lossage ("invalid operand size for operand code 'Z'");
17483 return;
17485 case 'd':
17486 case 'b':
17487 case 'w':
17488 case 'k':
17489 case 'q':
17490 case 'h':
17491 case 't':
17492 case 'g':
17493 case 'y':
17494 case 'x':
17495 case 'X':
17496 case 'P':
17497 case 'p':
17498 break;
17500 case 's':
17501 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
17503 ix86_print_operand (file, x, 0);
17504 fputs (", ", file);
17506 return;
17508 case 'Y':
17509 switch (GET_CODE (x))
17511 case NE:
17512 fputs ("neq", file);
17513 break;
17514 case EQ:
17515 fputs ("eq", file);
17516 break;
17517 case GE:
17518 case GEU:
17519 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
17520 break;
17521 case GT:
17522 case GTU:
17523 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
17524 break;
17525 case LE:
17526 case LEU:
17527 fputs ("le", file);
17528 break;
17529 case LT:
17530 case LTU:
17531 fputs ("lt", file);
17532 break;
17533 case UNORDERED:
17534 fputs ("unord", file);
17535 break;
17536 case ORDERED:
17537 fputs ("ord", file);
17538 break;
17539 case UNEQ:
17540 fputs ("ueq", file);
17541 break;
17542 case UNGE:
17543 fputs ("nlt", file);
17544 break;
17545 case UNGT:
17546 fputs ("nle", file);
17547 break;
17548 case UNLE:
17549 fputs ("ule", file);
17550 break;
17551 case UNLT:
17552 fputs ("ult", file);
17553 break;
17554 case LTGT:
17555 fputs ("une", file);
17556 break;
17557 default:
17558 output_operand_lossage ("operand is not a condition code, "
17559 "invalid operand code 'Y'");
17560 return;
17562 return;
17564 case 'D':
17565 /* Little bit of braindamage here. The SSE compare instructions
17566 use completely different names for the comparisons than the
17567 fp conditional moves do. */
17568 switch (GET_CODE (x))
17570 case UNEQ:
17571 if (TARGET_AVX)
17573 fputs ("eq_us", file);
17574 break;
17576 /* FALLTHRU */
17577 case EQ:
17578 fputs ("eq", file);
17579 break;
17580 case UNLT:
17581 if (TARGET_AVX)
17583 fputs ("nge", file);
17584 break;
17586 /* FALLTHRU */
17587 case LT:
17588 fputs ("lt", file);
17589 break;
17590 case UNLE:
17591 if (TARGET_AVX)
17593 fputs ("ngt", file);
17594 break;
17596 /* FALLTHRU */
17597 case LE:
17598 fputs ("le", file);
17599 break;
17600 case UNORDERED:
17601 fputs ("unord", file);
17602 break;
17603 case LTGT:
17604 if (TARGET_AVX)
17606 fputs ("neq_oq", file);
17607 break;
17609 /* FALLTHRU */
17610 case NE:
17611 fputs ("neq", file);
17612 break;
17613 case GE:
17614 if (TARGET_AVX)
17616 fputs ("ge", file);
17617 break;
17619 /* FALLTHRU */
17620 case UNGE:
17621 fputs ("nlt", file);
17622 break;
17623 case GT:
17624 if (TARGET_AVX)
17626 fputs ("gt", file);
17627 break;
17629 /* FALLTHRU */
17630 case UNGT:
17631 fputs ("nle", file);
17632 break;
17633 case ORDERED:
17634 fputs ("ord", file);
17635 break;
17636 default:
17637 output_operand_lossage ("operand is not a condition code, "
17638 "invalid operand code 'D'");
17639 return;
17641 return;
17643 case 'F':
17644 case 'f':
17645 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
17646 if (ASSEMBLER_DIALECT == ASM_ATT)
17647 putc ('.', file);
17648 gcc_fallthrough ();
17649 #endif
17651 case 'C':
17652 case 'c':
17653 if (!COMPARISON_P (x))
17655 output_operand_lossage ("operand is not a condition code, "
17656 "invalid operand code '%c'", code);
17657 return;
17659 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17660 code == 'c' || code == 'f',
17661 code == 'F' || code == 'f',
17662 file);
17663 return;
17665 case 'H':
17666 if (!offsettable_memref_p (x))
17668 output_operand_lossage ("operand is not an offsettable memory "
17669 "reference, invalid operand code 'H'");
17670 return;
17672 /* It doesn't actually matter what mode we use here, as we're
17673 only going to use this for printing. */
17674 x = adjust_address_nv (x, DImode, 8);
17675 /* Output 'qword ptr' for intel assembler dialect. */
17676 if (ASSEMBLER_DIALECT == ASM_INTEL)
17677 code = 'q';
17678 break;
17680 case 'K':
17681 if (!CONST_INT_P (x))
17683 output_operand_lossage ("operand is not an integer, invalid "
17684 "operand code 'K'");
17685 return;
17688 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17689 #ifdef HAVE_AS_IX86_HLE
17690 fputs ("xacquire ", file);
17691 #else
17692 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17693 #endif
17694 else if (INTVAL (x) & IX86_HLE_RELEASE)
17695 #ifdef HAVE_AS_IX86_HLE
17696 fputs ("xrelease ", file);
17697 #else
17698 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17699 #endif
17700 /* We do not want to print the value of the operand. */
17701 return;
17703 case 'N':
17704 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17705 fputs ("{z}", file);
17706 return;
17708 case 'r':
17709 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
17711 output_operand_lossage ("operand is not a specific integer, "
17712 "invalid operand code 'r'");
17713 return;
17716 if (ASSEMBLER_DIALECT == ASM_INTEL)
17717 fputs (", ", file);
17719 fputs ("{sae}", file);
17721 if (ASSEMBLER_DIALECT == ASM_ATT)
17722 fputs (", ", file);
17724 return;
17726 case 'R':
17727 if (!CONST_INT_P (x))
17729 output_operand_lossage ("operand is not an integer, invalid "
17730 "operand code 'R'");
17731 return;
17734 if (ASSEMBLER_DIALECT == ASM_INTEL)
17735 fputs (", ", file);
17737 switch (INTVAL (x))
17739 case ROUND_NEAREST_INT | ROUND_SAE:
17740 fputs ("{rn-sae}", file);
17741 break;
17742 case ROUND_NEG_INF | ROUND_SAE:
17743 fputs ("{rd-sae}", file);
17744 break;
17745 case ROUND_POS_INF | ROUND_SAE:
17746 fputs ("{ru-sae}", file);
17747 break;
17748 case ROUND_ZERO | ROUND_SAE:
17749 fputs ("{rz-sae}", file);
17750 break;
17751 default:
17752 output_operand_lossage ("operand is not a specific integer, "
17753 "invalid operand code 'R'");
17756 if (ASSEMBLER_DIALECT == ASM_ATT)
17757 fputs (", ", file);
17759 return;
17761 case '*':
17762 if (ASSEMBLER_DIALECT == ASM_ATT)
17763 putc ('*', file);
17764 return;
17766 case '&':
17768 const char *name = get_some_local_dynamic_name ();
17769 if (name == NULL)
17770 output_operand_lossage ("'%%&' used without any "
17771 "local dynamic TLS references");
17772 else
17773 assemble_name (file, name);
17774 return;
17777 case '+':
17779 rtx x;
17781 if (!optimize
17782 || optimize_function_for_size_p (cfun)
17783 || !TARGET_BRANCH_PREDICTION_HINTS)
17784 return;
17786 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17787 if (x)
17789 int pred_val = profile_probability::from_reg_br_prob_note
17790 (XINT (x, 0)).to_reg_br_prob_base ();
17792 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17793 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17795 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17796 bool cputaken
17797 = final_forward_branch_p (current_output_insn) == 0;
17799 /* Emit hints only in the case the default branch prediction
17800 heuristics would fail. */
17801 if (taken != cputaken)
17803 /* We use 3e (DS) prefix for taken branches and
17804 2e (CS) prefix for not taken branches. */
17805 if (taken)
17806 fputs ("ds ; ", file);
17807 else
17808 fputs ("cs ; ", file);
17812 return;
17815 case ';':
17816 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
17817 putc (';', file);
17818 #endif
17819 return;
17821 case '~':
17822 putc (TARGET_AVX2 ? 'i' : 'f', file);
17823 return;
17825 case '^':
17826 if (TARGET_64BIT && Pmode != word_mode)
17827 fputs ("addr32 ", file);
17828 return;
17830 case '!':
17831 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17832 fputs ("bnd ", file);
17833 if (ix86_notrack_prefixed_insn_p (current_output_insn))
17834 fputs ("notrack ", file);
17835 return;
17837 default:
17838 output_operand_lossage ("invalid operand code '%c'", code);
17842 if (REG_P (x))
17843 print_reg (x, code, file);
17845 else if (MEM_P (x))
17847 rtx addr = XEXP (x, 0);
17849 /* No `byte ptr' prefix for call instructions ... */
17850 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17852 machine_mode mode = GET_MODE (x);
17853 const char *size;
17855 /* Check for explicit size override codes. */
17856 if (code == 'b')
17857 size = "BYTE";
17858 else if (code == 'w')
17859 size = "WORD";
17860 else if (code == 'k')
17861 size = "DWORD";
17862 else if (code == 'q')
17863 size = "QWORD";
17864 else if (code == 'x')
17865 size = "XMMWORD";
17866 else if (code == 't')
17867 size = "YMMWORD";
17868 else if (code == 'g')
17869 size = "ZMMWORD";
17870 else if (mode == BLKmode)
17871 /* ... or BLKmode operands, when not overridden. */
17872 size = NULL;
17873 else
17874 switch (GET_MODE_SIZE (mode))
17876 case 1: size = "BYTE"; break;
17877 case 2: size = "WORD"; break;
17878 case 4: size = "DWORD"; break;
17879 case 8: size = "QWORD"; break;
17880 case 12: size = "TBYTE"; break;
17881 case 16:
17882 if (mode == XFmode)
17883 size = "TBYTE";
17884 else
17885 size = "XMMWORD";
17886 break;
17887 case 32: size = "YMMWORD"; break;
17888 case 64: size = "ZMMWORD"; break;
17889 default:
17890 gcc_unreachable ();
17892 if (size)
17894 fputs (size, file);
17895 fputs (" PTR ", file);
17899 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17900 output_operand_lossage ("invalid constraints for operand");
17901 else
17902 ix86_print_operand_address_as
17903 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
17906 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17908 long l;
17910 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17912 if (ASSEMBLER_DIALECT == ASM_ATT)
17913 putc ('$', file);
17914 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17915 if (code == 'q')
17916 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17917 (unsigned long long) (int) l);
17918 else
17919 fprintf (file, "0x%08x", (unsigned int) l);
17922 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17924 long l[2];
17926 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17928 if (ASSEMBLER_DIALECT == ASM_ATT)
17929 putc ('$', file);
17930 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17933 /* These float cases don't actually occur as immediate operands. */
17934 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17936 char dstr[30];
17938 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17939 fputs (dstr, file);
17942 else
17944 /* We have patterns that allow zero sets of memory, for instance.
17945 In 64-bit mode, we should probably support all 8-byte vectors,
17946 since we can in fact encode that into an immediate. */
17947 if (GET_CODE (x) == CONST_VECTOR)
17949 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
17950 x = const0_rtx;
17953 if (code != 'P' && code != 'p')
17955 if (CONST_INT_P (x))
17957 if (ASSEMBLER_DIALECT == ASM_ATT)
17958 putc ('$', file);
17960 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17961 || GET_CODE (x) == LABEL_REF)
17963 if (ASSEMBLER_DIALECT == ASM_ATT)
17964 putc ('$', file);
17965 else
17966 fputs ("OFFSET FLAT:", file);
17969 if (CONST_INT_P (x))
17970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17971 else if (flag_pic || MACHOPIC_INDIRECT)
17972 output_pic_addr_const (file, x, code);
17973 else
17974 output_addr_const (file, x);
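/* As a rough example of how the operand codes above are used, a
   (hypothetical) insn template such as "cmp%z0\t{%1, %0|%0, %1}" picks the
   size suffix from operand 0, so a SImode register compare comes out as
   "cmpl %eax, %edx" in AT&T syntax and as "cmp edx, eax" in Intel syntax
   (example operands only).  */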
17978 static bool
17979 ix86_print_operand_punct_valid_p (unsigned char code)
17981 return (code == '*' || code == '+' || code == '&' || code == ';'
17982 || code == '~' || code == '^' || code == '!');
17985 /* Print a memory operand whose address is ADDR. */
17987 static void
17988 ix86_print_operand_address_as (FILE *file, rtx addr,
17989 addr_space_t as, bool no_rip)
17991 struct ix86_address parts;
17992 rtx base, index, disp;
17993 int scale;
17994 int ok;
17995 bool vsib = false;
17996 int code = 0;
17998 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18000 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18001 gcc_assert (parts.index == NULL_RTX);
18002 parts.index = XVECEXP (addr, 0, 1);
18003 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18004 addr = XVECEXP (addr, 0, 0);
18005 vsib = true;
18007 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18009 gcc_assert (TARGET_64BIT);
18010 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18011 code = 'q';
18013 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18015 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18016 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18017 if (parts.base != NULL_RTX)
18019 parts.index = parts.base;
18020 parts.scale = 1;
18022 parts.base = XVECEXP (addr, 0, 0);
18023 addr = XVECEXP (addr, 0, 0);
18025 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18027 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18028 gcc_assert (parts.index == NULL_RTX);
18029 parts.index = XVECEXP (addr, 0, 1);
18030 addr = XVECEXP (addr, 0, 0);
18032 else
18033 ok = ix86_decompose_address (addr, &parts);
18035 gcc_assert (ok);
18037 base = parts.base;
18038 index = parts.index;
18039 disp = parts.disp;
18040 scale = parts.scale;
18042 if (ADDR_SPACE_GENERIC_P (as))
18043 as = parts.seg;
18044 else
18045 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18047 if (!ADDR_SPACE_GENERIC_P (as))
18049 const char *string;
18051 if (as == ADDR_SPACE_SEG_FS)
18052 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18053 else if (as == ADDR_SPACE_SEG_GS)
18054 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18055 else
18056 gcc_unreachable ();
18057 fputs (string, file);
18060 /* Use one byte shorter RIP relative addressing for 64bit mode. */
18061 if (TARGET_64BIT && !base && !index && !no_rip)
18063 rtx symbol = disp;
18065 if (GET_CODE (disp) == CONST
18066 && GET_CODE (XEXP (disp, 0)) == PLUS
18067 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18068 symbol = XEXP (XEXP (disp, 0), 0);
18070 if (GET_CODE (symbol) == LABEL_REF
18071 || (GET_CODE (symbol) == SYMBOL_REF
18072 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18073 base = pc_rtx;
18076 if (!base && !index)
18078 /* A displacement-only address requires special attention. */
18079 if (CONST_INT_P (disp))
18081 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18082 fputs ("ds:", file);
18083 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18085 /* Load the external function address via the GOT slot to avoid PLT. */
18086 else if (GET_CODE (disp) == CONST
18087 && GET_CODE (XEXP (disp, 0)) == UNSPEC
18088 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18089 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18090 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18091 output_pic_addr_const (file, disp, 0);
18092 else if (flag_pic)
18093 output_pic_addr_const (file, disp, 0);
18094 else
18095 output_addr_const (file, disp);
18097 else
18099 /* Print SImode register names to force addr32 prefix. */
18100 if (SImode_address_operand (addr, VOIDmode))
18102 if (flag_checking)
18104 gcc_assert (TARGET_64BIT);
18105 switch (GET_CODE (addr))
18107 case SUBREG:
18108 gcc_assert (GET_MODE (addr) == SImode);
18109 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18110 break;
18111 case ZERO_EXTEND:
18112 case AND:
18113 gcc_assert (GET_MODE (addr) == DImode);
18114 break;
18115 default:
18116 gcc_unreachable ();
18119 gcc_assert (!code);
18120 code = 'k';
18122 else if (code == 0
18123 && TARGET_X32
18124 && disp
18125 && CONST_INT_P (disp)
18126 && INTVAL (disp) < -16*1024*1024)
18128 /* X32 runs in 64-bit mode, where displacement, DISP, in
18129 address DISP(%r64), is encoded as 32-bit immediate sign-
18130 extended from 32-bit to 64-bit. For -0x40000300(%r64),
18131 address is %r64 + 0xffffffffbffffd00. When %r64 <
18132 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18133 which is invalid for x32. The correct address is %r64
18134 - 0x40000300 == 0xf7ffdd64. To properly encode
18135 -0x40000300(%r64) for x32, we zero-extend negative
18136 displacement by forcing addr32 prefix which truncates
18137 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
18138 zero-extend all negative displacements, including -1(%rsp).
18139 However, for small negative displacements, sign-extension
18140 won't cause overflow. We only zero-extend negative
18141 displacements if they are < -16*1024*1024, which is also used
18142 to check legitimate address displacements for PIC. */
18143 code = 'k';
18146 /* Since the upper 32 bits of RSP are always zero for x32,
18147 we can encode %esp as %rsp to avoid 0x67 prefix if
18148 there is no index register. */
18149 if (TARGET_X32 && Pmode == SImode
18150 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18151 code = 'q';
18153 if (ASSEMBLER_DIALECT == ASM_ATT)
18155 if (disp)
18157 if (flag_pic)
18158 output_pic_addr_const (file, disp, 0);
18159 else if (GET_CODE (disp) == LABEL_REF)
18160 output_asm_label (disp);
18161 else
18162 output_addr_const (file, disp);
18165 putc ('(', file);
18166 if (base)
18167 print_reg (base, code, file);
18168 if (index)
18170 putc (',', file);
18171 print_reg (index, vsib ? 0 : code, file);
18172 if (scale != 1 || vsib)
18173 fprintf (file, ",%d", scale);
18175 putc (')', file);
18177 else
18179 rtx offset = NULL_RTX;
18181 if (disp)
18183 /* Pull out the offset of a symbol; print any symbol itself. */
18184 if (GET_CODE (disp) == CONST
18185 && GET_CODE (XEXP (disp, 0)) == PLUS
18186 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18188 offset = XEXP (XEXP (disp, 0), 1);
18189 disp = gen_rtx_CONST (VOIDmode,
18190 XEXP (XEXP (disp, 0), 0));
18193 if (flag_pic)
18194 output_pic_addr_const (file, disp, 0);
18195 else if (GET_CODE (disp) == LABEL_REF)
18196 output_asm_label (disp);
18197 else if (CONST_INT_P (disp))
18198 offset = disp;
18199 else
18200 output_addr_const (file, disp);
18203 putc ('[', file);
18204 if (base)
18206 print_reg (base, code, file);
18207 if (offset)
18209 if (INTVAL (offset) >= 0)
18210 putc ('+', file);
18211 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18214 else if (offset)
18215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
18216 else
18217 putc ('0', file);
18219 if (index)
18221 putc ('+', file);
18222 print_reg (index, vsib ? 0 : code, file);
18223 if (scale != 1 || vsib)
18224 fprintf (file, "*%d", scale);
18226 putc (']', file);
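/* For illustration, an address that decomposes into base %rax, index %rbx,
   scale 4 and displacement 8 is printed as "8(%rax,%rbx,4)" in AT&T syntax
   and as "[rax+8+rbx*4]" in Intel syntax (example registers only).  */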
18231 static void
18232 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
18234 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
18237 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
18239 static bool
18240 i386_asm_output_addr_const_extra (FILE *file, rtx x)
18242 rtx op;
18244 if (GET_CODE (x) != UNSPEC)
18245 return false;
18247 op = XVECEXP (x, 0, 0);
18248 switch (XINT (x, 1))
18250 case UNSPEC_GOTOFF:
18251 output_addr_const (file, op);
18252 fputs ("@gotoff", file);
18253 break;
18254 case UNSPEC_GOTTPOFF:
18255 output_addr_const (file, op);
18256 /* FIXME: This might be @TPOFF in Sun ld. */
18257 fputs ("@gottpoff", file);
18258 break;
18259 case UNSPEC_TPOFF:
18260 output_addr_const (file, op);
18261 fputs ("@tpoff", file);
18262 break;
18263 case UNSPEC_NTPOFF:
18264 output_addr_const (file, op);
18265 if (TARGET_64BIT)
18266 fputs ("@tpoff", file);
18267 else
18268 fputs ("@ntpoff", file);
18269 break;
18270 case UNSPEC_DTPOFF:
18271 output_addr_const (file, op);
18272 fputs ("@dtpoff", file);
18273 break;
18274 case UNSPEC_GOTNTPOFF:
18275 output_addr_const (file, op);
18276 if (TARGET_64BIT)
18277 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
18278 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
18279 else
18280 fputs ("@gotntpoff", file);
18281 break;
18282 case UNSPEC_INDNTPOFF:
18283 output_addr_const (file, op);
18284 fputs ("@indntpoff", file);
18285 break;
18286 #if TARGET_MACHO
18287 case UNSPEC_MACHOPIC_OFFSET:
18288 output_addr_const (file, op);
18289 putc ('-', file);
18290 machopic_output_function_base_name (file);
18291 break;
18292 #endif
18294 default:
18295 return false;
18298 return true;
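/* For example, (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) is printed as
   "foo@gotoff", while UNSPEC_NTPOFF prints "foo@tpoff" on 64-bit targets
   and "foo@ntpoff" on 32-bit targets ("foo" is an illustrative symbol).  */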
18301 /* Split one or more double-mode RTL references into pairs of half-mode
18302 references. The RTL can be REG, offsettable MEM, integer constant, or
18303 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
18304 split and "num" is its length. lo_half and hi_half are output arrays
18305 that parallel "operands". */
18307 void
18308 split_double_mode (machine_mode mode, rtx operands[],
18309 int num, rtx lo_half[], rtx hi_half[])
18311 machine_mode half_mode;
18312 unsigned int byte;
18314 switch (mode)
18316 case E_TImode:
18317 half_mode = DImode;
18318 break;
18319 case E_DImode:
18320 half_mode = SImode;
18321 break;
18322 default:
18323 gcc_unreachable ();
18326 byte = GET_MODE_SIZE (half_mode);
18328 while (num--)
18330 rtx op = operands[num];
18332 /* simplify_subreg refuses to split volatile memory addresses,
18333 but we still have to handle them. */
18334 if (MEM_P (op))
18336 lo_half[num] = adjust_address (op, half_mode, 0);
18337 hi_half[num] = adjust_address (op, half_mode, byte);
18339 else
18341 lo_half[num] = simplify_gen_subreg (half_mode, op,
18342 GET_MODE (op) == VOIDmode
18343 ? mode : GET_MODE (op), 0);
18344 hi_half[num] = simplify_gen_subreg (half_mode, op,
18345 GET_MODE (op) == VOIDmode
18346 ? mode : GET_MODE (op), byte);
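/* As a worked example, splitting a DImode integer constant
   0x1122334455667788 for a 32-bit target yields the SImode halves
   0x55667788 (lo_half) and 0x11223344 (hi_half), while a DImode MEM is
   split into two SImode MEMs at byte offsets 0 and 4.  */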
18351 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
18352 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
18353 is the expression of the binary operation. The output may either be
18354 emitted here, or returned to the caller, like all output_* functions.
18356 There is no guarantee that the operands have the same mode, as they
18357 might be within FLOAT or FLOAT_EXTEND expressions. */
18359 #ifndef SYSV386_COMPAT
18360 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
18361 wants to fix the assemblers because that causes incompatibility
18362 with gcc. No-one wants to fix gcc because that causes
18363 incompatibility with assemblers... You can use the option of
18364 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
18365 #define SYSV386_COMPAT 1
18366 #endif
18368 const char *
18369 output_387_binary_op (rtx_insn *insn, rtx *operands)
18371 static char buf[40];
18372 const char *p;
18373 bool is_sse
18374 = (SSE_REG_P (operands[0])
18375 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
18377 if (is_sse)
18378 p = "%v";
18379 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
18380 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
18381 p = "fi";
18382 else
18383 p = "f";
18385 strcpy (buf, p);
18387 switch (GET_CODE (operands[3]))
18389 case PLUS:
18390 p = "add"; break;
18391 case MINUS:
18392 p = "sub"; break;
18393 case MULT:
18394 p = "mul"; break;
18395 case DIV:
18396 p = "div"; break;
18397 default:
18398 gcc_unreachable ();
18401 strcat (buf, p);
18403 if (is_sse)
18405 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
18406 strcat (buf, p);
18408 if (TARGET_AVX)
18409 p = "\t{%2, %1, %0|%0, %1, %2}";
18410 else
18411 p = "\t{%2, %0|%0, %2}";
18413 strcat (buf, p);
18414 return buf;
18417 /* Even if we do not want to check the inputs, this documents the input
18418 constraints, which helps in understanding the following code. */
18419 if (flag_checking)
18421 if (STACK_REG_P (operands[0])
18422 && ((REG_P (operands[1])
18423 && REGNO (operands[0]) == REGNO (operands[1])
18424 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
18425 || (REG_P (operands[2])
18426 && REGNO (operands[0]) == REGNO (operands[2])
18427 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
18428 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
18429 ; /* ok */
18430 else
18431 gcc_unreachable ();
18434 switch (GET_CODE (operands[3]))
18436 case MULT:
18437 case PLUS:
18438 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
18439 std::swap (operands[1], operands[2]);
18441 /* We know operands[0] == operands[1]. */
18443 if (MEM_P (operands[2]))
18445 p = "%Z2\t%2";
18446 break;
18449 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18451 if (STACK_TOP_P (operands[0]))
18452 /* How is it that we are storing to a dead operand[2]?
18453 Well, presumably operands[1] is dead too. We can't
18454 store the result to st(0) as st(0) gets popped on this
18455 instruction. Instead store to operands[2] (which I
18456 think has to be st(1)). st(1) will be popped later.
18457 gcc <= 2.8.1 didn't have this check and generated
18458 assembly code that the Unixware assembler rejected. */
18459 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18460 else
18461 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18462 break;
18465 if (STACK_TOP_P (operands[0]))
18466 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18467 else
18468 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18469 break;
18471 case MINUS:
18472 case DIV:
18473 if (MEM_P (operands[1]))
18475 p = "r%Z1\t%1";
18476 break;
18479 if (MEM_P (operands[2]))
18481 p = "%Z2\t%2";
18482 break;
18485 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
18487 #if SYSV386_COMPAT
18488 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
18489 derived assemblers, confusingly reverse the direction of
18490 the operation for fsub{r} and fdiv{r} when the
18491 destination register is not st(0). The Intel assembler
18492 doesn't have this brain damage. Read !SYSV386_COMPAT to
18493 figure out what the hardware really does. */
18494 if (STACK_TOP_P (operands[0]))
18495 p = "{p\t%0, %2|rp\t%2, %0}";
18496 else
18497 p = "{rp\t%2, %0|p\t%0, %2}";
18498 #else
18499 if (STACK_TOP_P (operands[0]))
18500 /* As above for fmul/fadd, we can't store to st(0). */
18501 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
18502 else
18503 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
18504 #endif
18505 break;
18508 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18510 #if SYSV386_COMPAT
18511 if (STACK_TOP_P (operands[0]))
18512 p = "{rp\t%0, %1|p\t%1, %0}";
18513 else
18514 p = "{p\t%1, %0|rp\t%0, %1}";
18515 #else
18516 if (STACK_TOP_P (operands[0]))
18517 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
18518 else
18519 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
18520 #endif
18521 break;
18524 if (STACK_TOP_P (operands[0]))
18526 if (STACK_TOP_P (operands[1]))
18527 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
18528 else
18529 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
18530 break;
18532 else if (STACK_TOP_P (operands[1]))
18534 #if SYSV386_COMPAT
18535 p = "{\t%1, %0|r\t%0, %1}";
18536 #else
18537 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
18538 #endif
18540 else
18542 #if SYSV386_COMPAT
18543 p = "{r\t%2, %0|\t%0, %2}";
18544 #else
18545 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
18546 #endif
18548 break;
18550 default:
18551 gcc_unreachable ();
18554 strcat (buf, p);
18555 return buf;
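/* Illustrative results of the above: an SFmode SSE addition builds the
   template "%vaddss\t{%2, %0|%0, %2}" (the three-operand form under
   TARGET_AVX), while an x87 multiply whose operands[2] dies and whose
   destination is not st(0) produces "fmulp\t{%2, %0|%0, %2}".  */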
18558 /* Return needed mode for entity in optimize_mode_switching pass. */
18560 static int
18561 ix86_dirflag_mode_needed (rtx_insn *insn)
18563 if (CALL_P (insn))
18565 if (cfun->machine->func_type == TYPE_NORMAL)
18566 return X86_DIRFLAG_ANY;
18567 else
18568 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
18569 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
18572 if (recog_memoized (insn) < 0)
18573 return X86_DIRFLAG_ANY;
18575 if (get_attr_type (insn) == TYPE_STR)
18577 /* Emit cld instruction if stringops are used in the function. */
18578 if (cfun->machine->func_type == TYPE_NORMAL)
18579 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
18580 else
18581 return X86_DIRFLAG_RESET;
18584 return X86_DIRFLAG_ANY;
18587 /* Check if a 256bit AVX register is referenced inside of EXP. */
18589 static bool
18590 ix86_check_avx256_register (const_rtx exp)
18592 if (SUBREG_P (exp))
18593 exp = SUBREG_REG (exp);
18595 return (REG_P (exp)
18596 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
18599 /* Return needed mode for entity in optimize_mode_switching pass. */
18601 static int
18602 ix86_avx_u128_mode_needed (rtx_insn *insn)
18604 if (CALL_P (insn))
18606 rtx link;
18608 /* Needed mode is set to AVX_U128_CLEAN if there are
18609 no 256bit modes used in function arguments. */
18610 for (link = CALL_INSN_FUNCTION_USAGE (insn);
18611 link;
18612 link = XEXP (link, 1))
18614 if (GET_CODE (XEXP (link, 0)) == USE)
18616 rtx arg = XEXP (XEXP (link, 0), 0);
18618 if (ix86_check_avx256_register (arg))
18619 return AVX_U128_DIRTY;
18623 return AVX_U128_CLEAN;
18626 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
18627 changes state only when a 256bit register is written to, but we need
18628 to prevent the compiler from moving the optimal insertion point above
18629 an eventual read from a 256bit register. */
18630 subrtx_iterator::array_type array;
18631 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
18632 if (ix86_check_avx256_register (*iter))
18633 return AVX_U128_DIRTY;
18635 return AVX_U128_ANY;
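/* For example, a call whose CALL_INSN_FUNCTION_USAGE mentions a V8SFmode
   argument register needs AVX_U128_DIRTY, while a call taking only scalar
   arguments needs AVX_U128_CLEAN, allowing mode switching to insert a
   vzeroupper before it.  */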
18638 /* Return mode that i387 must be switched into
18639 prior to the execution of insn. */
18641 static int
18642 ix86_i387_mode_needed (int entity, rtx_insn *insn)
18644 enum attr_i387_cw mode;
18646 /* The mode UNINITIALIZED is used to store the control word after a
18647 function call or ASM pattern. The mode ANY specifies that the function
18648 has no requirements on the control word and makes no changes in the
18649 bits we are interested in. */
18651 if (CALL_P (insn)
18652 || (NONJUMP_INSN_P (insn)
18653 && (asm_noperands (PATTERN (insn)) >= 0
18654 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
18655 return I387_CW_UNINITIALIZED;
18657 if (recog_memoized (insn) < 0)
18658 return I387_CW_ANY;
18660 mode = get_attr_i387_cw (insn);
18662 switch (entity)
18664 case I387_TRUNC:
18665 if (mode == I387_CW_TRUNC)
18666 return mode;
18667 break;
18669 case I387_FLOOR:
18670 if (mode == I387_CW_FLOOR)
18671 return mode;
18672 break;
18674 case I387_CEIL:
18675 if (mode == I387_CW_CEIL)
18676 return mode;
18677 break;
18679 case I387_MASK_PM:
18680 if (mode == I387_CW_MASK_PM)
18681 return mode;
18682 break;
18684 default:
18685 gcc_unreachable ();
18688 return I387_CW_ANY;
18691 /* Return mode that entity must be switched into
18692 prior to the execution of insn. */
18694 static int
18695 ix86_mode_needed (int entity, rtx_insn *insn)
18697 switch (entity)
18699 case X86_DIRFLAG:
18700 return ix86_dirflag_mode_needed (insn);
18701 case AVX_U128:
18702 return ix86_avx_u128_mode_needed (insn);
18703 case I387_TRUNC:
18704 case I387_FLOOR:
18705 case I387_CEIL:
18706 case I387_MASK_PM:
18707 return ix86_i387_mode_needed (entity, insn);
18708 default:
18709 gcc_unreachable ();
18711 return 0;
18714 /* Check if a 256bit AVX register is referenced in stores. */
18716 static void
18717 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18719 if (ix86_check_avx256_register (dest))
18721 bool *used = (bool *) data;
18722 *used = true;
18726 /* Calculate mode of upper 128bit AVX registers after the insn. */
18728 static int
18729 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18731 rtx pat = PATTERN (insn);
18733 if (vzeroupper_operation (pat, VOIDmode)
18734 || vzeroall_operation (pat, VOIDmode))
18735 return AVX_U128_CLEAN;
18737 /* We know that the state is clean after a CALL insn if there is no
18738 256bit register used in the function return value. */
18739 if (CALL_P (insn))
18741 bool avx_reg256_found = false;
18742 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18744 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18747 /* Otherwise, return the current mode. Remember that if the insn
18748 references AVX 256bit registers, the mode was already changed
18749 to DIRTY from MODE_NEEDED. */
18750 return mode;
18753 /* Return the mode that an insn results in. */
18755 static int
18756 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18758 switch (entity)
18760 case X86_DIRFLAG:
18761 return mode;
18762 case AVX_U128:
18763 return ix86_avx_u128_mode_after (mode, insn);
18764 case I387_TRUNC:
18765 case I387_FLOOR:
18766 case I387_CEIL:
18767 case I387_MASK_PM:
18768 return mode;
18769 default:
18770 gcc_unreachable ();
18774 static int
18775 ix86_dirflag_mode_entry (void)
18777 /* For TARGET_CLD or in the interrupt handler we can't assume the
18778 direction flag state at function entry. */
18779 if (TARGET_CLD
18780 || cfun->machine->func_type != TYPE_NORMAL)
18781 return X86_DIRFLAG_ANY;
18783 return X86_DIRFLAG_RESET;
18786 static int
18787 ix86_avx_u128_mode_entry (void)
18789 tree arg;
18791 /* Entry mode is set to AVX_U128_DIRTY if there are
18792 256bit modes used in function arguments. */
18793 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18794 arg = TREE_CHAIN (arg))
18796 rtx incoming = DECL_INCOMING_RTL (arg);
18798 if (incoming && ix86_check_avx256_register (incoming))
18799 return AVX_U128_DIRTY;
18802 return AVX_U128_CLEAN;
18805 /* Return a mode that ENTITY is assumed to be
18806 switched to at function entry. */
18808 static int
18809 ix86_mode_entry (int entity)
18811 switch (entity)
18813 case X86_DIRFLAG:
18814 return ix86_dirflag_mode_entry ();
18815 case AVX_U128:
18816 return ix86_avx_u128_mode_entry ();
18817 case I387_TRUNC:
18818 case I387_FLOOR:
18819 case I387_CEIL:
18820 case I387_MASK_PM:
18821 return I387_CW_ANY;
18822 default:
18823 gcc_unreachable ();
18827 static int
18828 ix86_avx_u128_mode_exit (void)
18830 rtx reg = crtl->return_rtx;
18832 /* Exit mode is set to AVX_U128_DIRTY if there are
18833 256bit modes used in the function return register. */
18834 if (reg && ix86_check_avx256_register (reg))
18835 return AVX_U128_DIRTY;
18837 return AVX_U128_CLEAN;
18840 /* Return a mode that ENTITY is assumed to be
18841 switched to at function exit. */
18843 static int
18844 ix86_mode_exit (int entity)
18846 switch (entity)
18848 case X86_DIRFLAG:
18849 return X86_DIRFLAG_ANY;
18850 case AVX_U128:
18851 return ix86_avx_u128_mode_exit ();
18852 case I387_TRUNC:
18853 case I387_FLOOR:
18854 case I387_CEIL:
18855 case I387_MASK_PM:
18856 return I387_CW_ANY;
18857 default:
18858 gcc_unreachable ();
18862 static int
18863 ix86_mode_priority (int, int n)
18865 return n;
18868 /* Output code to initialize control word copies used by trunc?f?i and
18869 rounding patterns. CURRENT_MODE is set to current control word,
18870 while NEW_MODE is set to new control word. */
18872 static void
18873 emit_i387_cw_initialization (int mode)
18875 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18876 rtx new_mode;
18878 enum ix86_stack_slot slot;
18880 rtx reg = gen_reg_rtx (HImode);
18882 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18883 emit_move_insn (reg, copy_rtx (stored_mode));
18885 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18886 || optimize_insn_for_size_p ())
18888 switch (mode)
18890 case I387_CW_TRUNC:
18891 /* round toward zero (truncate) */
18892 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18893 slot = SLOT_CW_TRUNC;
18894 break;
18896 case I387_CW_FLOOR:
18897 /* round down toward -oo */
18898 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18899 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18900 slot = SLOT_CW_FLOOR;
18901 break;
18903 case I387_CW_CEIL:
18904 /* round up toward +oo */
18905 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18906 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18907 slot = SLOT_CW_CEIL;
18908 break;
18910 case I387_CW_MASK_PM:
18911 /* mask precision exception for nearbyint() */
18912 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18913 slot = SLOT_CW_MASK_PM;
18914 break;
18916 default:
18917 gcc_unreachable ();
18920 else
18922 switch (mode)
18924 case I387_CW_TRUNC:
18925 /* round toward zero (truncate) */
18926 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18927 slot = SLOT_CW_TRUNC;
18928 break;
18930 case I387_CW_FLOOR:
18931 /* round down toward -oo */
18932 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18933 slot = SLOT_CW_FLOOR;
18934 break;
18936 case I387_CW_CEIL:
18937 /* round up toward +oo */
18938 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18939 slot = SLOT_CW_CEIL;
18940 break;
18942 case I387_CW_MASK_PM:
18943 /* mask precision exception for nearbyint() */
18944 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18945 slot = SLOT_CW_MASK_PM;
18946 break;
18948 default:
18949 gcc_unreachable ();
18953 gcc_assert (slot < MAX_386_STACK_LOCALS);
18955 new_mode = assign_386_stack_local (HImode, slot);
18956 emit_move_insn (new_mode, reg);
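/* The rounding control field occupies bits 10-11 of the x87 control word:
   00 = round to nearest, 01 = round down, 10 = round up, 11 = truncate.
   So OR-ing the stored word with 0x0c00 above selects truncation, while
   clearing the field and OR-ing 0x0400 selects rounding toward -oo.  */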
18959 /* Emit vzeroupper. */
18961 void
18962 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18964 int i;
18966 /* Cancel automatic vzeroupper insertion if there are
18967 live call-saved SSE registers at the insertion point. */
18969 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18970 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18971 return;
18973 if (TARGET_64BIT)
18974 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18975 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18976 return;
18978 emit_insn (gen_avx_vzeroupper ());
18983 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
18984 is the set of hard registers live at the point where the insn(s)
18985 are to be inserted. */
18987 static void
18988 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18989 HARD_REG_SET regs_live)
18991 switch (entity)
18993 case X86_DIRFLAG:
18994 if (mode == X86_DIRFLAG_RESET)
18995 emit_insn (gen_cld ());
18996 break;
18997 case AVX_U128:
18998 if (mode == AVX_U128_CLEAN)
18999 ix86_avx_emit_vzeroupper (regs_live);
19000 break;
19001 case I387_TRUNC:
19002 case I387_FLOOR:
19003 case I387_CEIL:
19004 case I387_MASK_PM:
19005 if (mode != I387_CW_ANY
19006 && mode != I387_CW_UNINITIALIZED)
19007 emit_i387_cw_initialization (mode);
19008 break;
19009 default:
19010 gcc_unreachable ();
19014 /* Output code for INSN to convert a float to a signed int. OPERANDS
19015 are the insn operands. The output may be [HSD]Imode and the input
19016 operand may be [SDX]Fmode. */
19018 const char *
19019 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19021 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19022 bool dimode_p = GET_MODE (operands[0]) == DImode;
19023 int round_mode = get_attr_i387_cw (insn);
19025 static char buf[40];
19026 const char *p;
19028 /* Jump through a hoop or two for DImode, since the hardware has no
19029 non-popping instruction. We used to do this a different way, but
19030 that was somewhat fragile and broke with post-reload splitters. */
19031 if ((dimode_p || fisttp) && !stack_top_dies)
19032 output_asm_insn ("fld\t%y1", operands);
19034 gcc_assert (STACK_TOP_P (operands[1]));
19035 gcc_assert (MEM_P (operands[0]));
19036 gcc_assert (GET_MODE (operands[1]) != TFmode);
19038 if (fisttp)
19039 return "fisttp%Z0\t%0";
19041 strcpy (buf, "fist");
19043 if (round_mode != I387_CW_ANY)
19044 output_asm_insn ("fldcw\t%3", operands);
19046 p = "p%Z0\t%0";
19047 strcat (buf, p + !(stack_top_dies || dimode_p));
19049 output_asm_insn (buf, operands);
19051 if (round_mode != I387_CW_ANY)
19052 output_asm_insn ("fldcw\t%2", operands);
19054 return "";
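/* For example, with an SImode memory destination and a dying stack top this
   emits "fistpl %0" in AT&T syntax (preceded and followed by "fldcw" when a
   non-default rounding mode is needed); a HImode destination whose value
   must survive uses the non-popping "fists %0" form instead (roughly,
   depending on assembler support).  */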
19057 /* Output code for x87 ffreep insn. The OPNO argument, which may only
19058 have the values zero or one, indicates the ffreep insn's operand
19059 from the OPERANDS array. */
19061 static const char *
19062 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19064 if (TARGET_USE_FFREEP)
19065 #ifdef HAVE_AS_IX86_FFREEP
19066 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19067 #else
19069 static char retval[32];
19070 int regno = REGNO (operands[opno]);
19072 gcc_assert (STACK_REGNO_P (regno));
19074 regno -= FIRST_STACK_REG;
19076 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19077 return retval;
19079 #endif
19081 return opno ? "fstp\t%y1" : "fstp\t%y0";
19085 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
19086 should be used. UNORDERED_P is true when fucom should be used. */
19088 const char *
19089 output_fp_compare (rtx_insn *insn, rtx *operands,
19090 bool eflags_p, bool unordered_p)
19092 rtx *xops = eflags_p ? &operands[0] : &operands[1];
19093 bool stack_top_dies;
19095 static char buf[40];
19096 const char *p;
19098 gcc_assert (STACK_TOP_P (xops[0]));
19100 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19102 if (eflags_p)
19104 p = unordered_p ? "fucomi" : "fcomi";
19105 strcpy (buf, p);
19107 p = "p\t{%y1, %0|%0, %y1}";
19108 strcat (buf, p + !stack_top_dies);
19110 return buf;
19113 if (STACK_REG_P (xops[1])
19114 && stack_top_dies
19115 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19117 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19119 /* If the top of the 387 stack dies, and the other operand
19120 is also a stack register that dies, then this must be an
19121 `fcompp' float compare. */
19122 p = unordered_p ? "fucompp" : "fcompp";
19123 strcpy (buf, p);
19125 else if (const0_operand (xops[1], VOIDmode))
19127 gcc_assert (!unordered_p);
19128 strcpy (buf, "ftst");
19130 else
19132 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19134 gcc_assert (!unordered_p);
19135 p = "ficom";
19137 else
19138 p = unordered_p ? "fucom" : "fcom";
19140 strcpy (buf, p);
19142 p = "p%Z2\t%y2";
19143 strcat (buf, p + !stack_top_dies);
19146 output_asm_insn (buf, operands);
19147 return "fnstsw\t%0";
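/* Illustrative outputs: with EFLAGS_P the result is the single template
   "fcomi\t{%y1, %0|%0, %y1}" ("fcomip" if the stack top dies); otherwise,
   comparing st(0) against st(1) when both die emits "fcompp" and returns
   "fnstsw\t%0" to fetch the status word.  */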
19150 void
19151 ix86_output_addr_vec_elt (FILE *file, int value)
19153 const char *directive = ASM_LONG;
19155 #ifdef ASM_QUAD
19156 if (TARGET_LP64)
19157 directive = ASM_QUAD;
19158 #else
19159 gcc_assert (!TARGET_64BIT);
19160 #endif
19162 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19165 void
19166 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19168 const char *directive = ASM_LONG;
19170 #ifdef ASM_QUAD
19171 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19172 directive = ASM_QUAD;
19173 #else
19174 gcc_assert (!TARGET_64BIT);
19175 #endif
19176 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
19177 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19178 fprintf (file, "%s%s%d-%s%d\n",
19179 directive, LPREFIX, value, LPREFIX, rel);
19180 else if (HAVE_AS_GOTOFF_IN_DATA)
19181 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19182 #if TARGET_MACHO
19183 else if (TARGET_MACHO)
19185 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19186 machopic_output_function_base_name (file);
19187 putc ('\n', file);
19189 #endif
19190 else
19191 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19192 GOT_SYMBOL_NAME, LPREFIX, value);
19195 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19196 for the target. */
19198 void
19199 ix86_expand_clear (rtx dest)
19201 rtx tmp;
19203 /* We play register width games, which are only valid after reload. */
19204 gcc_assert (reload_completed);
19206 /* Avoid HImode and its attendant prefix byte. */
19207 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19208 dest = gen_rtx_REG (SImode, REGNO (dest));
19209 tmp = gen_rtx_SET (dest, const0_rtx);
19211 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19213 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19214 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
19217 emit_insn (tmp);
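/* For example, clearing %eax normally expands to "xorl %eax, %eax" (the SET
   is wrapped in a PARALLEL with a FLAGS_REG clobber); the "movl $0, %eax"
   form is used only when TARGET_USE_MOV0 is set and we are not optimizing
   the insn for size.  */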
19220 void
19221 ix86_expand_move (machine_mode mode, rtx operands[])
19223 rtx op0, op1;
19224 rtx tmp, addend = NULL_RTX;
19225 enum tls_model model;
19227 op0 = operands[0];
19228 op1 = operands[1];
19230 switch (GET_CODE (op1))
19232 case CONST:
19233 tmp = XEXP (op1, 0);
19235 if (GET_CODE (tmp) != PLUS
19236 || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19237 break;
19239 op1 = XEXP (tmp, 0);
19240 addend = XEXP (tmp, 1);
19241 /* FALLTHRU */
19243 case SYMBOL_REF:
19244 model = SYMBOL_REF_TLS_MODEL (op1);
19246 if (model)
19247 op1 = legitimize_tls_address (op1, model, true);
19248 else if (ix86_force_load_from_GOT_p (op1))
19250 /* Load the external function address via GOT slot to avoid PLT. */
19251 op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
19252 (TARGET_64BIT
19253 ? UNSPEC_GOTPCREL
19254 : UNSPEC_GOT));
19255 op1 = gen_rtx_CONST (Pmode, op1);
19256 op1 = gen_const_mem (Pmode, op1);
19257 set_mem_alias_set (op1, ix86_GOT_alias_set ());
19259 else
19261 tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
19262 if (tmp)
19264 op1 = tmp;
19265 if (!addend)
19266 break;
19268 else
19270 op1 = operands[1];
19271 break;
19275 if (addend)
19277 op1 = force_operand (op1, NULL_RTX);
19278 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
19279 op0, 1, OPTAB_DIRECT);
19281 else
19282 op1 = force_operand (op1, op0);
19284 if (op1 == op0)
19285 return;
19287 op1 = convert_to_mode (mode, op1, 1);
19289 default:
19290 break;
19293 if ((flag_pic || MACHOPIC_INDIRECT)
19294 && symbolic_operand (op1, mode))
19296 if (TARGET_MACHO && !TARGET_64BIT)
19298 #if TARGET_MACHO
19299 /* dynamic-no-pic */
19300 if (MACHOPIC_INDIRECT)
19302 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
19303 ? op0 : gen_reg_rtx (Pmode);
19304 op1 = machopic_indirect_data_reference (op1, temp);
19305 if (MACHOPIC_PURE)
19306 op1 = machopic_legitimize_pic_address (op1, mode,
19307 temp == op1 ? 0 : temp);
19309 if (op0 != op1 && GET_CODE (op0) != MEM)
19311 rtx insn = gen_rtx_SET (op0, op1);
19312 emit_insn (insn);
19313 return;
19315 if (GET_CODE (op0) == MEM)
19316 op1 = force_reg (Pmode, op1);
19317 else
19319 rtx temp = op0;
19320 if (GET_CODE (temp) != REG)
19321 temp = gen_reg_rtx (Pmode);
19322 temp = legitimize_pic_address (op1, temp);
19323 if (temp == op0)
19324 return;
19325 op1 = temp;
19327 /* dynamic-no-pic */
19328 #endif
19330 else
19332 if (MEM_P (op0))
19333 op1 = force_reg (mode, op1);
19334 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
19336 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
19337 op1 = legitimize_pic_address (op1, reg);
19338 if (op0 == op1)
19339 return;
19340 op1 = convert_to_mode (mode, op1, 1);
19344 else
19346 if (MEM_P (op0)
19347 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
19348 || !push_operand (op0, mode))
19349 && MEM_P (op1))
19350 op1 = force_reg (mode, op1);
19352 if (push_operand (op0, mode)
19353 && ! general_no_elim_operand (op1, mode))
19354 op1 = copy_to_mode_reg (mode, op1);
 19356 /* Force large constants in 64-bit compilation into a register
19357 to get them CSEed. */
19358 if (can_create_pseudo_p ()
19359 && (mode == DImode) && TARGET_64BIT
19360 && immediate_operand (op1, mode)
19361 && !x86_64_zext_immediate_operand (op1, VOIDmode)
19362 && !register_operand (op0, mode)
19363 && optimize)
19364 op1 = copy_to_mode_reg (mode, op1);
19366 if (can_create_pseudo_p ()
19367 && CONST_DOUBLE_P (op1))
19369 /* If we are loading a floating point constant to a register,
19370 force the value to memory now, since we'll get better code
 19371 out of the back end. */
19373 op1 = validize_mem (force_const_mem (mode, op1));
19374 if (!register_operand (op0, mode))
19376 rtx temp = gen_reg_rtx (mode);
19377 emit_insn (gen_rtx_SET (temp, op1));
19378 emit_move_insn (op0, temp);
19379 return;
19384 emit_insn (gen_rtx_SET (op0, op1));
19387 void
19388 ix86_expand_vector_move (machine_mode mode, rtx operands[])
19390 rtx op0 = operands[0], op1 = operands[1];
19391 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
 19392 psABI since the biggest alignment is 4 bytes for IA MCU psABI. */
19393 unsigned int align = (TARGET_IAMCU
19394 ? GET_MODE_BITSIZE (mode)
19395 : GET_MODE_ALIGNMENT (mode));
19397 if (push_operand (op0, VOIDmode))
19398 op0 = emit_move_resolve_push (mode, op0);
19400 /* Force constants other than zero into memory. We do not know how
19401 the instructions used to build constants modify the upper 64 bits
 19402 of the register; once we have that information, we may be able
19403 to handle some of them more efficiently. */
19404 if (can_create_pseudo_p ()
19405 && (CONSTANT_P (op1)
19406 || (SUBREG_P (op1)
19407 && CONSTANT_P (SUBREG_REG (op1))))
19408 && ((register_operand (op0, mode)
19409 && !standard_sse_constant_p (op1, mode))
19410 /* ix86_expand_vector_move_misalign() does not like constants. */
19411 || (SSE_REG_MODE_P (mode)
19412 && MEM_P (op0)
19413 && MEM_ALIGN (op0) < align)))
19415 if (SUBREG_P (op1))
19417 machine_mode imode = GET_MODE (SUBREG_REG (op1));
19418 rtx r = force_const_mem (imode, SUBREG_REG (op1));
19419 if (r)
19420 r = validize_mem (r);
19421 else
19422 r = force_reg (imode, SUBREG_REG (op1));
19423 op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
19425 else
19426 op1 = validize_mem (force_const_mem (mode, op1));
 19429 /* We need to check memory alignment for SSE mode since an attribute
19430 can make operands unaligned. */
19431 if (can_create_pseudo_p ()
19432 && SSE_REG_MODE_P (mode)
19433 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
19434 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
19436 rtx tmp[2];
19438 /* ix86_expand_vector_move_misalign() does not like both
19439 arguments in memory. */
19440 if (!register_operand (op0, mode)
19441 && !register_operand (op1, mode))
19442 op1 = force_reg (mode, op1);
19444 tmp[0] = op0; tmp[1] = op1;
19445 ix86_expand_vector_move_misalign (mode, tmp);
19446 return;
19449 /* Make operand1 a register if it isn't already. */
19450 if (can_create_pseudo_p ()
19451 && !register_operand (op0, mode)
19452 && !register_operand (op1, mode))
19454 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
19455 return;
19458 emit_insn (gen_rtx_SET (op0, op1));
19461 /* Split 32-byte AVX unaligned load and store if needed. */
19463 static void
19464 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
19466 rtx m;
19467 rtx (*extract) (rtx, rtx, rtx);
19468 machine_mode mode;
19470 if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
19471 || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
19473 emit_insn (gen_rtx_SET (op0, op1));
19474 return;
19477 rtx orig_op0 = NULL_RTX;
19478 mode = GET_MODE (op0);
19479 switch (GET_MODE_CLASS (mode))
19481 case MODE_VECTOR_INT:
19482 case MODE_INT:
19483 if (mode != V32QImode)
19485 if (!MEM_P (op0))
19487 orig_op0 = op0;
19488 op0 = gen_reg_rtx (V32QImode);
19490 else
19491 op0 = gen_lowpart (V32QImode, op0);
19492 op1 = gen_lowpart (V32QImode, op1);
19493 mode = V32QImode;
19495 break;
19496 case MODE_VECTOR_FLOAT:
19497 break;
19498 default:
19499 gcc_unreachable ();
19502 switch (mode)
19504 default:
19505 gcc_unreachable ();
19506 case E_V32QImode:
19507 extract = gen_avx_vextractf128v32qi;
19508 mode = V16QImode;
19509 break;
19510 case E_V8SFmode:
19511 extract = gen_avx_vextractf128v8sf;
19512 mode = V4SFmode;
19513 break;
19514 case E_V4DFmode:
19515 extract = gen_avx_vextractf128v4df;
19516 mode = V2DFmode;
19517 break;
19520 if (MEM_P (op1))
19522 rtx r = gen_reg_rtx (mode);
19523 m = adjust_address (op1, mode, 0);
19524 emit_move_insn (r, m);
19525 m = adjust_address (op1, mode, 16);
19526 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
19527 emit_move_insn (op0, r);
19529 else if (MEM_P (op0))
19531 m = adjust_address (op0, mode, 0);
19532 emit_insn (extract (m, op1, const0_rtx));
19533 m = adjust_address (op0, mode, 16);
19534 emit_insn (extract (m, copy_rtx (op1), const1_rtx));
19536 else
19537 gcc_unreachable ();
19539 if (orig_op0)
19540 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19543 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
19544 straight to ix86_expand_vector_move. */
19545 /* Code generation for scalar reg-reg moves of single and double precision data:
19546 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
19547 movaps reg, reg
19548 else
19549 movss reg, reg
19550 if (x86_sse_partial_reg_dependency == true)
19551 movapd reg, reg
19552 else
19553 movsd reg, reg
19555 Code generation for scalar loads of double precision data:
19556 if (x86_sse_split_regs == true)
19557 movlpd mem, reg (gas syntax)
19558 else
19559 movsd mem, reg
19561 Code generation for unaligned packed loads of single precision data
19562 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
19563 if (x86_sse_unaligned_move_optimal)
19564 movups mem, reg
19566 if (x86_sse_partial_reg_dependency == true)
19568 xorps reg, reg
19569 movlps mem, reg
19570 movhps mem+8, reg
19572 else
19574 movlps mem, reg
19575 movhps mem+8, reg
19578 Code generation for unaligned packed loads of double precision data
19579 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
19580 if (x86_sse_unaligned_move_optimal)
19581 movupd mem, reg
19583 if (x86_sse_split_regs == true)
19585 movlpd mem, reg
19586 movhpd mem+8, reg
19588 else
19590 movsd mem, reg
19591 movhpd mem+8, reg
19595 void
19596 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
19598 rtx op0, op1, m;
19600 op0 = operands[0];
19601 op1 = operands[1];
19603 /* Use unaligned load/store for AVX512 or when optimizing for size. */
19604 if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
19606 emit_insn (gen_rtx_SET (op0, op1));
19607 return;
19610 if (TARGET_AVX)
19612 if (GET_MODE_SIZE (mode) == 32)
19613 ix86_avx256_split_vector_move_misalign (op0, op1);
19614 else
19615 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
19616 emit_insn (gen_rtx_SET (op0, op1));
19617 return;
19620 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19621 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
19623 emit_insn (gen_rtx_SET (op0, op1));
19624 return;
19627 /* ??? If we have typed data, then it would appear that using
19628 movdqu is the only way to get unaligned data loaded with
19629 integer type. */
19630 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19632 emit_insn (gen_rtx_SET (op0, op1));
19633 return;
19636 if (MEM_P (op1))
19638 if (TARGET_SSE2 && mode == V2DFmode)
19640 rtx zero;
19642 /* When SSE registers are split into halves, we can avoid
19643 writing to the top half twice. */
19644 if (TARGET_SSE_SPLIT_REGS)
19646 emit_clobber (op0);
19647 zero = op0;
19649 else
19651 /* ??? Not sure about the best option for the Intel chips.
19652 The following would seem to satisfy; the register is
19653 entirely cleared, breaking the dependency chain. We
19654 then store to the upper half, with a dependency depth
19655 of one. A rumor has it that Intel recommends two movsd
19656 followed by an unpacklpd, but this is unconfirmed. And
19657 given that the dependency depth of the unpacklpd would
19658 still be one, I'm not sure why this would be better. */
19659 zero = CONST0_RTX (V2DFmode);
19662 m = adjust_address (op1, DFmode, 0);
19663 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19664 m = adjust_address (op1, DFmode, 8);
19665 emit_insn (gen_sse2_loadhpd (op0, op0, m));
19667 else
19669 rtx t;
19671 if (mode != V4SFmode)
19672 t = gen_reg_rtx (V4SFmode);
19673 else
19674 t = op0;
19676 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19677 emit_move_insn (t, CONST0_RTX (V4SFmode));
19678 else
19679 emit_clobber (t);
19681 m = adjust_address (op1, V2SFmode, 0);
19682 emit_insn (gen_sse_loadlps (t, t, m));
19683 m = adjust_address (op1, V2SFmode, 8);
19684 emit_insn (gen_sse_loadhps (t, t, m));
19685 if (mode != V4SFmode)
19686 emit_move_insn (op0, gen_lowpart (mode, t));
19689 else if (MEM_P (op0))
19691 if (TARGET_SSE2 && mode == V2DFmode)
19693 m = adjust_address (op0, DFmode, 0);
19694 emit_insn (gen_sse2_storelpd (m, op1));
19695 m = adjust_address (op0, DFmode, 8);
19696 emit_insn (gen_sse2_storehpd (m, op1));
19698 else
19700 if (mode != V4SFmode)
19701 op1 = gen_lowpart (V4SFmode, op1);
19703 m = adjust_address (op0, V2SFmode, 0);
19704 emit_insn (gen_sse_storelps (m, op1));
19705 m = adjust_address (op0, V2SFmode, 8);
19706 emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
19709 else
19710 gcc_unreachable ();
19713 /* Helper function of ix86_fixup_binary_operands to canonicalize
19714 operand order. Returns true if the operands should be swapped. */
19716 static bool
19717 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19718 rtx operands[])
19720 rtx dst = operands[0];
19721 rtx src1 = operands[1];
19722 rtx src2 = operands[2];
19724 /* If the operation is not commutative, we can't do anything. */
19725 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19726 return false;
19728 /* Highest priority is that src1 should match dst. */
19729 if (rtx_equal_p (dst, src1))
19730 return false;
19731 if (rtx_equal_p (dst, src2))
19732 return true;
19734 /* Next highest priority is that immediate constants come second. */
19735 if (immediate_operand (src2, mode))
19736 return false;
19737 if (immediate_operand (src1, mode))
19738 return true;
19740 /* Lowest priority is that memory references should come second. */
19741 if (MEM_P (src2))
19742 return false;
19743 if (MEM_P (src1))
19744 return true;
19746 return false;
19750 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19751 destination to use for the operation. If different from the true
19752 destination in operands[0], a copy operation will be required. */
 19754 rtx
 19755 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19756 rtx operands[])
19758 rtx dst = operands[0];
19759 rtx src1 = operands[1];
19760 rtx src2 = operands[2];
19762 /* Canonicalize operand order. */
19763 if (ix86_swap_binary_operands_p (code, mode, operands))
19765 /* It is invalid to swap operands of different modes. */
19766 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19768 std::swap (src1, src2);
19771 /* Both source operands cannot be in memory. */
19772 if (MEM_P (src1) && MEM_P (src2))
19774 /* Optimization: Only read from memory once. */
19775 if (rtx_equal_p (src1, src2))
19777 src2 = force_reg (mode, src2);
19778 src1 = src2;
19780 else if (rtx_equal_p (dst, src1))
19781 src2 = force_reg (mode, src2);
19782 else
19783 src1 = force_reg (mode, src1);
19786 /* If the destination is memory, and we do not have matching source
19787 operands, do things in registers. */
19788 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19789 dst = gen_reg_rtx (mode);
19791 /* Source 1 cannot be a constant. */
19792 if (CONSTANT_P (src1))
19793 src1 = force_reg (mode, src1);
19795 /* Source 1 cannot be a non-matching memory. */
19796 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19797 src1 = force_reg (mode, src1);
19799 /* Improve address combine. */
19800 if (code == PLUS
19801 && GET_MODE_CLASS (mode) == MODE_INT
19802 && MEM_P (src2))
19803 src2 = force_reg (mode, src2);
19805 operands[1] = src1;
19806 operands[2] = src2;
19807 return dst;
19810 /* Similarly, but assume that the destination has already been
19811 set up properly. */
19813 void
19814 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19815 machine_mode mode, rtx operands[])
19817 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19818 gcc_assert (dst == operands[0]);
19821 /* Attempt to expand a binary operator. Make the expansion closer to the
 19822 actual machine, than just general_operand, which will allow 3 separate
19823 memory references (one output, two input) in a single insn. */
19825 void
19826 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19827 rtx operands[])
19829 rtx src1, src2, dst, op, clob;
19831 dst = ix86_fixup_binary_operands (code, mode, operands);
19832 src1 = operands[1];
19833 src2 = operands[2];
19835 /* Emit the instruction. */
19837 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19839 if (reload_completed
19840 && code == PLUS
19841 && !rtx_equal_p (dst, src1))
19843 /* This is going to be an LEA; avoid splitting it later. */
19844 emit_insn (op);
19846 else
19848 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19849 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19852 /* Fix up the destination if needed. */
19853 if (dst != operands[0])
19854 emit_move_insn (operands[0], dst);
19857 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19858 the given OPERANDS. */
19860 void
19861 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19862 rtx operands[])
19864 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19865 if (SUBREG_P (operands[1]))
19867 op1 = operands[1];
19868 op2 = operands[2];
19870 else if (SUBREG_P (operands[2]))
19872 op1 = operands[2];
19873 op2 = operands[1];
19875 /* Optimize (__m128i) d | (__m128i) e and similar code
19876 when d and e are float vectors into float vector logical
19877 insn. In C/C++ without using intrinsics there is no other way
19878 to express vector logical operation on float vectors than
19879 to cast them temporarily to integer vectors. */
19880 if (op1
19881 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19882 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19883 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19884 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19885 && SUBREG_BYTE (op1) == 0
19886 && (GET_CODE (op2) == CONST_VECTOR
19887 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19888 && SUBREG_BYTE (op2) == 0))
19889 && can_create_pseudo_p ())
19891 rtx dst;
19892 switch (GET_MODE (SUBREG_REG (op1)))
19894 case E_V4SFmode:
19895 case E_V8SFmode:
19896 case E_V16SFmode:
19897 case E_V2DFmode:
19898 case E_V4DFmode:
19899 case E_V8DFmode:
19900 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19901 if (GET_CODE (op2) == CONST_VECTOR)
19903 op2 = gen_lowpart (GET_MODE (dst), op2);
19904 op2 = force_reg (GET_MODE (dst), op2);
19906 else
19908 op1 = operands[1];
19909 op2 = SUBREG_REG (operands[2]);
19910 if (!vector_operand (op2, GET_MODE (dst)))
19911 op2 = force_reg (GET_MODE (dst), op2);
19913 op1 = SUBREG_REG (op1);
19914 if (!vector_operand (op1, GET_MODE (dst)))
19915 op1 = force_reg (GET_MODE (dst), op1);
19916 emit_insn (gen_rtx_SET (dst,
19917 gen_rtx_fmt_ee (code, GET_MODE (dst),
19918 op1, op2)));
19919 emit_move_insn (operands[0], gen_lowpart (mode, dst));
19920 return;
19921 default:
19922 break;
19925 if (!vector_operand (operands[1], mode))
19926 operands[1] = force_reg (mode, operands[1]);
19927 if (!vector_operand (operands[2], mode))
19928 operands[2] = force_reg (mode, operands[2]);
19929 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19930 emit_insn (gen_rtx_SET (operands[0],
19931 gen_rtx_fmt_ee (code, mode, operands[1],
19932 operands[2])));
19935 /* Return TRUE or FALSE depending on whether the binary operator meets the
19936 appropriate constraints. */
19938 bool
19939 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19940 rtx operands[3])
19942 rtx dst = operands[0];
19943 rtx src1 = operands[1];
19944 rtx src2 = operands[2];
19946 /* Both source operands cannot be in memory. */
19947 if (MEM_P (src1) && MEM_P (src2))
19948 return false;
19950 /* Canonicalize operand order for commutative operators. */
19951 if (ix86_swap_binary_operands_p (code, mode, operands))
19952 std::swap (src1, src2);
19954 /* If the destination is memory, we must have a matching source operand. */
19955 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19956 return false;
19958 /* Source 1 cannot be a constant. */
19959 if (CONSTANT_P (src1))
19960 return false;
19962 /* Source 1 cannot be a non-matching memory. */
19963 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19964 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19965 return (code == AND
19966 && (mode == HImode
19967 || mode == SImode
19968 || (TARGET_64BIT && mode == DImode))
19969 && satisfies_constraint_L (src2));
19971 return true;
19974 /* Attempt to expand a unary operator. Make the expansion closer to the
 19975 actual machine, than just general_operand, which will allow 2 separate
19976 memory references (one output, one input) in a single insn. */
19978 void
19979 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19980 rtx operands[])
19982 bool matching_memory = false;
19983 rtx src, dst, op, clob;
19985 dst = operands[0];
19986 src = operands[1];
19988 /* If the destination is memory, and we do not have matching source
19989 operands, do things in registers. */
19990 if (MEM_P (dst))
19992 if (rtx_equal_p (dst, src))
19993 matching_memory = true;
19994 else
19995 dst = gen_reg_rtx (mode);
 19998 /* When the source operand is memory, the destination must match. */
19999 if (MEM_P (src) && !matching_memory)
20000 src = force_reg (mode, src);
20002 /* Emit the instruction. */
20004 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20006 if (code == NOT)
20007 emit_insn (op);
20008 else
20010 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20011 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20014 /* Fix up the destination if needed. */
20015 if (dst != operands[0])
20016 emit_move_insn (operands[0], dst);
 20019 /* Split 32-bit/64-bit divmod with an 8-bit unsigned divmod if the dividend
 20020 and divisor are within the range [0-255]. */
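/* Illustrative sketch (schematic, not part of the original sources) of the
   shape of the emitted sequence:

     scratch = op2 | op3
     if ((scratch & ~0xff) == 0), i.e. both values fit in 8 bits:
       use an 8-bit unsigned divide (AL = quotient, AH = remainder)
     else:
       use the full-width signed/unsigned divmod, then jump past the
       8-bit path.  */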
20022 void
20023 ix86_split_idivmod (machine_mode mode, rtx operands[],
20024 bool signed_p)
20026 rtx_code_label *end_label, *qimode_label;
20027 rtx div, mod;
20028 rtx_insn *insn;
20029 rtx scratch, tmp0, tmp1, tmp2;
20030 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20031 rtx (*gen_zero_extend) (rtx, rtx);
20032 rtx (*gen_test_ccno_1) (rtx, rtx);
20034 switch (mode)
20036 case E_SImode:
20037 if (GET_MODE (operands[0]) == SImode)
20039 if (GET_MODE (operands[1]) == SImode)
20040 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20041 else
20042 gen_divmod4_1
20043 = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20044 gen_zero_extend = gen_zero_extendqisi2;
20046 else
20048 gen_divmod4_1
20049 = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20050 gen_zero_extend = gen_zero_extendqidi2;
20052 gen_test_ccno_1 = gen_testsi_ccno_1;
20053 break;
20054 case E_DImode:
20055 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20056 gen_test_ccno_1 = gen_testdi_ccno_1;
20057 gen_zero_extend = gen_zero_extendqidi2;
20058 break;
20059 default:
20060 gcc_unreachable ();
20063 end_label = gen_label_rtx ();
20064 qimode_label = gen_label_rtx ();
20066 scratch = gen_reg_rtx (mode);
 20068 /* Use 8-bit unsigned divmod if the dividend and divisor are within
20069 the range [0-255]. */
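 /* ORing the dividend and the divisor lets a single test cover both:
    (op2 | op3) has no bits set above bit 7 only when neither operand
    does, i.e. when both values are in [0, 255].  */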
20070 emit_move_insn (scratch, operands[2]);
20071 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20072 scratch, 1, OPTAB_DIRECT);
20073 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20074 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20075 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20076 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20077 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20078 pc_rtx);
20079 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20080 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20081 JUMP_LABEL (insn) = qimode_label;
 20083 /* Generate the original signed/unsigned divmod. */
20084 div = gen_divmod4_1 (operands[0], operands[1],
20085 operands[2], operands[3]);
20086 emit_insn (div);
20088 /* Branch to the end. */
20089 emit_jump_insn (gen_jump (end_label));
20090 emit_barrier ();
20092 /* Generate 8bit unsigned divide. */
20093 emit_label (qimode_label);
 20094 /* Don't use operands[0] for the result of the 8-bit divide since not all
20095 registers support QImode ZERO_EXTRACT. */
20096 tmp0 = lowpart_subreg (HImode, scratch, mode);
20097 tmp1 = lowpart_subreg (HImode, operands[2], mode);
20098 tmp2 = lowpart_subreg (QImode, operands[3], mode);
20099 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20101 if (signed_p)
20103 div = gen_rtx_DIV (mode, operands[2], operands[3]);
20104 mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20106 else
20108 div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20109 mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20111 if (mode == SImode)
20113 if (GET_MODE (operands[0]) != SImode)
20114 div = gen_rtx_ZERO_EXTEND (DImode, div);
20115 if (GET_MODE (operands[1]) != SImode)
20116 mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20119 /* Extract remainder from AH. */
20120 tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20121 tmp0, GEN_INT (8), GEN_INT (8));
20122 if (REG_P (operands[1]))
20123 insn = emit_move_insn (operands[1], tmp1);
20124 else
 20126 /* Need a new scratch register since the old one has the result
 20127 of the 8-bit divide. */
20128 scratch = gen_reg_rtx (GET_MODE (operands[1]));
20129 emit_move_insn (scratch, tmp1);
20130 insn = emit_move_insn (operands[1], scratch);
20132 set_unique_reg_note (insn, REG_EQUAL, mod);
20134 /* Zero extend quotient from AL. */
20135 tmp1 = gen_lowpart (QImode, tmp0);
20136 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20137 set_unique_reg_note (insn, REG_EQUAL, div);
20139 emit_label (end_label);
20142 #define LEA_MAX_STALL (3)
20143 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20145 /* Increase given DISTANCE in half-cycles according to
20146 dependencies between PREV and NEXT instructions.
20147 Add 1 half-cycle if there is no dependency and
 20148 go to the next cycle if there is some dependency. */
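 /* For illustration: distances are counted in half-cycles, so a dependent
    pair of insns first rounds DISTANCE up to the next cycle boundary and
    then adds a full cycle (distance + (distance & 1) + 2), while an
    independent pair only adds a single half-cycle.  */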
20150 static unsigned int
20151 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20153 df_ref def, use;
20155 if (!prev || !next)
20156 return distance + (distance & 1) + 2;
20158 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20159 return distance + 1;
20161 FOR_EACH_INSN_USE (use, next)
20162 FOR_EACH_INSN_DEF (def, prev)
20163 if (!DF_REF_IS_ARTIFICIAL (def)
20164 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20165 return distance + (distance & 1) + 2;
20167 return distance + 1;
20170 /* Function checks if instruction INSN defines register number
20171 REGNO1 or REGNO2. */
20173 static bool
20174 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20175 rtx_insn *insn)
20177 df_ref def;
20179 FOR_EACH_INSN_DEF (def, insn)
20180 if (DF_REF_REG_DEF_P (def)
20181 && !DF_REF_IS_ARTIFICIAL (def)
20182 && (regno1 == DF_REF_REGNO (def)
20183 || regno2 == DF_REF_REGNO (def)))
20184 return true;
20186 return false;
20189 /* Function checks if instruction INSN uses register number
20190 REGNO as a part of address expression. */
20192 static bool
20193 insn_uses_reg_mem (unsigned int regno, rtx insn)
20195 df_ref use;
20197 FOR_EACH_INSN_USE (use, insn)
20198 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20199 return true;
20201 return false;
20204 /* Search backward for non-agu definition of register number REGNO1
20205 or register number REGNO2 in basic block starting from instruction
20206 START up to head of basic block or instruction INSN.
20208 Function puts true value into *FOUND var if definition was found
20209 and false otherwise.
20211 Distance in half-cycles between START and found instruction or head
20212 of BB is added to DISTANCE and returned. */
20214 static int
20215 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20216 rtx_insn *insn, int distance,
20217 rtx_insn *start, bool *found)
20219 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20220 rtx_insn *prev = start;
20221 rtx_insn *next = NULL;
20223 *found = false;
20225 while (prev
20226 && prev != insn
20227 && distance < LEA_SEARCH_THRESHOLD)
20229 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20231 distance = increase_distance (prev, next, distance);
20232 if (insn_defines_reg (regno1, regno2, prev))
20234 if (recog_memoized (prev) < 0
20235 || get_attr_type (prev) != TYPE_LEA)
20237 *found = true;
20238 return distance;
20242 next = prev;
20244 if (prev == BB_HEAD (bb))
20245 break;
20247 prev = PREV_INSN (prev);
20250 return distance;
20253 /* Search backward for non-agu definition of register number REGNO1
20254 or register number REGNO2 in INSN's basic block until
20255 1. Pass LEA_SEARCH_THRESHOLD instructions, or
20256 2. Reach neighbor BBs boundary, or
20257 3. Reach agu definition.
20258 Returns the distance between the non-agu definition point and INSN.
20259 If no definition point, returns -1. */
20261 static int
20262 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
20263 rtx_insn *insn)
20265 basic_block bb = BLOCK_FOR_INSN (insn);
20266 int distance = 0;
20267 bool found = false;
20269 if (insn != BB_HEAD (bb))
20270 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
20271 distance, PREV_INSN (insn),
20272 &found);
20274 if (!found && distance < LEA_SEARCH_THRESHOLD)
20276 edge e;
20277 edge_iterator ei;
20278 bool simple_loop = false;
20280 FOR_EACH_EDGE (e, ei, bb->preds)
20281 if (e->src == bb)
20283 simple_loop = true;
20284 break;
20287 if (simple_loop)
20288 distance = distance_non_agu_define_in_bb (regno1, regno2,
20289 insn, distance,
20290 BB_END (bb), &found);
20291 else
20293 int shortest_dist = -1;
20294 bool found_in_bb = false;
20296 FOR_EACH_EDGE (e, ei, bb->preds)
20298 int bb_dist
20299 = distance_non_agu_define_in_bb (regno1, regno2,
20300 insn, distance,
20301 BB_END (e->src),
20302 &found_in_bb);
20303 if (found_in_bb)
20305 if (shortest_dist < 0)
20306 shortest_dist = bb_dist;
20307 else if (bb_dist > 0)
20308 shortest_dist = MIN (bb_dist, shortest_dist);
20310 found = true;
20314 distance = shortest_dist;
20318 /* get_attr_type may modify recog data. We want to make sure
20319 that recog data is valid for instruction INSN, on which
20320 distance_non_agu_define is called. INSN is unchanged here. */
20321 extract_insn_cached (insn);
20323 if (!found)
20324 return -1;
20326 return distance >> 1;
20329 /* Return the distance in half-cycles between INSN and the next
 20330 insn that uses register number REGNO in a memory address, added
 20331 to DISTANCE. Return -1 if REGNO is set.
20333 Put true value into *FOUND if register usage was found and
20334 false otherwise.
20335 Put true value into *REDEFINED if register redefinition was
20336 found and false otherwise. */
20338 static int
20339 distance_agu_use_in_bb (unsigned int regno,
20340 rtx_insn *insn, int distance, rtx_insn *start,
20341 bool *found, bool *redefined)
20343 basic_block bb = NULL;
20344 rtx_insn *next = start;
20345 rtx_insn *prev = NULL;
20347 *found = false;
20348 *redefined = false;
20350 if (start != NULL_RTX)
20352 bb = BLOCK_FOR_INSN (start);
20353 if (start != BB_HEAD (bb))
20354 /* If insn and start belong to the same bb, set prev to insn,
20355 so the call to increase_distance will increase the distance
20356 between insns by 1. */
20357 prev = insn;
20360 while (next
20361 && next != insn
20362 && distance < LEA_SEARCH_THRESHOLD)
20364 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
20366 distance = increase_distance(prev, next, distance);
20367 if (insn_uses_reg_mem (regno, next))
20369 /* Return DISTANCE if OP0 is used in memory
20370 address in NEXT. */
20371 *found = true;
20372 return distance;
20375 if (insn_defines_reg (regno, INVALID_REGNUM, next))
20377 /* Return -1 if OP0 is set in NEXT. */
20378 *redefined = true;
20379 return -1;
20382 prev = next;
20385 if (next == BB_END (bb))
20386 break;
20388 next = NEXT_INSN (next);
20391 return distance;
20394 /* Return the distance between INSN and the next insn that uses
 20395 register number REGNO0 in a memory address. Return -1 if no such
 20396 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
20398 static int
20399 distance_agu_use (unsigned int regno0, rtx_insn *insn)
20401 basic_block bb = BLOCK_FOR_INSN (insn);
20402 int distance = 0;
20403 bool found = false;
20404 bool redefined = false;
20406 if (insn != BB_END (bb))
20407 distance = distance_agu_use_in_bb (regno0, insn, distance,
20408 NEXT_INSN (insn),
20409 &found, &redefined);
20411 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
20413 edge e;
20414 edge_iterator ei;
20415 bool simple_loop = false;
20417 FOR_EACH_EDGE (e, ei, bb->succs)
20418 if (e->dest == bb)
20420 simple_loop = true;
20421 break;
20424 if (simple_loop)
20425 distance = distance_agu_use_in_bb (regno0, insn,
20426 distance, BB_HEAD (bb),
20427 &found, &redefined);
20428 else
20430 int shortest_dist = -1;
20431 bool found_in_bb = false;
20432 bool redefined_in_bb = false;
20434 FOR_EACH_EDGE (e, ei, bb->succs)
20436 int bb_dist
20437 = distance_agu_use_in_bb (regno0, insn,
20438 distance, BB_HEAD (e->dest),
20439 &found_in_bb, &redefined_in_bb);
20440 if (found_in_bb)
20442 if (shortest_dist < 0)
20443 shortest_dist = bb_dist;
20444 else if (bb_dist > 0)
20445 shortest_dist = MIN (bb_dist, shortest_dist);
20447 found = true;
20451 distance = shortest_dist;
20455 if (!found || redefined)
20456 return -1;
20458 return distance >> 1;
 20461 /* Define this macro to tune LEA priority vs ADD; it takes effect when
 20462 there is a choice between LEA and ADD.
 20463 Negative value: ADD is preferred over LEA
 20464 Zero: Neutral
 20465 Positive value: LEA is preferred over ADD */
20466 #define IX86_LEA_PRIORITY 0
20468 /* Return true if usage of lea INSN has performance advantage
20469 over a sequence of instructions. Instructions sequence has
20470 SPLIT_COST cycles higher latency than lea latency. */
20472 static bool
20473 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
20474 unsigned int regno2, int split_cost, bool has_scale)
20476 int dist_define, dist_use;
 20478 /* For Silvermont, if using a 2-source or 3-source LEA for
 20479 non-destructive destination purposes, or due to wanting the
 20480 ability to use SCALE, the use of LEA is justified. */
20481 if (TARGET_SILVERMONT || TARGET_INTEL)
20483 if (has_scale)
20484 return true;
20485 if (split_cost < 1)
20486 return false;
20487 if (regno0 == regno1 || regno0 == regno2)
20488 return false;
20489 return true;
20492 dist_define = distance_non_agu_define (regno1, regno2, insn);
20493 dist_use = distance_agu_use (regno0, insn);
20495 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
 20497 /* If there is no non-AGU operand definition, no AGU
 20498 operand usage and the split cost is 0, then both the lea
 20499 and non-lea variants have the same priority. Currently
 20500 we prefer lea for 64-bit code and non-lea for 32-bit
 20501 code. */
20502 if (dist_use < 0 && split_cost == 0)
20503 return TARGET_64BIT || IX86_LEA_PRIORITY;
20504 else
20505 return true;
 20508 /* The longer the definition distance, the more preferable lea is.
 20509 Here we adjust it to take into account the splitting cost and
 20510 the lea priority. */
20511 dist_define += split_cost + IX86_LEA_PRIORITY;
 20513 /* If there is no use in a memory address then we just check
 20514 that the split cost exceeds the AGU stall. */
20515 if (dist_use < 0)
20516 return dist_define > LEA_MAX_STALL;
20518 /* If this insn has both backward non-agu dependence and forward
 20519 agu dependence, the one with the shorter distance takes effect. */
20520 return dist_define >= dist_use;
20523 /* Return true if it is legal to clobber flags by INSN and
20524 false otherwise. */
20526 static bool
20527 ix86_ok_to_clobber_flags (rtx_insn *insn)
20529 basic_block bb = BLOCK_FOR_INSN (insn);
20530 df_ref use;
20531 bitmap live;
20533 while (insn)
20535 if (NONDEBUG_INSN_P (insn))
20537 FOR_EACH_INSN_USE (use, insn)
20538 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
20539 return false;
20541 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
20542 return true;
20545 if (insn == BB_END (bb))
20546 break;
20548 insn = NEXT_INSN (insn);
20551 live = df_get_live_out(bb);
20552 return !REGNO_REG_SET_P (live, FLAGS_REG);
20555 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20556 move and add to avoid AGU stalls. */
20558 bool
20559 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20561 unsigned int regno0, regno1, regno2;
20563 /* Check if we need to optimize. */
20564 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20565 return false;
20567 /* Check it is correct to split here. */
20568 if (!ix86_ok_to_clobber_flags(insn))
20569 return false;
20571 regno0 = true_regnum (operands[0]);
20572 regno1 = true_regnum (operands[1]);
20573 regno2 = true_regnum (operands[2]);
 20575 /* We need to split only adds with a non-destructive
 20576 destination operand. */
20577 if (regno0 == regno1 || regno0 == regno2)
20578 return false;
20579 else
20580 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
 20583 /* Return true if we should emit an lea instruction instead of a mov
 20584 instruction. */
20586 bool
20587 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20589 unsigned int regno0, regno1;
20591 /* Check if we need to optimize. */
20592 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20593 return false;
20595 /* Use lea for reg to reg moves only. */
20596 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20597 return false;
20599 regno0 = true_regnum (operands[0]);
20600 regno1 = true_regnum (operands[1]);
20602 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20605 /* Return true if we need to split lea into a sequence of
20606 instructions to avoid AGU stalls. */
20608 bool
20609 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20611 unsigned int regno0, regno1, regno2;
20612 int split_cost;
20613 struct ix86_address parts;
20614 int ok;
20616 /* Check we need to optimize. */
20617 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20618 return false;
20620 /* The "at least two components" test below might not catch simple
20621 move or zero extension insns if parts.base is non-NULL and parts.disp
20622 is const0_rtx as the only components in the address, e.g. if the
20623 register is %rbp or %r13. As this test is much cheaper and moves or
20624 zero extensions are the common case, do this check first. */
20625 if (REG_P (operands[1])
20626 || (SImode_address_operand (operands[1], VOIDmode)
20627 && REG_P (XEXP (operands[1], 0))))
20628 return false;
20630 /* Check if it is OK to split here. */
20631 if (!ix86_ok_to_clobber_flags (insn))
20632 return false;
20634 ok = ix86_decompose_address (operands[1], &parts);
20635 gcc_assert (ok);
20637 /* There should be at least two components in the address. */
20638 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20639 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20640 return false;
 20642 /* We should not split into add if a non-legitimate pic
 20643 operand is used as the displacement. */
20644 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20645 return false;
20647 regno0 = true_regnum (operands[0]) ;
20648 regno1 = INVALID_REGNUM;
20649 regno2 = INVALID_REGNUM;
20651 if (parts.base)
20652 regno1 = true_regnum (parts.base);
20653 if (parts.index)
20654 regno2 = true_regnum (parts.index);
20656 split_cost = 0;
20658 /* Compute how many cycles we will add to execution time
 20659 if we split the lea into a sequence of instructions. */
20660 if (parts.base || parts.index)
 20662 /* Have to use a mov instruction if the non-destructive
 20663 destination form is used. */
20664 if (regno1 != regno0 && regno2 != regno0)
20665 split_cost += 1;
20667 /* Have to add index to base if both exist. */
20668 if (parts.base && parts.index)
20669 split_cost += 1;
20671 /* Have to use shift and adds if scale is 2 or greater. */
20672 if (parts.scale > 1)
20674 if (regno0 != regno1)
20675 split_cost += 1;
20676 else if (regno2 == regno0)
20677 split_cost += 4;
20678 else
20679 split_cost += parts.scale;
20682 /* Have to use add instruction with immediate if
20683 disp is non zero. */
20684 if (parts.disp && parts.disp != const0_rtx)
20685 split_cost += 1;
20687 /* Subtract the price of lea. */
20688 split_cost -= 1;
20691 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20692 parts.scale > 1);
 20695 /* Emit x86 binary operator CODE in mode MODE, where the first operand
 20696 matches the destination. The RTX includes a clobber of FLAGS_REG. */
20698 static void
20699 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20700 rtx dst, rtx src)
20702 rtx op, clob;
20704 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20705 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20707 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20710 /* Return true if regno1 def is nearest to the insn. */
20712 static bool
20713 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20715 rtx_insn *prev = insn;
20716 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20718 if (insn == start)
20719 return false;
20720 while (prev && prev != start)
20722 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20724 prev = PREV_INSN (prev);
20725 continue;
20727 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20728 return true;
20729 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20730 return false;
20731 prev = PREV_INSN (prev);
20734 /* None of the regs is defined in the bb. */
20735 return false;
 20738 /* Split lea instructions into a sequence of instructions
 20739 which are executed on the ALU to avoid AGU stalls.
 20740 It is assumed that it is allowed to clobber the flags register
 20741 at the lea position. */
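 /* Illustrative example (schematic, not from the original sources): an
    address with base, index, scale and displacement such as

	lea	disp(base, index, 4), dest

    may be rewritten into ALU instructions roughly as

	mov	index, dest
	shl	$2, dest
	add	base, dest
	add	$disp, dest

    The exact sequence emitted below depends on which address components
    are present and on whether they overlap the destination.  */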
20743 void
20744 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20746 unsigned int regno0, regno1, regno2;
20747 struct ix86_address parts;
20748 rtx target, tmp;
20749 int ok, adds;
20751 ok = ix86_decompose_address (operands[1], &parts);
20752 gcc_assert (ok);
20754 target = gen_lowpart (mode, operands[0]);
20756 regno0 = true_regnum (target);
20757 regno1 = INVALID_REGNUM;
20758 regno2 = INVALID_REGNUM;
20760 if (parts.base)
20762 parts.base = gen_lowpart (mode, parts.base);
20763 regno1 = true_regnum (parts.base);
20766 if (parts.index)
20768 parts.index = gen_lowpart (mode, parts.index);
20769 regno2 = true_regnum (parts.index);
20772 if (parts.disp)
20773 parts.disp = gen_lowpart (mode, parts.disp);
20775 if (parts.scale > 1)
20777 /* Case r1 = r1 + ... */
20778 if (regno1 == regno0)
20780 /* If we have a case r1 = r1 + C * r2 then we
 20781 should use multiplication, which is very
 20782 expensive. Assume the cost model is wrong if we
 20783 have such a case here. */
20784 gcc_assert (regno2 != regno0);
20786 for (adds = parts.scale; adds > 0; adds--)
20787 ix86_emit_binop (PLUS, mode, target, parts.index);
20789 else
20791 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20792 if (regno0 != regno2)
20793 emit_insn (gen_rtx_SET (target, parts.index));
20795 /* Use shift for scaling. */
20796 ix86_emit_binop (ASHIFT, mode, target,
20797 GEN_INT (exact_log2 (parts.scale)));
20799 if (parts.base)
20800 ix86_emit_binop (PLUS, mode, target, parts.base);
20802 if (parts.disp && parts.disp != const0_rtx)
20803 ix86_emit_binop (PLUS, mode, target, parts.disp);
20806 else if (!parts.base && !parts.index)
20808 gcc_assert(parts.disp);
20809 emit_insn (gen_rtx_SET (target, parts.disp));
20811 else
20813 if (!parts.base)
20815 if (regno0 != regno2)
20816 emit_insn (gen_rtx_SET (target, parts.index));
20818 else if (!parts.index)
20820 if (regno0 != regno1)
20821 emit_insn (gen_rtx_SET (target, parts.base));
20823 else
20825 if (regno0 == regno1)
20826 tmp = parts.index;
20827 else if (regno0 == regno2)
20828 tmp = parts.base;
20829 else
20831 rtx tmp1;
20833 /* Find better operand for SET instruction, depending
20834 on which definition is farther from the insn. */
20835 if (find_nearest_reg_def (insn, regno1, regno2))
20836 tmp = parts.index, tmp1 = parts.base;
20837 else
20838 tmp = parts.base, tmp1 = parts.index;
20840 emit_insn (gen_rtx_SET (target, tmp));
20842 if (parts.disp && parts.disp != const0_rtx)
20843 ix86_emit_binop (PLUS, mode, target, parts.disp);
20845 ix86_emit_binop (PLUS, mode, target, tmp1);
20846 return;
20849 ix86_emit_binop (PLUS, mode, target, tmp);
20852 if (parts.disp && parts.disp != const0_rtx)
20853 ix86_emit_binop (PLUS, mode, target, parts.disp);
20857 /* Return true if it is ok to optimize an ADD operation to LEA
 20858 operation to avoid flag register consumption. For most processors,
 20859 ADD is faster than LEA. For processors like BONNELL, if the
 20860 destination register of LEA holds an actual address which will be
 20861 used soon, LEA is better; otherwise ADD is better. */
20863 bool
20864 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20866 unsigned int regno0 = true_regnum (operands[0]);
20867 unsigned int regno1 = true_regnum (operands[1]);
20868 unsigned int regno2 = true_regnum (operands[2]);
20870 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20871 if (regno0 != regno1 && regno0 != regno2)
20872 return true;
20874 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20875 return false;
20877 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20880 /* Return true if destination reg of SET_BODY is shift count of
20881 USE_BODY. */
20883 static bool
20884 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20886 rtx set_dest;
20887 rtx shift_rtx;
20888 int i;
20890 /* Retrieve destination of SET_BODY. */
20891 switch (GET_CODE (set_body))
20893 case SET:
20894 set_dest = SET_DEST (set_body);
20895 if (!set_dest || !REG_P (set_dest))
20896 return false;
20897 break;
20898 case PARALLEL:
20899 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20900 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20901 use_body))
20902 return true;
20903 /* FALLTHROUGH */
20904 default:
20905 return false;
20908 /* Retrieve shift count of USE_BODY. */
20909 switch (GET_CODE (use_body))
20911 case SET:
20912 shift_rtx = XEXP (use_body, 1);
20913 break;
20914 case PARALLEL:
20915 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20916 if (ix86_dep_by_shift_count_body (set_body,
20917 XVECEXP (use_body, 0, i)))
20918 return true;
20919 /* FALLTHROUGH */
20920 default:
20921 return false;
20924 if (shift_rtx
20925 && (GET_CODE (shift_rtx) == ASHIFT
20926 || GET_CODE (shift_rtx) == LSHIFTRT
20927 || GET_CODE (shift_rtx) == ASHIFTRT
20928 || GET_CODE (shift_rtx) == ROTATE
20929 || GET_CODE (shift_rtx) == ROTATERT))
20931 rtx shift_count = XEXP (shift_rtx, 1);
20933 /* Return true if shift count is dest of SET_BODY. */
20934 if (REG_P (shift_count))
 20936 /* Add a check since it can be invoked before register
 20937 allocation in the pre-reload scheduler. */
20938 if (reload_completed
20939 && true_regnum (set_dest) == true_regnum (shift_count))
20940 return true;
20941 else if (REGNO(set_dest) == REGNO(shift_count))
20942 return true;
20946 return false;
20949 /* Return true if destination reg of SET_INSN is shift count of
20950 USE_INSN. */
20952 bool
20953 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20955 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20956 PATTERN (use_insn));
20959 /* Return TRUE or FALSE depending on whether the unary operator meets the
20960 appropriate constraints. */
20962 bool
20963 ix86_unary_operator_ok (enum rtx_code,
20964 machine_mode,
20965 rtx operands[2])
20967 /* If one of operands is memory, source and destination must match. */
20968 if ((MEM_P (operands[0])
20969 || MEM_P (operands[1]))
20970 && ! rtx_equal_p (operands[0], operands[1]))
20971 return false;
20972 return true;
20975 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20976 are ok, keeping in mind the possible movddup alternative. */
20978 bool
20979 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
20981 if (MEM_P (operands[0]))
20982 return rtx_equal_p (operands[0], operands[1 + high]);
20983 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20984 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20985 return true;
20988 /* Post-reload splitter for converting an SF or DFmode value in an
20989 SSE register into an unsigned SImode. */
20991 void
20992 ix86_split_convert_uns_si_sse (rtx operands[])
20994 machine_mode vecmode;
20995 rtx value, large, zero_or_two31, input, two31, x;
20997 large = operands[1];
20998 zero_or_two31 = operands[2];
20999 input = operands[3];
21000 two31 = operands[4];
21001 vecmode = GET_MODE (large);
21002 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21004 /* Load up the value into the low element. We must ensure that the other
21005 elements are valid floats -- zero is the easiest such value. */
21006 if (MEM_P (input))
21008 if (vecmode == V4SFmode)
21009 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21010 else
21011 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21013 else
21015 input = gen_rtx_REG (vecmode, REGNO (input));
21016 emit_move_insn (value, CONST0_RTX (vecmode));
21017 if (vecmode == V4SFmode)
21018 emit_insn (gen_sse_movss (value, value, input));
21019 else
21020 emit_insn (gen_sse2_movsd (value, value, input));
21023 emit_move_insn (large, two31);
21024 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21026 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21027 emit_insn (gen_rtx_SET (large, x));
21029 x = gen_rtx_AND (vecmode, zero_or_two31, large);
21030 emit_insn (gen_rtx_SET (zero_or_two31, x));
21032 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21033 emit_insn (gen_rtx_SET (value, x));
21035 large = gen_rtx_REG (V4SImode, REGNO (large));
21036 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21038 x = gen_rtx_REG (V4SImode, REGNO (value));
21039 if (vecmode == V4SFmode)
21040 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21041 else
21042 emit_insn (gen_sse2_cvttpd2dq (x, value));
21043 value = x;
21045 emit_insn (gen_xorv4si3 (value, value, large));
21048 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21049 Expects the 64-bit DImode to be supplied in a pair of integral
21050 registers. Requires SSE2; will use SSE3 if available. For x86_32,
21051 -mfpmath=sse, !optimize_size only. */
21053 void
21054 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21056 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21057 rtx int_xmm, fp_xmm;
21058 rtx biases, exponents;
21059 rtx x;
21061 int_xmm = gen_reg_rtx (V4SImode);
21062 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21063 emit_insn (gen_movdi_to_sse (int_xmm, input));
21064 else if (TARGET_SSE_SPLIT_REGS)
21066 emit_clobber (int_xmm);
21067 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21069 else
21071 x = gen_reg_rtx (V2DImode);
21072 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21073 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21076 x = gen_rtx_CONST_VECTOR (V4SImode,
21077 gen_rtvec (4, GEN_INT (0x43300000UL),
21078 GEN_INT (0x45300000UL),
21079 const0_rtx, const0_rtx));
21080 exponents = validize_mem (force_const_mem (V4SImode, x));
21082 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21083 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21085 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21086 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21087 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21088 (0x1.0p84 + double(fp_value_hi_xmm)).
21089 Note these exponents differ by 32. */
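 /* Illustrative worked example: for the 64-bit input 0x0000000500000007
    the low half becomes the double 0x1.0p52 + 7 and the high half becomes
    0x1.0p84 + 5 * 0x1.0p32; after subtracting the two biases below and
    adding the halves, the result is 5 * 2^32 + 7, the original value.  */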
21091 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21093 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21094 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
21095 real_ldexp (&bias_lo_rvt, &dconst1, 52);
21096 real_ldexp (&bias_hi_rvt, &dconst1, 84);
21097 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21098 x = const_double_from_real_value (bias_hi_rvt, DFmode);
21099 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21100 biases = validize_mem (force_const_mem (V2DFmode, biases));
21101 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21103 /* Add the upper and lower DFmode values together. */
21104 if (TARGET_SSE3)
21105 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21106 else
21108 x = copy_to_mode_reg (V2DFmode, fp_xmm);
21109 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21110 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21113 ix86_expand_vector_extract (false, target, fp_xmm, 0);
21116 /* Not used, but eases macroization of patterns. */
21117 void
21118 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21120 gcc_unreachable ();
21123 /* Convert an unsigned SImode value into a DFmode. Only currently used
21124 for SSE, but applicable anywhere. */
21126 void
21127 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21129 REAL_VALUE_TYPE TWO31r;
21130 rtx x, fp;
21132 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21133 NULL, 1, OPTAB_DIRECT);
21135 fp = gen_reg_rtx (DFmode);
21136 emit_insn (gen_floatsidf2 (fp, x));
21138 real_ldexp (&TWO31r, &dconst1, 31);
21139 x = const_double_from_real_value (TWO31r, DFmode);
21141 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21142 if (x != target)
21143 emit_move_insn (target, x);
21146 /* Convert a signed DImode value into a DFmode. Only used for SSE in
21147 32-bit mode; otherwise we have a direct convert instruction. */
21149 void
21150 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21152 REAL_VALUE_TYPE TWO32r;
21153 rtx fp_lo, fp_hi, x;
21155 fp_lo = gen_reg_rtx (DFmode);
21156 fp_hi = gen_reg_rtx (DFmode);
21158 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21160 real_ldexp (&TWO32r, &dconst1, 32);
21161 x = const_double_from_real_value (TWO32r, DFmode);
21162 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21164 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21166 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21167 0, OPTAB_DIRECT);
21168 if (x != target)
21169 emit_move_insn (target, x);
21172 /* Convert an unsigned SImode value into a SFmode, using only SSE.
21173 For x86_32, -mfpmath=sse, !optimize_size only. */
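 /* Schematically: the input is split as val = hi * 0x10000 + lo with
    hi, lo < 0x10000.  Both halves convert to SFmode exactly and the
    multiplication by 0x1.0p16 is exact as well, so the only rounding
    happens in the final addition.  */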
21174 void
21175 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21177 REAL_VALUE_TYPE ONE16r;
21178 rtx fp_hi, fp_lo, int_hi, int_lo, x;
21180 real_ldexp (&ONE16r, &dconst1, 16);
21181 x = const_double_from_real_value (ONE16r, SFmode);
21182 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
21183 NULL, 0, OPTAB_DIRECT);
21184 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
21185 NULL, 0, OPTAB_DIRECT);
21186 fp_hi = gen_reg_rtx (SFmode);
21187 fp_lo = gen_reg_rtx (SFmode);
21188 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21189 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21190 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21191 0, OPTAB_DIRECT);
21192 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21193 0, OPTAB_DIRECT);
21194 if (!rtx_equal_p (target, fp_hi))
21195 emit_move_insn (target, fp_hi);
21198 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
21199 a vector of unsigned ints VAL to vector of floats TARGET. */
21201 void
21202 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21204 rtx tmp[8];
21205 REAL_VALUE_TYPE TWO16r;
21206 machine_mode intmode = GET_MODE (val);
21207 machine_mode fltmode = GET_MODE (target);
21208 rtx (*cvt) (rtx, rtx);
21210 if (intmode == V4SImode)
21211 cvt = gen_floatv4siv4sf2;
21212 else
21213 cvt = gen_floatv8siv8sf2;
21214 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21215 tmp[0] = force_reg (intmode, tmp[0]);
21216 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21217 OPTAB_DIRECT);
21218 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21219 NULL_RTX, 1, OPTAB_DIRECT);
21220 tmp[3] = gen_reg_rtx (fltmode);
21221 emit_insn (cvt (tmp[3], tmp[1]));
21222 tmp[4] = gen_reg_rtx (fltmode);
21223 emit_insn (cvt (tmp[4], tmp[2]));
21224 real_ldexp (&TWO16r, &dconst1, 16);
21225 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21226 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21227 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21228 OPTAB_DIRECT);
21229 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21230 OPTAB_DIRECT);
21231 if (tmp[7] != target)
21232 emit_move_insn (target, tmp[7]);
21235 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21236 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21237 This is done by doing just signed conversion if < 0x1p31, and otherwise by
21238 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
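 /* Illustrative example (not from the original sources): for a lane
    holding 3.5e9 (>= 0x1p31) the code below subtracts 0x1p31, the signed
    conversion then yields 1352516352, and the caller xors in the
    0x80000000 bit taken from *XORP to recover 3500000000.  Lanes below
    0x1p31 are left unchanged and get a zero xor mask.  */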
 21240 rtx
 21241 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21243 REAL_VALUE_TYPE TWO31r;
21244 rtx two31r, tmp[4];
21245 machine_mode mode = GET_MODE (val);
21246 machine_mode scalarmode = GET_MODE_INNER (mode);
21247 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
21248 rtx (*cmp) (rtx, rtx, rtx, rtx);
21249 int i;
21251 for (i = 0; i < 3; i++)
21252 tmp[i] = gen_reg_rtx (mode);
21253 real_ldexp (&TWO31r, &dconst1, 31);
21254 two31r = const_double_from_real_value (TWO31r, scalarmode);
21255 two31r = ix86_build_const_vector (mode, 1, two31r);
21256 two31r = force_reg (mode, two31r);
21257 switch (mode)
21259 case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
21260 case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
21261 case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
21262 case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
21263 default: gcc_unreachable ();
21265 tmp[3] = gen_rtx_LE (mode, two31r, val);
21266 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
21267 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
21268 0, OPTAB_DIRECT);
21269 if (intmode == V4SImode || TARGET_AVX2)
21270 *xorp = expand_simple_binop (intmode, ASHIFT,
21271 gen_lowpart (intmode, tmp[0]),
21272 GEN_INT (31), NULL_RTX, 0,
21273 OPTAB_DIRECT);
21274 else
21276 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
21277 two31 = ix86_build_const_vector (intmode, 1, two31);
21278 *xorp = expand_simple_binop (intmode, AND,
21279 gen_lowpart (intmode, tmp[0]),
21280 two31, NULL_RTX, 0,
21281 OPTAB_DIRECT);
21283 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
21284 0, OPTAB_DIRECT);
21287 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
21288 then replicate the value for all elements of the vector
21289 register. */
21292 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
21294 int i, n_elt;
21295 rtvec v;
21296 machine_mode scalar_mode;
21298 switch (mode)
21300 case E_V64QImode:
21301 case E_V32QImode:
21302 case E_V16QImode:
21303 case E_V32HImode:
21304 case E_V16HImode:
21305 case E_V8HImode:
21306 case E_V16SImode:
21307 case E_V8SImode:
21308 case E_V4SImode:
21309 case E_V8DImode:
21310 case E_V4DImode:
21311 case E_V2DImode:
21312 gcc_assert (vect);
21313 /* FALLTHRU */
21314 case E_V16SFmode:
21315 case E_V8SFmode:
21316 case E_V4SFmode:
21317 case E_V8DFmode:
21318 case E_V4DFmode:
21319 case E_V2DFmode:
21320 n_elt = GET_MODE_NUNITS (mode);
21321 v = rtvec_alloc (n_elt);
21322 scalar_mode = GET_MODE_INNER (mode);
21324 RTVEC_ELT (v, 0) = value;
21326 for (i = 1; i < n_elt; ++i)
21327 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
21329 return gen_rtx_CONST_VECTOR (mode, v);
21331 default:
21332 gcc_unreachable ();
21336 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
21337 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
21338 for an SSE register. If VECT is true, then replicate the mask for
21339 all elements of the vector register. If INVERT is true, then create
21340 a mask excluding the sign bit. */
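/* For example, for V4SFmode and !INVERT each element of the result is
   0x80000000 (i.e. -0.0f); with INVERT it is 0x7fffffff, everything except
   the sign bit.  */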
21343 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
21345 machine_mode vec_mode, imode;
21346 wide_int w;
21347 rtx mask, v;
21349 switch (mode)
21351 case E_V16SImode:
21352 case E_V16SFmode:
21353 case E_V8SImode:
21354 case E_V4SImode:
21355 case E_V8SFmode:
21356 case E_V4SFmode:
21357 vec_mode = mode;
21358 imode = SImode;
21359 break;
21361 case E_V8DImode:
21362 case E_V4DImode:
21363 case E_V2DImode:
21364 case E_V8DFmode:
21365 case E_V4DFmode:
21366 case E_V2DFmode:
21367 vec_mode = mode;
21368 imode = DImode;
21369 break;
21371 case E_TImode:
21372 case E_TFmode:
21373 vec_mode = VOIDmode;
21374 imode = TImode;
21375 break;
21377 default:
21378 gcc_unreachable ();
21381 machine_mode inner_mode = GET_MODE_INNER (mode);
21382 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
21383 GET_MODE_BITSIZE (inner_mode));
21384 if (invert)
21385 w = wi::bit_not (w);
21387 /* Force this value into the low part of a fp vector constant. */
21388 mask = immed_wide_int_const (w, imode);
21389 mask = gen_lowpart (inner_mode, mask);
21391 if (vec_mode == VOIDmode)
21392 return force_reg (inner_mode, mask);
21394 v = ix86_build_const_vector (vec_mode, vect, mask);
21395 return force_reg (vec_mode, v);
21398 /* Generate code for floating point ABS or NEG. */
21400 void
21401 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
21402 rtx operands[])
21404 rtx mask, set, dst, src;
21405 bool use_sse = false;
21406 bool vector_mode = VECTOR_MODE_P (mode);
21407 machine_mode vmode = mode;
21409 if (vector_mode)
21410 use_sse = true;
21411 else if (mode == TFmode)
21412 use_sse = true;
21413 else if (TARGET_SSE_MATH)
21415 use_sse = SSE_FLOAT_MODE_P (mode);
21416 if (mode == SFmode)
21417 vmode = V4SFmode;
21418 else if (mode == DFmode)
21419 vmode = V2DFmode;
21422 /* NEG and ABS performed with SSE use bitwise mask operations.
21423 Create the appropriate mask now. */
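/* NEG is then just an XOR with the sign-bit mask and ABS an AND with the
   inverted mask; the mask is only attached here through a USE, and the
   actual logic operation is emitted by whatever pattern matches the
   PARALLEL built below.  */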
21424 if (use_sse)
21425 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
21426 else
21427 mask = NULL_RTX;
21429 dst = operands[0];
21430 src = operands[1];
21432 set = gen_rtx_fmt_e (code, mode, src);
21433 set = gen_rtx_SET (dst, set);
21435 if (mask)
21437 rtx use, clob;
21438 rtvec par;
21440 use = gen_rtx_USE (VOIDmode, mask);
21441 if (vector_mode)
21442 par = gen_rtvec (2, set, use);
21443 else
21445 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21446 par = gen_rtvec (3, set, use, clob);
21448 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
21450 else
21451 emit_insn (set);
21454 /* Expand a copysign operation. Special case operand 0 being a constant. */
21456 void
21457 ix86_expand_copysign (rtx operands[])
21459 machine_mode mode, vmode;
21460 rtx dest, op0, op1, mask, nmask;
21462 dest = operands[0];
21463 op0 = operands[1];
21464 op1 = operands[2];
21466 mode = GET_MODE (dest);
21468 if (mode == SFmode)
21469 vmode = V4SFmode;
21470 else if (mode == DFmode)
21471 vmode = V2DFmode;
21472 else
21473 vmode = mode;
21475 if (CONST_DOUBLE_P (op0))
21477 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
21479 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
21480 op0 = simplify_unary_operation (ABS, mode, op0, mode);
21482 if (mode == SFmode || mode == DFmode)
21484 if (op0 == CONST0_RTX (mode))
21485 op0 = CONST0_RTX (vmode);
21486 else
21488 rtx v = ix86_build_const_vector (vmode, false, op0);
21490 op0 = force_reg (vmode, v);
21493 else if (op0 != CONST0_RTX (mode))
21494 op0 = force_reg (mode, op0);
21496 mask = ix86_build_signbit_mask (vmode, 0, 0);
21498 if (mode == SFmode)
21499 copysign_insn = gen_copysignsf3_const;
21500 else if (mode == DFmode)
21501 copysign_insn = gen_copysigndf3_const;
21502 else
21503 copysign_insn = gen_copysigntf3_const;
21505 emit_insn (copysign_insn (dest, op0, op1, mask));
21507 else
21509 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
21511 nmask = ix86_build_signbit_mask (vmode, 0, 1);
21512 mask = ix86_build_signbit_mask (vmode, 0, 0);
21514 if (mode == SFmode)
21515 copysign_insn = gen_copysignsf3_var;
21516 else if (mode == DFmode)
21517 copysign_insn = gen_copysigndf3_var;
21518 else
21519 copysign_insn = gen_copysigntf3_var;
21521 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
21525 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
21526 be a constant, and so has already been expanded into a vector constant. */
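/* DEST is expected to already contain the value supplying the sign; MASK has
   only the sign bits set, so the AND keeps just that sign and the IOR merges
   in OP0, the pre-expanded |constant| magnitude vector.  */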
21528 void
21529 ix86_split_copysign_const (rtx operands[])
21531 machine_mode mode, vmode;
21532 rtx dest, op0, mask, x;
21534 dest = operands[0];
21535 op0 = operands[1];
21536 mask = operands[3];
21538 mode = GET_MODE (dest);
21539 vmode = GET_MODE (mask);
21541 dest = lowpart_subreg (vmode, dest, mode);
21542 x = gen_rtx_AND (vmode, dest, mask);
21543 emit_insn (gen_rtx_SET (dest, x));
21545 if (op0 != CONST0_RTX (vmode))
21547 x = gen_rtx_IOR (vmode, dest, op0);
21548 emit_insn (gen_rtx_SET (dest, x));
21552 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
21553 so we have to do two masks. */
21555 void
21556 ix86_split_copysign_var (rtx operands[])
21558 machine_mode mode, vmode;
21559 rtx dest, scratch, op0, op1, mask, nmask, x;
21561 dest = operands[0];
21562 scratch = operands[1];
21563 op0 = operands[2];
21564 op1 = operands[3];
21565 nmask = operands[4];
21566 mask = operands[5];
21568 mode = GET_MODE (dest);
21569 vmode = GET_MODE (mask);
21571 if (rtx_equal_p (op0, op1))
21573 /* Shouldn't happen often (it's useless, obviously), but when it does
21574 we'd generate incorrect code if we continue below. */
21575 emit_move_insn (dest, op0);
21576 return;
21579 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21581 gcc_assert (REGNO (op1) == REGNO (scratch));
21583 x = gen_rtx_AND (vmode, scratch, mask);
21584 emit_insn (gen_rtx_SET (scratch, x));
21586 dest = mask;
21587 op0 = lowpart_subreg (vmode, op0, mode);
21588 x = gen_rtx_NOT (vmode, dest);
21589 x = gen_rtx_AND (vmode, x, op0);
21590 emit_insn (gen_rtx_SET (dest, x));
21592 else
21594 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21596 x = gen_rtx_AND (vmode, scratch, mask);
21598 else /* alternative 2,4 */
21600 gcc_assert (REGNO (mask) == REGNO (scratch));
21601 op1 = lowpart_subreg (vmode, op1, mode);
21602 x = gen_rtx_AND (vmode, scratch, op1);
21604 emit_insn (gen_rtx_SET (scratch, x));
21606 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21608 dest = lowpart_subreg (vmode, op0, mode);
21609 x = gen_rtx_AND (vmode, dest, nmask);
21611 else /* alternative 3,4 */
21613 gcc_assert (REGNO (nmask) == REGNO (dest));
21614 dest = nmask;
21615 op0 = lowpart_subreg (vmode, op0, mode);
21616 x = gen_rtx_AND (vmode, dest, op0);
21618 emit_insn (gen_rtx_SET (dest, x));
21621 x = gen_rtx_IOR (vmode, dest, scratch);
21622 emit_insn (gen_rtx_SET (dest, x));
21625 /* Return TRUE or FALSE depending on whether the first SET in INSN
21626 has source and destination with matching CC modes, and that the
21627 CC mode is at least as constrained as REQ_MODE. */
21629 bool
21630 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21632 rtx set;
21633 machine_mode set_mode;
21635 set = PATTERN (insn);
21636 if (GET_CODE (set) == PARALLEL)
21637 set = XVECEXP (set, 0, 0);
21638 gcc_assert (GET_CODE (set) == SET);
21639 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21641 set_mode = GET_MODE (SET_DEST (set));
21642 switch (set_mode)
21644 case E_CCNOmode:
21645 if (req_mode != CCNOmode
21646 && (req_mode != CCmode
21647 || XEXP (SET_SRC (set), 1) != const0_rtx))
21648 return false;
21649 break;
21650 case E_CCmode:
21651 if (req_mode == CCGCmode)
21652 return false;
21653 /* FALLTHRU */
21654 case E_CCGCmode:
21655 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21656 return false;
21657 /* FALLTHRU */
21658 case E_CCGOCmode:
21659 if (req_mode == CCZmode)
21660 return false;
21661 /* FALLTHRU */
21662 case E_CCZmode:
21663 break;
21665 case E_CCGZmode:
21667 case E_CCAmode:
21668 case E_CCCmode:
21669 case E_CCOmode:
21670 case E_CCPmode:
21671 case E_CCSmode:
21672 if (set_mode != req_mode)
21673 return false;
21674 break;
21676 default:
21677 gcc_unreachable ();
21680 return GET_MODE (SET_SRC (set)) == set_mode;
21683 /* Generate insn patterns to do an integer compare of OPERANDS. */
21685 static rtx
21686 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21688 machine_mode cmpmode;
21689 rtx tmp, flags;
21691 cmpmode = SELECT_CC_MODE (code, op0, op1);
21692 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21694 /* This is very simple, but making the interface the same as in the
21695 FP case makes the rest of the code easier. */
21696 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21697 emit_insn (gen_rtx_SET (flags, tmp));
21699 /* Return the test that should be put into the flags user, i.e.
21700 the bcc, scc, or cmov instruction. */
21701 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21704 /* Figure out whether to use unordered fp comparisons. */
21706 static bool
21707 ix86_unordered_fp_compare (enum rtx_code code)
21709 if (!TARGET_IEEE_FP)
21710 return false;
21712 switch (code)
21714 case GT:
21715 case GE:
21716 case LT:
21717 case LE:
21718 return false;
21720 case EQ:
21721 case NE:
21723 case LTGT:
21724 case UNORDERED:
21725 case ORDERED:
21726 case UNLT:
21727 case UNLE:
21728 case UNGT:
21729 case UNGE:
21730 case UNEQ:
21731 return true;
21733 default:
21734 gcc_unreachable ();
21738 machine_mode
21739 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21741 machine_mode mode = GET_MODE (op0);
21743 if (SCALAR_FLOAT_MODE_P (mode))
21745 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21746 return CCFPmode;
21749 switch (code)
21751 /* Only zero flag is needed. */
21752 case EQ: /* ZF=0 */
21753 case NE: /* ZF!=0 */
21754 return CCZmode;
21755 /* Codes needing carry flag. */
21756 case GEU: /* CF=0 */
21757 case LTU: /* CF=1 */
21758 /* Detect overflow checks. They need just the carry flag. */
21759 if (GET_CODE (op0) == PLUS
21760 && (rtx_equal_p (op1, XEXP (op0, 0))
21761 || rtx_equal_p (op1, XEXP (op0, 1))))
21762 return CCCmode;
21763 else
21764 return CCmode;
21765 case GTU: /* CF=0 & ZF=0 */
21766 case LEU: /* CF=1 | ZF=1 */
21767 return CCmode;
21768 /* Codes possibly doable only with the sign flag when
21769 comparing against zero. */
21770 case GE: /* SF=OF or SF=0 */
21771 case LT: /* SF<>OF or SF=1 */
21772 if (op1 == const0_rtx)
21773 return CCGOCmode;
21774 else
21775 /* For other cases the Carry flag is not required. */
21776 return CCGCmode;
21777 /* Codes doable only with the sign flag when comparing
21778 against zero, but we lack a jump instruction for it,
21779 so we need to use relational tests against overflow,
21780 which thus needs to be zero. */
21781 case GT: /* ZF=0 & SF=OF */
21782 case LE: /* ZF=1 | SF<>OF */
21783 if (op1 == const0_rtx)
21784 return CCNOmode;
21785 else
21786 return CCGCmode;
21787 /* The strcmp pattern does (use flags), and combine may ask us for the
21788 proper mode. */
21789 case USE:
21790 return CCmode;
21791 default:
21792 gcc_unreachable ();
21796 /* Return the fixed registers used for condition codes. */
21798 static bool
21799 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21801 *p1 = FLAGS_REG;
21802 *p2 = FPSR_REG;
21803 return true;
21806 /* If two condition code modes are compatible, return a condition code
21807 mode which is compatible with both. Otherwise, return
21808 VOIDmode. */
21810 static machine_mode
21811 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21813 if (m1 == m2)
21814 return m1;
21816 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21817 return VOIDmode;
21819 if ((m1 == CCGCmode && m2 == CCGOCmode)
21820 || (m1 == CCGOCmode && m2 == CCGCmode))
21821 return CCGCmode;
21823 if ((m1 == CCNOmode && m2 == CCGOCmode)
21824 || (m1 == CCGOCmode && m2 == CCNOmode))
21825 return CCNOmode;
21827 if (m1 == CCZmode
21828 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
21829 return m2;
21830 else if (m2 == CCZmode
21831 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
21832 return m1;
21834 switch (m1)
21836 default:
21837 gcc_unreachable ();
21839 case E_CCmode:
21840 case E_CCGCmode:
21841 case E_CCGOCmode:
21842 case E_CCNOmode:
21843 case E_CCAmode:
21844 case E_CCCmode:
21845 case E_CCOmode:
21846 case E_CCPmode:
21847 case E_CCSmode:
21848 case E_CCZmode:
21849 switch (m2)
21851 default:
21852 return VOIDmode;
21854 case E_CCmode:
21855 case E_CCGCmode:
21856 case E_CCGOCmode:
21857 case E_CCNOmode:
21858 case E_CCAmode:
21859 case E_CCCmode:
21860 case E_CCOmode:
21861 case E_CCPmode:
21862 case E_CCSmode:
21863 case E_CCZmode:
21864 return CCmode;
21867 case E_CCFPmode:
21868 /* These are only compatible with themselves, which we already
21869 checked above. */
21870 return VOIDmode;
21875 /* Return a comparison we can do that is equivalent to
21876 swap_condition (code), apart possibly from orderedness.
21877 But, never change orderedness if TARGET_IEEE_FP, returning
21878 UNKNOWN in that case if necessary. */
21880 static enum rtx_code
21881 ix86_fp_swap_condition (enum rtx_code code)
21883 switch (code)
21885 case GT: /* GTU - CF=0 & ZF=0 */
21886 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21887 case GE: /* GEU - CF=0 */
21888 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21889 case UNLT: /* LTU - CF=1 */
21890 return TARGET_IEEE_FP ? UNKNOWN : GT;
21891 case UNLE: /* LEU - CF=1 | ZF=1 */
21892 return TARGET_IEEE_FP ? UNKNOWN : GE;
21893 default:
21894 return swap_condition (code);
21898 /* Return cost of comparison CODE using the best strategy for performance.
21899 All following functions use the number of instructions as the cost metric.
21900 In the future this should be tweaked to compute bytes for optimize_size and
21901 take into account performance of various instructions on various CPUs. */
21903 static int
21904 ix86_fp_comparison_cost (enum rtx_code code)
21906 int arith_cost;
21908 /* The cost of code using bit-twiddling on %ah. */
21909 switch (code)
21911 case UNLE:
21912 case UNLT:
21913 case LTGT:
21914 case GT:
21915 case GE:
21916 case UNORDERED:
21917 case ORDERED:
21918 case UNEQ:
21919 arith_cost = 4;
21920 break;
21921 case LT:
21922 case NE:
21923 case EQ:
21924 case UNGE:
21925 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21926 break;
21927 case LE:
21928 case UNGT:
21929 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21930 break;
21931 default:
21932 gcc_unreachable ();
21935 switch (ix86_fp_comparison_strategy (code))
21937 case IX86_FPCMP_COMI:
21938 return arith_cost > 4 ? 3 : 2;
21939 case IX86_FPCMP_SAHF:
21940 return arith_cost > 4 ? 4 : 3;
21941 default:
21942 return arith_cost;
21946 /* Return strategy to use for floating-point. We assume that fcomi is always
21947 preferable where available, since that is also true when looking at size
21948 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
21950 enum ix86_fpcmp_strategy
21951 ix86_fp_comparison_strategy (enum rtx_code)
21953 /* Do fcomi/sahf based test when profitable. */
21955 if (TARGET_CMOVE)
21956 return IX86_FPCMP_COMI;
21958 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21959 return IX86_FPCMP_SAHF;
21961 return IX86_FPCMP_ARITH;
21964 /* Swap, force into registers, or otherwise massage the two operands
21965 to a fp comparison. The operands are updated in place; the new
21966 comparison code is returned. */
21968 static enum rtx_code
21969 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21971 bool unordered_compare = ix86_unordered_fp_compare (code);
21972 rtx op0 = *pop0, op1 = *pop1;
21973 machine_mode op_mode = GET_MODE (op0);
21974 bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21976 /* All of the unordered compare instructions only work on registers.
21977 The same is true of the fcomi compare instructions. The XFmode
21978 compare instructions require registers except when comparing
21979 against zero or when converting operand 1 from fixed point to
21980 floating point. */
21982 if (!is_sse
21983 && (unordered_compare
21984 || (op_mode == XFmode
21985 && ! (standard_80387_constant_p (op0) == 1
21986 || standard_80387_constant_p (op1) == 1)
21987 && GET_CODE (op1) != FLOAT)
21988 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21990 op0 = force_reg (op_mode, op0);
21991 op1 = force_reg (op_mode, op1);
21993 else
21995 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21996 things around if they appear profitable, otherwise force op0
21997 into a register. */
21999 if (standard_80387_constant_p (op0) == 0
22000 || (MEM_P (op0)
22001 && ! (standard_80387_constant_p (op1) == 0
22002 || MEM_P (op1))))
22004 enum rtx_code new_code = ix86_fp_swap_condition (code);
22005 if (new_code != UNKNOWN)
22007 std::swap (op0, op1);
22008 code = new_code;
22012 if (!REG_P (op0))
22013 op0 = force_reg (op_mode, op0);
22015 if (CONSTANT_P (op1))
22017 int tmp = standard_80387_constant_p (op1);
22018 if (tmp == 0)
22019 op1 = validize_mem (force_const_mem (op_mode, op1));
22020 else if (tmp == 1)
22022 if (TARGET_CMOVE)
22023 op1 = force_reg (op_mode, op1);
22025 else
22026 op1 = force_reg (op_mode, op1);
22030 /* Try to rearrange the comparison to make it cheaper. */
22031 if (ix86_fp_comparison_cost (code)
22032 > ix86_fp_comparison_cost (swap_condition (code))
22033 && (REG_P (op1) || can_create_pseudo_p ()))
22035 std::swap (op0, op1);
22036 code = swap_condition (code);
22037 if (!REG_P (op0))
22038 op0 = force_reg (op_mode, op0);
22041 *pop0 = op0;
22042 *pop1 = op1;
22043 return code;
22046 /* Convert comparison codes we use to represent FP comparison to integer
22047 code that will result in proper branch. Return UNKNOWN if no such code
22048 is available. */
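/* After fcomi (or fnstsw + sahf) the FP result lives in CF/ZF/PF, which is
   the layout of an unsigned integer compare; hence GT maps to GTU, GE to
   GEU, UNLT to LTU and so on.  */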
22050 enum rtx_code
22051 ix86_fp_compare_code_to_integer (enum rtx_code code)
22053 switch (code)
22055 case GT:
22056 return GTU;
22057 case GE:
22058 return GEU;
22059 case ORDERED:
22060 case UNORDERED:
22061 return code;
22062 case UNEQ:
22063 return EQ;
22064 case UNLT:
22065 return LTU;
22066 case UNLE:
22067 return LEU;
22068 case LTGT:
22069 return NE;
22070 default:
22071 return UNKNOWN;
22075 /* Generate insn patterns to do a floating point compare of OPERANDS. */
22077 static rtx
22078 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22080 bool unordered_compare = ix86_unordered_fp_compare (code);
22081 machine_mode intcmp_mode;
22082 rtx tmp, tmp2;
22084 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22086 /* Do fcomi/sahf based test when profitable. */
22087 switch (ix86_fp_comparison_strategy (code))
22089 case IX86_FPCMP_COMI:
22090 intcmp_mode = CCFPmode;
22091 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22092 if (unordered_compare)
22093 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22094 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22095 break;
22097 case IX86_FPCMP_SAHF:
22098 intcmp_mode = CCFPmode;
22099 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22100 if (unordered_compare)
22101 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22102 tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22103 if (!scratch)
22104 scratch = gen_reg_rtx (HImode);
22105 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22106 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22107 break;
22109 case IX86_FPCMP_ARITH:
22110 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
22111 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22112 if (unordered_compare)
22113 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22114 tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22115 if (!scratch)
22116 scratch = gen_reg_rtx (HImode);
22117 emit_insn (gen_rtx_SET (scratch, tmp));
22119 /* In the unordered case, we have to check C2 for NaN's, which
22120 doesn't happen to work out to anything nice combination-wise.
22121 So do some bit twiddling on the value we've got in AH to come
22122 up with an appropriate set of condition codes. */
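/* SCRATCH now holds the x87 status word; the *_ext_1 patterns below operate
   on its high byte, where C0 is bit 0 (0x01), C2 is bit 2 (0x04) and C3 is
   bit 6 (0x40) -- 0x45 therefore masks all three condition bits at once.  */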
22124 intcmp_mode = CCNOmode;
22125 switch (code)
22127 case GT:
22128 case UNGT:
22129 if (code == GT || !TARGET_IEEE_FP)
22131 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22132 code = EQ;
22134 else
22136 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22137 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22138 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22139 intcmp_mode = CCmode;
22140 code = GEU;
22142 break;
22143 case LT:
22144 case UNLT:
22145 if (code == LT && TARGET_IEEE_FP)
22147 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22148 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22149 intcmp_mode = CCmode;
22150 code = EQ;
22152 else
22154 emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22155 code = NE;
22157 break;
22158 case GE:
22159 case UNGE:
22160 if (code == GE || !TARGET_IEEE_FP)
22162 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22163 code = EQ;
22165 else
22167 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22168 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22169 code = NE;
22171 break;
22172 case LE:
22173 case UNLE:
22174 if (code == LE && TARGET_IEEE_FP)
22176 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22177 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22178 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22179 intcmp_mode = CCmode;
22180 code = LTU;
22182 else
22184 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22185 code = NE;
22187 break;
22188 case EQ:
22189 case UNEQ:
22190 if (code == EQ && TARGET_IEEE_FP)
22192 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22194 intcmp_mode = CCmode;
22195 code = EQ;
22197 else
22199 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22200 code = NE;
22202 break;
22203 case NE:
22204 case LTGT:
22205 if (code == NE && TARGET_IEEE_FP)
22207 emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22208 emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
22209 GEN_INT (0x40)));
22210 code = NE;
22212 else
22214 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22215 code = EQ;
22217 break;
22219 case UNORDERED:
22220 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22221 code = NE;
22222 break;
22223 case ORDERED:
22224 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22225 code = EQ;
22226 break;
22228 default:
22229 gcc_unreachable ();
22231 break;
22233 default:
22234 gcc_unreachable ();
22237 /* Return the test that should be put into the flags user, i.e.
22238 the bcc, scc, or cmov instruction. */
22239 return gen_rtx_fmt_ee (code, VOIDmode,
22240 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22241 const0_rtx);
22244 static rtx
22245 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
22247 rtx ret;
22249 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
22250 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
22252 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
22254 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
22255 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22257 else
22258 ret = ix86_expand_int_compare (code, op0, op1);
22260 return ret;
22263 void
22264 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
22266 machine_mode mode = GET_MODE (op0);
22267 rtx tmp;
22269 /* Handle the special case - vector comparison with boolean result; transform
22270 it using the ptest instruction. */
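/* op0 == op1 exactly when op0 ^ op1 is all zeros, and ptest of the xor with
   itself sets ZF in precisely that case, so the EQ/NE branch only has to
   test ZF.  */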
22271 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22273 rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
22274 machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
22276 gcc_assert (code == EQ || code == NE);
22277 /* Generate XOR since we can't check that one operand is zero vector. */
22278 tmp = gen_reg_rtx (mode);
22279 emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
22280 tmp = gen_lowpart (p_mode, tmp);
22281 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
22282 gen_rtx_UNSPEC (CCmode,
22283 gen_rtvec (2, tmp, tmp),
22284 UNSPEC_PTEST)));
22285 tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
22286 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22287 gen_rtx_LABEL_REF (VOIDmode, label),
22288 pc_rtx);
22289 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22290 return;
22293 switch (mode)
22295 case E_SFmode:
22296 case E_DFmode:
22297 case E_XFmode:
22298 case E_QImode:
22299 case E_HImode:
22300 case E_SImode:
22301 simple:
22302 tmp = ix86_expand_compare (code, op0, op1);
22303 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22304 gen_rtx_LABEL_REF (VOIDmode, label),
22305 pc_rtx);
22306 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
22307 return;
22309 case E_DImode:
22310 if (TARGET_64BIT)
22311 goto simple;
22312 /* For a 32-bit target, DI comparison may be performed on
22313 SSE registers. To allow this we should avoid a split
22314 to SI mode, which is achieved by doing the xor in DI mode
22315 and then comparing with zero (which is recognized by the
22316 STV pass). We don't compare using xor when optimizing
22317 for size. */
22318 if (!optimize_insn_for_size_p ()
22319 && TARGET_STV
22320 && (code == EQ || code == NE))
22322 op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
22323 op1 = const0_rtx;
22325 /* FALLTHRU */
22326 case E_TImode:
22327 /* Expand DImode branch into multiple compare+branch. */
22329 rtx lo[2], hi[2];
22330 rtx_code_label *label2;
22331 enum rtx_code code1, code2, code3;
22332 machine_mode submode;
22334 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
22336 std::swap (op0, op1);
22337 code = swap_condition (code);
22340 split_double_mode (mode, &op0, 1, lo+0, hi+0);
22341 split_double_mode (mode, &op1, 1, lo+1, hi+1);
22343 submode = mode == DImode ? SImode : DImode;
22345 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
22346 avoid two branches. This costs one extra insn, so disable when
22347 optimizing for size. */
22349 if ((code == EQ || code == NE)
22350 && (!optimize_insn_for_size_p ()
22351 || hi[1] == const0_rtx || lo[1] == const0_rtx))
22353 rtx xor0, xor1;
22355 xor1 = hi[0];
22356 if (hi[1] != const0_rtx)
22357 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
22358 NULL_RTX, 0, OPTAB_WIDEN);
22360 xor0 = lo[0];
22361 if (lo[1] != const0_rtx)
22362 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
22363 NULL_RTX, 0, OPTAB_WIDEN);
22365 tmp = expand_binop (submode, ior_optab, xor1, xor0,
22366 NULL_RTX, 0, OPTAB_WIDEN);
22368 ix86_expand_branch (code, tmp, const0_rtx, label);
22369 return;
22372 /* Otherwise, if we are doing less-than or greater-or-equal-than,
22373 op1 is a constant and the low word is zero, then we can just
22374 examine the high word. Similarly for low word -1 and
22375 less-or-equal-than or greater-than. */
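/* E.g. for an unsigned a < 0x300000000 the low word of the constant is zero,
   so the branch reduces to hi(a) <u 3.  */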
22377 if (CONST_INT_P (hi[1]))
22378 switch (code)
22380 case LT: case LTU: case GE: case GEU:
22381 if (lo[1] == const0_rtx)
22383 ix86_expand_branch (code, hi[0], hi[1], label);
22384 return;
22386 break;
22387 case LE: case LEU: case GT: case GTU:
22388 if (lo[1] == constm1_rtx)
22390 ix86_expand_branch (code, hi[0], hi[1], label);
22391 return;
22393 break;
22394 default:
22395 break;
22398 /* Emulate comparisons that do not depend on Zero flag with
22399 double-word subtraction. Note that only Overflow, Sign
22400 and Carry flags are valid, so swap arguments and condition
22401 of comparisons that would otherwise test Zero flag. */
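/* E.g. for a <u b the low words are compared first and then
   hi(a) - hi(b) - CF is computed into a scratch register; the borrow out of
   this double-word subtraction (the carry flag) is exactly a <u b.  */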
22403 switch (code)
22405 case LE: case LEU: case GT: case GTU:
22406 std::swap (lo[0], lo[1]);
22407 std::swap (hi[0], hi[1]);
22408 code = swap_condition (code);
22409 /* FALLTHRU */
22411 case LT: case LTU: case GE: case GEU:
22413 rtx (*cmp_insn) (rtx, rtx);
22414 rtx (*sbb_insn) (rtx, rtx, rtx);
22415 bool uns = (code == LTU || code == GEU);
22417 if (TARGET_64BIT)
22419 cmp_insn = gen_cmpdi_1;
22420 sbb_insn
22421 = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
22423 else
22425 cmp_insn = gen_cmpsi_1;
22426 sbb_insn
22427 = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
22430 if (!nonimmediate_operand (lo[0], submode))
22431 lo[0] = force_reg (submode, lo[0]);
22432 if (!x86_64_general_operand (lo[1], submode))
22433 lo[1] = force_reg (submode, lo[1]);
22435 if (!register_operand (hi[0], submode))
22436 hi[0] = force_reg (submode, hi[0]);
22437 if ((uns && !nonimmediate_operand (hi[1], submode))
22438 || (!uns && !x86_64_general_operand (hi[1], submode)))
22439 hi[1] = force_reg (submode, hi[1]);
22441 emit_insn (cmp_insn (lo[0], lo[1]));
22442 emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
22444 tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22446 ix86_expand_branch (code, tmp, const0_rtx, label);
22447 return;
22450 default:
22451 break;
22454 /* Otherwise, we need two or three jumps. */
22456 label2 = gen_label_rtx ();
22458 code1 = code;
22459 code2 = swap_condition (code);
22460 code3 = unsigned_condition (code);
22462 switch (code)
22464 case LT: case GT: case LTU: case GTU:
22465 break;
22467 case LE: code1 = LT; code2 = GT; break;
22468 case GE: code1 = GT; code2 = LT; break;
22469 case LEU: code1 = LTU; code2 = GTU; break;
22470 case GEU: code1 = GTU; code2 = LTU; break;
22472 case EQ: code1 = UNKNOWN; code2 = NE; break;
22473 case NE: code2 = UNKNOWN; break;
22475 default:
22476 gcc_unreachable ();
22480 * a < b =>
22481 * if (hi(a) < hi(b)) goto true;
22482 * if (hi(a) > hi(b)) goto false;
22483 * if (lo(a) < lo(b)) goto true;
22484 * false:
22487 if (code1 != UNKNOWN)
22488 ix86_expand_branch (code1, hi[0], hi[1], label);
22489 if (code2 != UNKNOWN)
22490 ix86_expand_branch (code2, hi[0], hi[1], label2);
22492 ix86_expand_branch (code3, lo[0], lo[1], label);
22494 if (code2 != UNKNOWN)
22495 emit_label (label2);
22496 return;
22499 default:
22500 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
22501 goto simple;
22505 void
22506 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22508 rtx ret;
22510 gcc_assert (GET_MODE (dest) == QImode);
22512 ret = ix86_expand_compare (code, op0, op1);
22513 PUT_MODE (ret, QImode);
22514 emit_insn (gen_rtx_SET (dest, ret));
22517 /* Expand comparison setting or clearing carry flag. Return true when
22518 successful and set pop for the operation. */
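/* Reducing the comparison to LTU/GEU lets the caller materialize the result
   as 0/-1 with a single sbb (see the x86_mov*cc_0_m1 uses in
   ix86_expand_int_movcc below).  */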
22519 static bool
22520 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
22522 machine_mode mode =
22523 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
22525 /* Do not handle double-mode compares that go through special path. */
22526 if (mode == (TARGET_64BIT ? TImode : DImode))
22527 return false;
22529 if (SCALAR_FLOAT_MODE_P (mode))
22531 rtx compare_op;
22532 rtx_insn *compare_seq;
22534 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22536 /* Shortcut: the following common codes never translate
22537 into carry-flag compares. */
22538 if (code == EQ || code == NE || code == UNEQ || code == LTGT
22539 || code == ORDERED || code == UNORDERED)
22540 return false;
22542 /* These comparisons require the zero flag; swap operands so they don't. */
22543 if ((code == GT || code == UNLE || code == LE || code == UNGT)
22544 && !TARGET_IEEE_FP)
22546 std::swap (op0, op1);
22547 code = swap_condition (code);
22550 /* Try to expand the comparison and verify that we end up with
22551 a carry-flag-based comparison. This fails to be true only when
22552 we decide to expand the comparison using arithmetic, which is not
22553 a common scenario. */
22554 start_sequence ();
22555 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
22556 compare_seq = get_insns ();
22557 end_sequence ();
22559 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
22560 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
22561 else
22562 code = GET_CODE (compare_op);
22564 if (code != LTU && code != GEU)
22565 return false;
22567 emit_insn (compare_seq);
22568 *pop = compare_op;
22569 return true;
22572 if (!INTEGRAL_MODE_P (mode))
22573 return false;
22575 switch (code)
22577 case LTU:
22578 case GEU:
22579 break;
22581 /* Convert a==0 into (unsigned)a<1. */
22582 case EQ:
22583 case NE:
22584 if (op1 != const0_rtx)
22585 return false;
22586 op1 = const1_rtx;
22587 code = (code == EQ ? LTU : GEU);
22588 break;
22590 /* Convert a>b into b<a or a>=b-1. */
22591 case GTU:
22592 case LEU:
22593 if (CONST_INT_P (op1))
22595 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
22596 /* Bail out on overflow. We can still swap operands, but that
22597 would force loading of the constant into a register. */
22598 if (op1 == const0_rtx
22599 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
22600 return false;
22601 code = (code == GTU ? GEU : LTU);
22603 else
22605 std::swap (op0, op1);
22606 code = (code == GTU ? LTU : GEU);
22608 break;
22610 /* Convert a>=0 into (unsigned)a<0x80000000. */
22611 case LT:
22612 case GE:
22613 if (mode == DImode || op1 != const0_rtx)
22614 return false;
22615 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22616 code = (code == LT ? GEU : LTU);
22617 break;
22618 case LE:
22619 case GT:
22620 if (mode == DImode || op1 != constm1_rtx)
22621 return false;
22622 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
22623 code = (code == LE ? GEU : LTU);
22624 break;
22626 default:
22627 return false;
22629 /* Swapping operands may cause a constant to appear as the first operand. */
22630 if (!nonimmediate_operand (op0, VOIDmode))
22632 if (!can_create_pseudo_p ())
22633 return false;
22634 op0 = force_reg (mode, op0);
22636 *pop = ix86_expand_compare (code, op0, op1);
22637 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
22638 return true;
22641 bool
22642 ix86_expand_int_movcc (rtx operands[])
22644 enum rtx_code code = GET_CODE (operands[1]), compare_code;
22645 rtx_insn *compare_seq;
22646 rtx compare_op;
22647 machine_mode mode = GET_MODE (operands[0]);
22648 bool sign_bit_compare_p = false;
22649 rtx op0 = XEXP (operands[1], 0);
22650 rtx op1 = XEXP (operands[1], 1);
22652 if (GET_MODE (op0) == TImode
22653 || (GET_MODE (op0) == DImode
22654 && !TARGET_64BIT))
22655 return false;
22657 start_sequence ();
22658 compare_op = ix86_expand_compare (code, op0, op1);
22659 compare_seq = get_insns ();
22660 end_sequence ();
22662 compare_code = GET_CODE (compare_op);
22664 if ((op1 == const0_rtx && (code == GE || code == LT))
22665 || (op1 == constm1_rtx && (code == GT || code == LE)))
22666 sign_bit_compare_p = true;
22668 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22669 HImode insns, we'd be swallowed in word prefix ops. */
22671 if ((mode != HImode || TARGET_FAST_PREFIX)
22672 && (mode != (TARGET_64BIT ? TImode : DImode))
22673 && CONST_INT_P (operands[2])
22674 && CONST_INT_P (operands[3]))
22676 rtx out = operands[0];
22677 HOST_WIDE_INT ct = INTVAL (operands[2]);
22678 HOST_WIDE_INT cf = INTVAL (operands[3]);
22679 HOST_WIDE_INT diff;
22681 diff = ct - cf;
22682 /* Sign-bit compares are better done using shifts than by using
22683 sbb. */
22684 if (sign_bit_compare_p
22685 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22687 /* Detect overlap between destination and compare sources. */
22688 rtx tmp = out;
22690 if (!sign_bit_compare_p)
22692 rtx flags;
22693 bool fpcmp = false;
22695 compare_code = GET_CODE (compare_op);
22697 flags = XEXP (compare_op, 0);
22699 if (GET_MODE (flags) == CCFPmode)
22701 fpcmp = true;
22702 compare_code
22703 = ix86_fp_compare_code_to_integer (compare_code);
22706 /* To simplify rest of code, restrict to the GEU case. */
22707 if (compare_code == LTU)
22709 std::swap (ct, cf);
22710 compare_code = reverse_condition (compare_code);
22711 code = reverse_condition (code);
22713 else
22715 if (fpcmp)
22716 PUT_CODE (compare_op,
22717 reverse_condition_maybe_unordered
22718 (GET_CODE (compare_op)));
22719 else
22720 PUT_CODE (compare_op,
22721 reverse_condition (GET_CODE (compare_op)));
22723 diff = ct - cf;
22725 if (reg_overlap_mentioned_p (out, op0)
22726 || reg_overlap_mentioned_p (out, op1))
22727 tmp = gen_reg_rtx (mode);
22729 if (mode == DImode)
22730 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22731 else
22732 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22733 flags, compare_op));
22735 else
22737 if (code == GT || code == GE)
22738 code = reverse_condition (code);
22739 else
22741 std::swap (ct, cf);
22742 diff = ct - cf;
22744 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22747 if (diff == 1)
22750 * cmpl op0,op1
22751 * sbbl dest,dest
22752 * [addl dest, ct]
22754 * Size 5 - 8.
22756 if (ct)
22757 tmp = expand_simple_binop (mode, PLUS,
22758 tmp, GEN_INT (ct),
22759 copy_rtx (tmp), 1, OPTAB_DIRECT);
22761 else if (cf == -1)
22764 * cmpl op0,op1
22765 * sbbl dest,dest
22766 * orl $ct, dest
22768 * Size 8.
22770 tmp = expand_simple_binop (mode, IOR,
22771 tmp, GEN_INT (ct),
22772 copy_rtx (tmp), 1, OPTAB_DIRECT);
22774 else if (diff == -1 && ct)
22777 * cmpl op0,op1
22778 * sbbl dest,dest
22779 * notl dest
22780 * [addl dest, cf]
22782 * Size 8 - 11.
22784 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22785 if (cf)
22786 tmp = expand_simple_binop (mode, PLUS,
22787 copy_rtx (tmp), GEN_INT (cf),
22788 copy_rtx (tmp), 1, OPTAB_DIRECT);
22790 else
22793 * cmpl op0,op1
22794 * sbbl dest,dest
22795 * [notl dest]
22796 * andl cf - ct, dest
22797 * [addl dest, ct]
22799 * Size 8 - 11.
22802 if (cf == 0)
22804 cf = ct;
22805 ct = 0;
22806 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22809 tmp = expand_simple_binop (mode, AND,
22810 copy_rtx (tmp),
22811 gen_int_mode (cf - ct, mode),
22812 copy_rtx (tmp), 1, OPTAB_DIRECT);
22813 if (ct)
22814 tmp = expand_simple_binop (mode, PLUS,
22815 copy_rtx (tmp), GEN_INT (ct),
22816 copy_rtx (tmp), 1, OPTAB_DIRECT);
22819 if (!rtx_equal_p (tmp, out))
22820 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22822 return true;
22825 if (diff < 0)
22827 machine_mode cmp_mode = GET_MODE (op0);
22828 enum rtx_code new_code;
22830 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22832 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22834 /* We may be reversing an unordered compare to a normal compare, which
22835 is not valid in general (we may convert a non-trapping condition
22836 to a trapping one); however, on i386 we currently emit all
22837 comparisons unordered. */
22838 new_code = reverse_condition_maybe_unordered (code);
22840 else
22841 new_code = ix86_reverse_condition (code, cmp_mode);
22842 if (new_code != UNKNOWN)
22844 std::swap (ct, cf);
22845 diff = -diff;
22846 code = new_code;
22850 compare_code = UNKNOWN;
22851 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22852 && CONST_INT_P (op1))
22854 if (op1 == const0_rtx
22855 && (code == LT || code == GE))
22856 compare_code = code;
22857 else if (op1 == constm1_rtx)
22859 if (code == LE)
22860 compare_code = LT;
22861 else if (code == GT)
22862 compare_code = GE;
22866 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22867 if (compare_code != UNKNOWN
22868 && GET_MODE (op0) == GET_MODE (out)
22869 && (cf == -1 || ct == -1))
22871 /* If the lea code below could be used, only optimize
22872 if it results in a 2-insn sequence. */
22874 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22875 || diff == 3 || diff == 5 || diff == 9)
22876 || (compare_code == LT && ct == -1)
22877 || (compare_code == GE && cf == -1))
22880 * notl op1 (if necessary)
22881 * sarl $31, op1
22882 * orl cf, op1
22884 if (ct != -1)
22886 cf = ct;
22887 ct = -1;
22888 code = reverse_condition (code);
22891 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22893 out = expand_simple_binop (mode, IOR,
22894 out, GEN_INT (cf),
22895 out, 1, OPTAB_DIRECT);
22896 if (out != operands[0])
22897 emit_move_insn (operands[0], out);
22899 return true;
22904 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22905 || diff == 3 || diff == 5 || diff == 9)
22906 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22907 && (mode != DImode
22908 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22911 * xorl dest,dest
22912 * cmpl op1,op2
22913 * setcc dest
22914 * lea cf(dest*(ct-cf)),dest
22916 * Size 14.
22918 * This also catches the degenerate setcc-only case.
22921 rtx tmp;
22922 int nops;
22924 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22926 nops = 0;
22927 /* On x86_64 the lea instruction operates on Pmode, so we need
22928 to get the arithmetic done in the proper mode to match. */
22929 if (diff == 1)
22930 tmp = copy_rtx (out);
22931 else
22933 rtx out1;
22934 out1 = copy_rtx (out);
22935 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22936 nops++;
22937 if (diff & 1)
22939 tmp = gen_rtx_PLUS (mode, tmp, out1);
22940 nops++;
22943 if (cf != 0)
22945 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22946 nops++;
22948 if (!rtx_equal_p (tmp, out))
22950 if (nops == 1)
22951 out = force_operand (tmp, copy_rtx (out));
22952 else
22953 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22955 if (!rtx_equal_p (out, operands[0]))
22956 emit_move_insn (operands[0], copy_rtx (out));
22958 return true;
22962 * General case: Jumpful:
22963 * xorl dest,dest cmpl op1, op2
22964 * cmpl op1, op2 movl ct, dest
22965 * setcc dest jcc 1f
22966 * decl dest movl cf, dest
22967 * andl (cf-ct),dest 1:
22968 * addl ct,dest
22970 * Size 20. Size 14.
22972 * This is reasonably steep, but branch mispredict costs are
22973 * high on modern cpus, so consider failing only if optimizing
22974 * for space.
22977 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22978 && BRANCH_COST (optimize_insn_for_speed_p (),
22979 false) >= 2)
22981 if (cf == 0)
22983 machine_mode cmp_mode = GET_MODE (op0);
22984 enum rtx_code new_code;
22986 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22988 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22990 /* We may be reversing an unordered compare to a normal compare,
22991 which is not valid in general (we may convert a non-trapping
22992 condition to a trapping one); however, on i386 we currently
22993 emit all comparisons unordered. */
22994 new_code = reverse_condition_maybe_unordered (code);
22996 else
22998 new_code = ix86_reverse_condition (code, cmp_mode);
22999 if (compare_code != UNKNOWN && new_code != UNKNOWN)
23000 compare_code = reverse_condition (compare_code);
23003 if (new_code != UNKNOWN)
23005 cf = ct;
23006 ct = 0;
23007 code = new_code;
23011 if (compare_code != UNKNOWN)
23013 /* notl op1 (if needed)
23014 sarl $31, op1
23015 andl (cf-ct), op1
23016 addl ct, op1
23018 For x < 0 (resp. x <= -1) there will be no notl,
23019 so if possible swap the constants to get rid of the
23020 complement.
23021 True/false will be -1/0 while code below (store flag
23022 followed by decrement) is 0/-1, so the constants need
23023 to be exchanged once more. */
23025 if (compare_code == GE || !cf)
23027 code = reverse_condition (code);
23028 compare_code = LT;
23030 else
23031 std::swap (ct, cf);
23033 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23035 else
23037 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23039 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23040 constm1_rtx,
23041 copy_rtx (out), 1, OPTAB_DIRECT);
23044 out = expand_simple_binop (mode, AND, copy_rtx (out),
23045 gen_int_mode (cf - ct, mode),
23046 copy_rtx (out), 1, OPTAB_DIRECT);
23047 if (ct)
23048 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23049 copy_rtx (out), 1, OPTAB_DIRECT);
23050 if (!rtx_equal_p (out, operands[0]))
23051 emit_move_insn (operands[0], copy_rtx (out));
23053 return true;
23057 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23059 /* Try a few things more with specific constants and a variable. */
23061 optab op;
23062 rtx var, orig_out, out, tmp;
23064 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23065 return false;
23067 /* If one of the two operands is an interesting constant, load a
23068 constant with the above and mask it in with a logical operation. */
23070 if (CONST_INT_P (operands[2]))
23072 var = operands[3];
23073 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23074 operands[3] = constm1_rtx, op = and_optab;
23075 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23076 operands[3] = const0_rtx, op = ior_optab;
23077 else
23078 return false;
23080 else if (CONST_INT_P (operands[3]))
23082 var = operands[2];
23083 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23084 operands[2] = constm1_rtx, op = and_optab;
23085 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
23086 operands[2] = const0_rtx, op = ior_optab;
23087 else
23088 return false;
23090 else
23091 return false;
23093 orig_out = operands[0];
23094 tmp = gen_reg_rtx (mode);
23095 operands[0] = tmp;
23097 /* Recurse to get the constant loaded. */
23098 if (!ix86_expand_int_movcc (operands))
23099 return false;
23101 /* Mask in the interesting variable. */
23102 out = expand_binop (mode, op, var, tmp, orig_out, 0,
23103 OPTAB_WIDEN);
23104 if (!rtx_equal_p (out, orig_out))
23105 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23107 return true;
23111 * For comparison with above,
23113 * movl cf,dest
23114 * movl ct,tmp
23115 * cmpl op1,op2
23116 * cmovcc tmp,dest
23118 * Size 15.
23121 if (! nonimmediate_operand (operands[2], mode))
23122 operands[2] = force_reg (mode, operands[2]);
23123 if (! nonimmediate_operand (operands[3], mode))
23124 operands[3] = force_reg (mode, operands[3]);
23126 if (! register_operand (operands[2], VOIDmode)
23127 && (mode == QImode
23128 || ! register_operand (operands[3], VOIDmode)))
23129 operands[2] = force_reg (mode, operands[2]);
23131 if (mode == QImode
23132 && ! register_operand (operands[3], VOIDmode))
23133 operands[3] = force_reg (mode, operands[3]);
23135 emit_insn (compare_seq);
23136 emit_insn (gen_rtx_SET (operands[0],
23137 gen_rtx_IF_THEN_ELSE (mode,
23138 compare_op, operands[2],
23139 operands[3])));
23140 return true;
23143 /* Swap, force into registers, or otherwise massage the two operands
23144 to an sse comparison with a mask result. Thus we differ a bit from
23145 ix86_prepare_fp_compare_args which expects to produce a flags result.
23147 The DEST operand exists to help determine whether to commute commutative
23148 operators. The POP0/POP1 operands are updated in place. The new
23149 comparison code is returned, or UNKNOWN if not implementable. */
23151 static enum rtx_code
23152 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23153 rtx *pop0, rtx *pop1)
23155 switch (code)
23157 case LTGT:
23158 case UNEQ:
23159 /* AVX supports all the needed comparisons. */
23160 if (TARGET_AVX)
23161 break;
23162 /* We have no LTGT as an operator. We could implement it with
23163 NE & ORDERED, but this requires an extra temporary. It's
23164 not clear that it's worth it. */
23165 return UNKNOWN;
23167 case LT:
23168 case LE:
23169 case UNGT:
23170 case UNGE:
23171 /* These are supported directly. */
23172 break;
23174 case EQ:
23175 case NE:
23176 case UNORDERED:
23177 case ORDERED:
23178 /* AVX has 3 operand comparisons, no need to swap anything. */
23179 if (TARGET_AVX)
23180 break;
23181 /* For commutative operators, try to canonicalize the destination
23182 operand to be first in the comparison - this helps reload to
23183 avoid extra moves. */
23184 if (!dest || !rtx_equal_p (dest, *pop1))
23185 break;
23186 /* FALLTHRU */
23188 case GE:
23189 case GT:
23190 case UNLE:
23191 case UNLT:
23192 /* These are not supported directly before AVX, and furthermore
23193 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
23194 comparison operands to transform into something that is
23195 supported. */
23196 std::swap (*pop0, *pop1);
23197 code = swap_condition (code);
23198 break;
23200 default:
23201 gcc_unreachable ();
23204 return code;
23207 /* Detect conditional moves that exactly match min/max operational
23208 semantics. Note that this is IEEE safe, as long as we don't
23209 interchange the operands.
23211 Returns FALSE if this conditional move doesn't match a MIN/MAX,
23212 and TRUE if the operation is successful and instructions are emitted. */
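/* The accepted shapes are dest = (a < b) ? a : b (min) and
   dest = (a < b) ? b : a (max); an UNGE comparison is handled by first
   swapping the two move arms.  */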
23214 static bool
23215 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23216 rtx cmp_op1, rtx if_true, rtx if_false)
23218 machine_mode mode;
23219 bool is_min;
23220 rtx tmp;
23222 if (code == LT)
23224 else if (code == UNGE)
23225 std::swap (if_true, if_false);
23226 else
23227 return false;
23229 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23230 is_min = true;
23231 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23232 is_min = false;
23233 else
23234 return false;
23236 mode = GET_MODE (dest);
23238 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23239 but MODE may be a vector mode and thus not appropriate. */
23240 if (!flag_finite_math_only || flag_signed_zeros)
23242 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23243 rtvec v;
23245 if_true = force_reg (mode, if_true);
23246 v = gen_rtvec (2, if_true, if_false);
23247 tmp = gen_rtx_UNSPEC (mode, v, u);
23249 else
23251 code = is_min ? SMIN : SMAX;
23252 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
23255 emit_insn (gen_rtx_SET (dest, tmp));
23256 return true;
23259 /* Expand an sse vector comparison. Return the register with the result. */
23261 static rtx
23262 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
23263 rtx op_true, rtx op_false)
23265 machine_mode mode = GET_MODE (dest);
23266 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
23268 /* In the general case the result of a comparison can differ from the operands' type. */
23269 machine_mode cmp_mode;
23271 /* In AVX512F the result of comparison is an integer mask. */
23272 bool maskcmp = false;
23273 rtx x;
23275 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
23277 unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
23278 cmp_mode = int_mode_for_size (nbits, 0).require ();
23279 maskcmp = true;
23281 else
23282 cmp_mode = cmp_ops_mode;
23285 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
23286 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
23287 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
23289 if (optimize
23290 || (maskcmp && cmp_mode != mode)
23291 || (op_true && reg_overlap_mentioned_p (dest, op_true))
23292 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
23293 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
23295 /* Compare patterns for int modes are unspec in AVX512F only. */
23296 if (maskcmp && (code == GT || code == EQ))
23298 rtx (*gen)(rtx, rtx, rtx);
23300 switch (cmp_ops_mode)
23302 case E_V64QImode:
23303 gcc_assert (TARGET_AVX512BW);
23304 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
23305 break;
23306 case E_V32HImode:
23307 gcc_assert (TARGET_AVX512BW);
23308 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
23309 break;
23310 case E_V16SImode:
23311 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
23312 break;
23313 case E_V8DImode:
23314 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
23315 break;
23316 default:
23317 gen = NULL;
23320 if (gen)
23322 emit_insn (gen (dest, cmp_op0, cmp_op1));
23323 return dest;
23326 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
23328 if (cmp_mode != mode && !maskcmp)
23330 x = force_reg (cmp_ops_mode, x);
23331 convert_move (dest, x, false);
23333 else
23334 emit_insn (gen_rtx_SET (dest, x));
23336 return dest;
23339 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
23340 operations. This is used for both scalar and vector conditional moves. */
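/* Without a suitable blend instruction the fallback is
   DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP); SSE4.1/AVX targets use the
   blendv* instructions instead, and AVX-512 uses the vblendm* mask moves,
   as selected by the switch below.  */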
23342 void
23343 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
23345 machine_mode mode = GET_MODE (dest);
23346 machine_mode cmpmode = GET_MODE (cmp);
23348 /* In AVX512F the result of comparison is an integer mask. */
23349 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
23351 rtx t2, t3, x;
23353 /* If we have an integer mask and an FP value then we need
23354 to cast the mask to FP mode. */
23355 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
23357 cmp = force_reg (cmpmode, cmp);
23358 cmp = gen_rtx_SUBREG (mode, cmp, 0);
23361 if (vector_all_ones_operand (op_true, mode)
23362 && rtx_equal_p (op_false, CONST0_RTX (mode))
23363 && !maskcmp)
23365 emit_insn (gen_rtx_SET (dest, cmp));
23367 else if (op_false == CONST0_RTX (mode)
23368 && !maskcmp)
23370 op_true = force_reg (mode, op_true);
23371 x = gen_rtx_AND (mode, cmp, op_true);
23372 emit_insn (gen_rtx_SET (dest, x));
23374 else if (op_true == CONST0_RTX (mode)
23375 && !maskcmp)
23377 op_false = force_reg (mode, op_false);
23378 x = gen_rtx_NOT (mode, cmp);
23379 x = gen_rtx_AND (mode, x, op_false);
23380 emit_insn (gen_rtx_SET (dest, x));
23382 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
23383 && !maskcmp)
23385 op_false = force_reg (mode, op_false);
23386 x = gen_rtx_IOR (mode, cmp, op_false);
23387 emit_insn (gen_rtx_SET (dest, x));
23389 else if (TARGET_XOP
23390 && !maskcmp)
23392 op_true = force_reg (mode, op_true);
23394 if (!nonimmediate_operand (op_false, mode))
23395 op_false = force_reg (mode, op_false);
23397 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
23398 op_true,
23399 op_false)));
23401 else
23403 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23404 rtx d = dest;
23406 if (!nonimmediate_operand (op_true, mode))
23407 op_true = force_reg (mode, op_true);
23409 op_false = force_reg (mode, op_false);
23411 switch (mode)
23413 case E_V4SFmode:
23414 if (TARGET_SSE4_1)
23415 gen = gen_sse4_1_blendvps;
23416 break;
23417 case E_V2DFmode:
23418 if (TARGET_SSE4_1)
23419 gen = gen_sse4_1_blendvpd;
23420 break;
23421 case E_V16QImode:
23422 case E_V8HImode:
23423 case E_V4SImode:
23424 case E_V2DImode:
23425 if (TARGET_SSE4_1)
23427 gen = gen_sse4_1_pblendvb;
23428 if (mode != V16QImode)
23429 d = gen_reg_rtx (V16QImode);
23430 op_false = gen_lowpart (V16QImode, op_false);
23431 op_true = gen_lowpart (V16QImode, op_true);
23432 cmp = gen_lowpart (V16QImode, cmp);
23434 break;
23435 case E_V8SFmode:
23436 if (TARGET_AVX)
23437 gen = gen_avx_blendvps256;
23438 break;
23439 case E_V4DFmode:
23440 if (TARGET_AVX)
23441 gen = gen_avx_blendvpd256;
23442 break;
23443 case E_V32QImode:
23444 case E_V16HImode:
23445 case E_V8SImode:
23446 case E_V4DImode:
23447 if (TARGET_AVX2)
23449 gen = gen_avx2_pblendvb;
23450 if (mode != V32QImode)
23451 d = gen_reg_rtx (V32QImode);
23452 op_false = gen_lowpart (V32QImode, op_false);
23453 op_true = gen_lowpart (V32QImode, op_true);
23454 cmp = gen_lowpart (V32QImode, cmp);
23456 break;
23458 case E_V64QImode:
23459 gen = gen_avx512bw_blendmv64qi;
23460 break;
23461 case E_V32HImode:
23462 gen = gen_avx512bw_blendmv32hi;
23463 break;
23464 case E_V16SImode:
23465 gen = gen_avx512f_blendmv16si;
23466 break;
23467 case E_V8DImode:
23468 gen = gen_avx512f_blendmv8di;
23469 break;
23470 case E_V8DFmode:
23471 gen = gen_avx512f_blendmv8df;
23472 break;
23473 case E_V16SFmode:
23474 gen = gen_avx512f_blendmv16sf;
23475 break;
23477 default:
23478 break;
23481 if (gen != NULL)
23483 emit_insn (gen (d, op_false, op_true, cmp));
23484 if (d != dest)
23485 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
23487 else
23489 op_true = force_reg (mode, op_true);
23491 t2 = gen_reg_rtx (mode);
23492 if (optimize)
23493 t3 = gen_reg_rtx (mode);
23494 else
23495 t3 = dest;
23497 x = gen_rtx_AND (mode, op_true, cmp);
23498 emit_insn (gen_rtx_SET (t2, x));
23500 x = gen_rtx_NOT (mode, cmp);
23501 x = gen_rtx_AND (mode, x, op_false);
23502 emit_insn (gen_rtx_SET (t3, x));
23504 x = gen_rtx_IOR (mode, t3, t2);
23505 emit_insn (gen_rtx_SET (dest, x));
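/* Added commentary (not part of the upstream comment): the fallback
   path above implements the select with the classic bitwise identity

       dest = (cmp & op_true) | (~cmp & op_false)

   which is correct because vector comparisons produce all-ones or
   all-zeros per element.  E.g. with 32-bit elements, cmp = { -1, 0 },
   op_true = { 7, 7 }, op_false = { 9, 9 } gives
   { 7, 0 } | { 0, 9 } = { 7, 9 }.  */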
23510 /* Expand a floating-point conditional move. Return true if successful. */
23512 bool
23513 ix86_expand_fp_movcc (rtx operands[])
23515 machine_mode mode = GET_MODE (operands[0]);
23516 enum rtx_code code = GET_CODE (operands[1]);
23517 rtx tmp, compare_op;
23518 rtx op0 = XEXP (operands[1], 0);
23519 rtx op1 = XEXP (operands[1], 1);
23521 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23523 machine_mode cmode;
23525 /* Since we have no cmove for SSE registers, don't force bad register
23526 allocation just to gain access to it. Deny movcc when the
23527 comparison mode doesn't match the move mode. */
23528 cmode = GET_MODE (op0);
23529 if (cmode == VOIDmode)
23530 cmode = GET_MODE (op1);
23531 if (cmode != mode)
23532 return false;
23534 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
23535 if (code == UNKNOWN)
23536 return false;
23538 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
23539 operands[2], operands[3]))
23540 return true;
23542 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
23543 operands[2], operands[3]);
23544 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
23545 return true;
23548 if (GET_MODE (op0) == TImode
23549 || (GET_MODE (op0) == DImode
23550 && !TARGET_64BIT))
23551 return false;
23553 /* The floating point conditional move instructions don't directly
23554 support conditions resulting from a signed integer comparison. */
23556 compare_op = ix86_expand_compare (code, op0, op1);
23557 if (!fcmov_comparison_operator (compare_op, VOIDmode))
23559 tmp = gen_reg_rtx (QImode);
23560 ix86_expand_setcc (tmp, code, op0, op1);
23562 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
23565 emit_insn (gen_rtx_SET (operands[0],
23566 gen_rtx_IF_THEN_ELSE (mode, compare_op,
23567 operands[2], operands[3])));
23569 return true;
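/* Illustrative note (added commentary, not from the upstream sources):
   with SSE math enabled, "double r = a < b ? c : d" is handled entirely
   by the SSE branch above -- a compare mask from ix86_expand_sse_cmp fed
   into ix86_expand_sse_movcc -- unless ix86_expand_sse_fp_minmax
   recognises it as a min/max first.  The non-SSE path instead builds an
   IF_THEN_ELSE on the flags comparison, relying on fcmov-style
   conditional moves (going through a setcc when the condition is not
   directly representable).  */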
23572 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
23574 static int
23575 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
23577 switch (code)
23579 case EQ:
23580 return 0;
23581 case LT:
23582 case LTU:
23583 return 1;
23584 case LE:
23585 case LEU:
23586 return 2;
23587 case NE:
23588 return 4;
23589 case GE:
23590 case GEU:
23591 return 5;
23592 case GT:
23593 case GTU:
23594 return 6;
23595 default:
23596 gcc_unreachable ();
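/* Added commentary: the return values above follow the AVX-512 VPCMP
   predicate encoding (0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = NLT i.e. GE,
   6 = NLE i.e. GT).  The signed/unsigned distinction is not encoded in
   the immediate; it is carried by emitting UNSPEC_PCMP vs
   UNSPEC_UNSIGNED_PCMP (see ix86_expand_mask_vec_cmp below).  */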
23600 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
23602 static int
23603 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
23605 switch (code)
23607 case EQ:
23608 return 0x00;
23609 case NE:
23610 return 0x04;
23611 case GT:
23612 return 0x0e;
23613 case LE:
23614 return 0x02;
23615 case GE:
23616 return 0x0d;
23617 case LT:
23618 return 0x01;
23619 case UNLE:
23620 return 0x0a;
23621 case UNLT:
23622 return 0x09;
23623 case UNGE:
23624 return 0x05;
23625 case UNGT:
23626 return 0x06;
23627 case UNEQ:
23628 return 0x18;
23629 case LTGT:
23630 return 0x0c;
23631 case ORDERED:
23632 return 0x07;
23633 case UNORDERED:
23634 return 0x03;
23635 default:
23636 gcc_unreachable ();
23640 /* Return immediate value to be used in UNSPEC_PCMP
23641 for comparison CODE in MODE. */
23643 static int
23644 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
23646 if (FLOAT_MODE_P (mode))
23647 return ix86_fp_cmp_code_to_pcmp_immediate (code);
23648 return ix86_int_cmp_code_to_pcmp_immediate (code);
23651 /* Expand AVX-512 vector comparison. */
23653 bool
23654 ix86_expand_mask_vec_cmp (rtx operands[])
23656 machine_mode mask_mode = GET_MODE (operands[0]);
23657 machine_mode cmp_mode = GET_MODE (operands[2]);
23658 enum rtx_code code = GET_CODE (operands[1]);
23659 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23660 int unspec_code;
23661 rtx unspec;
23663 switch (code)
23665 case LEU:
23666 case GTU:
23667 case GEU:
23668 case LTU:
23669 unspec_code = UNSPEC_UNSIGNED_PCMP;
23670 break;
23672 default:
23673 unspec_code = UNSPEC_PCMP;
23676 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23677 operands[3], imm),
23678 unspec_code);
23679 emit_insn (gen_rtx_SET (operands[0], unspec));
23681 return true;
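/* Illustrative example (added commentary): comparing two V16SImode
   vectors with LTU goes through the unsigned branch above and emits an
   UNSPEC_UNSIGNED_PCMP (a vpcmpud-style insn) with immediate 1, and the
   result is a 16-bit mask (one bit per element) in operands[0].  */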
23684 /* Expand fp vector comparison. */
23686 bool
23687 ix86_expand_fp_vec_cmp (rtx operands[])
23689 enum rtx_code code = GET_CODE (operands[1]);
23690 rtx cmp;
23692 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23693 &operands[2], &operands[3]);
23694 if (code == UNKNOWN)
23696 rtx temp;
23697 switch (GET_CODE (operands[1]))
23699 case LTGT:
23700 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23701 operands[3], NULL, NULL);
23702 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23703 operands[3], NULL, NULL);
23704 code = AND;
23705 break;
23706 case UNEQ:
23707 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23708 operands[3], NULL, NULL);
23709 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23710 operands[3], NULL, NULL);
23711 code = IOR;
23712 break;
23713 default:
23714 gcc_unreachable ();
23716 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23717 OPTAB_DIRECT);
23719 else
23720 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23721 operands[1], operands[2]);
23723 if (operands[0] != cmp)
23724 emit_move_insn (operands[0], cmp);
23726 return true;
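/* Added commentary: the UNKNOWN fallback above decomposes the two
   predicates SSE compares cannot express directly:
       a LTGT b  ==  ORDERED (a, b)   AND  (a != b)
       a UNEQ b  ==  UNORDERED (a, b) OR   (a == b)
   each built from two compare masks combined with a logical op.  */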
23729 static rtx
23730 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23731 rtx op_true, rtx op_false, bool *negate)
23733 machine_mode data_mode = GET_MODE (dest);
23734 machine_mode mode = GET_MODE (cop0);
23735 rtx x;
23737 *negate = false;
23739 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23740 if (TARGET_XOP
23741 && (mode == V16QImode || mode == V8HImode
23742 || mode == V4SImode || mode == V2DImode))
23744 else
23746 /* Canonicalize the comparison to EQ, GT, GTU. */
23747 switch (code)
23749 case EQ:
23750 case GT:
23751 case GTU:
23752 break;
23754 case NE:
23755 case LE:
23756 case LEU:
23757 code = reverse_condition (code);
23758 *negate = true;
23759 break;
23761 case GE:
23762 case GEU:
23763 code = reverse_condition (code);
23764 *negate = true;
23765 /* FALLTHRU */
23767 case LT:
23768 case LTU:
23769 std::swap (cop0, cop1);
23770 code = swap_condition (code);
23771 break;
23773 default:
23774 gcc_unreachable ();
23777 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23778 if (mode == V2DImode)
23780 switch (code)
23782 case EQ:
23783 /* SSE4.1 supports EQ. */
23784 if (!TARGET_SSE4_1)
23785 return NULL;
23786 break;
23788 case GT:
23789 case GTU:
23790 /* SSE4.2 supports GT/GTU. */
23791 if (!TARGET_SSE4_2)
23792 return NULL;
23793 break;
23795 default:
23796 gcc_unreachable ();
23800 /* Unsigned parallel compare is not supported by the hardware.
23801 Play some tricks to turn this into a signed comparison
23802 against 0. */
23803 if (code == GTU)
23805 cop0 = force_reg (mode, cop0);
23807 switch (mode)
23809 case E_V16SImode:
23810 case E_V8DImode:
23811 case E_V8SImode:
23812 case E_V4DImode:
23813 case E_V4SImode:
23814 case E_V2DImode:
23816 rtx t1, t2, mask;
23817 rtx (*gen_sub3) (rtx, rtx, rtx);
23819 switch (mode)
23821 case E_V16SImode: gen_sub3 = gen_subv16si3; break;
23822 case E_V8DImode: gen_sub3 = gen_subv8di3; break;
23823 case E_V8SImode: gen_sub3 = gen_subv8si3; break;
23824 case E_V4DImode: gen_sub3 = gen_subv4di3; break;
23825 case E_V4SImode: gen_sub3 = gen_subv4si3; break;
23826 case E_V2DImode: gen_sub3 = gen_subv2di3; break;
23827 default:
23828 gcc_unreachable ();
23830 /* Subtract (-(INT MAX) - 1) from both operands to make
23831 them signed. */
23832 mask = ix86_build_signbit_mask (mode, true, false);
23833 t1 = gen_reg_rtx (mode);
23834 emit_insn (gen_sub3 (t1, cop0, mask));
23836 t2 = gen_reg_rtx (mode);
23837 emit_insn (gen_sub3 (t2, cop1, mask));
23839 cop0 = t1;
23840 cop1 = t2;
23841 code = GT;
23843 break;
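/* Illustrative example (added commentary): for V4SImode, x >u y is
   rewritten above as (x - 0x80000000) >s (y - 0x80000000).  E.g. for
   x = 0xFFFFFFFF and y = 1, the biased values are 0x7FFFFFFF (INT_MAX)
   and 0x80000001 (a large negative number), so the signed compare is
   true, matching the unsigned result.  */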
23845 case E_V64QImode:
23846 case E_V32HImode:
23847 case E_V32QImode:
23848 case E_V16HImode:
23849 case E_V16QImode:
23850 case E_V8HImode:
23851 /* Perform a parallel unsigned saturating subtraction. */
23852 x = gen_reg_rtx (mode);
23853 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23854 cop1)));
23856 cop0 = x;
23857 cop1 = CONST0_RTX (mode);
23858 code = EQ;
23859 *negate = !*negate;
23860 break;
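/* Illustrative example (added commentary): for the byte/word cases the
   comparison x >u y is computed as "(x -us y) == 0" with the result
   inverted via *negate, because unsigned saturating subtraction yields
   zero exactly when x <=u y.  E.g. bytes x = 5, y = 200: 5 -us 200
   saturates to 0, the EQ test is true, and after negation the element
   is false, matching 5 >u 200.  */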
23862 default:
23863 gcc_unreachable ();
23868 if (*negate)
23869 std::swap (op_true, op_false);
23871 /* Allow the comparison to be done in one mode, but the movcc to
23872 happen in another mode. */
23873 if (data_mode == mode)
23875 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23876 op_true, op_false);
23878 else
23880 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23881 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23882 op_true, op_false);
23883 if (GET_MODE (x) == mode)
23884 x = gen_lowpart (data_mode, x);
23887 return x;
23890 /* Expand integer vector comparison. */
23892 bool
23893 ix86_expand_int_vec_cmp (rtx operands[])
23895 rtx_code code = GET_CODE (operands[1]);
23896 bool negate = false;
23897 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23898 operands[3], NULL, NULL, &negate);
23900 if (!cmp)
23901 return false;
23903 if (negate)
23904 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23905 CONST0_RTX (GET_MODE (cmp)),
23906 NULL, NULL, &negate);
23908 gcc_assert (!negate);
23910 if (operands[0] != cmp)
23911 emit_move_insn (operands[0], cmp);
23913 return true;
23916 /* Expand a floating-point vector conditional move; a vcond operation
23917 rather than a movcc operation. */
23919 bool
23920 ix86_expand_fp_vcond (rtx operands[])
23922 enum rtx_code code = GET_CODE (operands[3]);
23923 rtx cmp;
23925 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23926 &operands[4], &operands[5]);
23927 if (code == UNKNOWN)
23929 rtx temp;
23930 switch (GET_CODE (operands[3]))
23932 case LTGT:
23933 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23934 operands[5], operands[0], operands[0]);
23935 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23936 operands[5], operands[1], operands[2]);
23937 code = AND;
23938 break;
23939 case UNEQ:
23940 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23941 operands[5], operands[0], operands[0]);
23942 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23943 operands[5], operands[1], operands[2]);
23944 code = IOR;
23945 break;
23946 default:
23947 gcc_unreachable ();
23949 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23950 OPTAB_DIRECT);
23951 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23952 return true;
23955 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23956 operands[5], operands[1], operands[2]))
23957 return true;
23959 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23960 operands[1], operands[2]);
23961 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23962 return true;
23965 /* Expand a signed/unsigned integral vector conditional move. */
23967 bool
23968 ix86_expand_int_vcond (rtx operands[])
23970 machine_mode data_mode = GET_MODE (operands[0]);
23971 machine_mode mode = GET_MODE (operands[4]);
23972 enum rtx_code code = GET_CODE (operands[3]);
23973 bool negate = false;
23974 rtx x, cop0, cop1;
23976 cop0 = operands[4];
23977 cop1 = operands[5];
23979 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23980 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
23981 if ((code == LT || code == GE)
23982 && data_mode == mode
23983 && cop1 == CONST0_RTX (mode)
23984 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23985 && GET_MODE_UNIT_SIZE (data_mode) > 1
23986 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23987 && (GET_MODE_SIZE (data_mode) == 16
23988 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23990 rtx negop = operands[2 - (code == LT)];
23991 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23992 if (negop == CONST1_RTX (data_mode))
23994 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23995 operands[0], 1, OPTAB_DIRECT);
23996 if (res != operands[0])
23997 emit_move_insn (operands[0], res);
23998 return true;
24000 else if (GET_MODE_INNER (data_mode) != DImode
24001 && vector_all_ones_operand (negop, data_mode))
24003 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24004 operands[0], 0, OPTAB_DIRECT);
24005 if (res != operands[0])
24006 emit_move_insn (operands[0], res);
24007 return true;
24011 if (!nonimmediate_operand (cop1, mode))
24012 cop1 = force_reg (mode, cop1);
24013 if (!general_operand (operands[1], data_mode))
24014 operands[1] = force_reg (data_mode, operands[1]);
24015 if (!general_operand (operands[2], data_mode))
24016 operands[2] = force_reg (data_mode, operands[2]);
24018 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24019 operands[1], operands[2], &negate);
24021 if (!x)
24022 return false;
24024 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24025 operands[2-negate]);
24026 return true;
24029 /* AVX512F does support 64-byte integer vector operations,
24030 thus the longest vector we are faced with is V64QImode. */
24031 #define MAX_VECT_LEN 64
24033 struct expand_vec_perm_d
24035 rtx target, op0, op1;
24036 unsigned char perm[MAX_VECT_LEN];
24037 machine_mode vmode;
24038 unsigned char nelt;
24039 bool one_operand_p;
24040 bool testing_p;
24043 static bool
24044 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24045 struct expand_vec_perm_d *d)
24047 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24048 expander, so args are either in d, or in op0, op1 etc. */
24049 machine_mode mode = GET_MODE (d ? d->op0 : op0);
24050 machine_mode maskmode = mode;
24051 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24053 switch (mode)
24055 case E_V8HImode:
24056 if (TARGET_AVX512VL && TARGET_AVX512BW)
24057 gen = gen_avx512vl_vpermt2varv8hi3;
24058 break;
24059 case E_V16HImode:
24060 if (TARGET_AVX512VL && TARGET_AVX512BW)
24061 gen = gen_avx512vl_vpermt2varv16hi3;
24062 break;
24063 case E_V64QImode:
24064 if (TARGET_AVX512VBMI)
24065 gen = gen_avx512bw_vpermt2varv64qi3;
24066 break;
24067 case E_V32HImode:
24068 if (TARGET_AVX512BW)
24069 gen = gen_avx512bw_vpermt2varv32hi3;
24070 break;
24071 case E_V4SImode:
24072 if (TARGET_AVX512VL)
24073 gen = gen_avx512vl_vpermt2varv4si3;
24074 break;
24075 case E_V8SImode:
24076 if (TARGET_AVX512VL)
24077 gen = gen_avx512vl_vpermt2varv8si3;
24078 break;
24079 case E_V16SImode:
24080 if (TARGET_AVX512F)
24081 gen = gen_avx512f_vpermt2varv16si3;
24082 break;
24083 case E_V4SFmode:
24084 if (TARGET_AVX512VL)
24086 gen = gen_avx512vl_vpermt2varv4sf3;
24087 maskmode = V4SImode;
24089 break;
24090 case E_V8SFmode:
24091 if (TARGET_AVX512VL)
24093 gen = gen_avx512vl_vpermt2varv8sf3;
24094 maskmode = V8SImode;
24096 break;
24097 case E_V16SFmode:
24098 if (TARGET_AVX512F)
24100 gen = gen_avx512f_vpermt2varv16sf3;
24101 maskmode = V16SImode;
24103 break;
24104 case E_V2DImode:
24105 if (TARGET_AVX512VL)
24106 gen = gen_avx512vl_vpermt2varv2di3;
24107 break;
24108 case E_V4DImode:
24109 if (TARGET_AVX512VL)
24110 gen = gen_avx512vl_vpermt2varv4di3;
24111 break;
24112 case E_V8DImode:
24113 if (TARGET_AVX512F)
24114 gen = gen_avx512f_vpermt2varv8di3;
24115 break;
24116 case E_V2DFmode:
24117 if (TARGET_AVX512VL)
24119 gen = gen_avx512vl_vpermt2varv2df3;
24120 maskmode = V2DImode;
24122 break;
24123 case E_V4DFmode:
24124 if (TARGET_AVX512VL)
24126 gen = gen_avx512vl_vpermt2varv4df3;
24127 maskmode = V4DImode;
24129 break;
24130 case E_V8DFmode:
24131 if (TARGET_AVX512F)
24133 gen = gen_avx512f_vpermt2varv8df3;
24134 maskmode = V8DImode;
24136 break;
24137 default:
24138 break;
24141 if (gen == NULL)
24142 return false;
24144 /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24145 expander, so args are either in d, or in op0, op1 etc. */
24146 if (d)
24148 rtx vec[64];
24149 target = d->target;
24150 op0 = d->op0;
24151 op1 = d->op1;
24152 for (int i = 0; i < d->nelt; ++i)
24153 vec[i] = GEN_INT (d->perm[i]);
24154 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24157 emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24158 return true;
24161 /* Expand a variable vector permutation. */
24163 void
24164 ix86_expand_vec_perm (rtx operands[])
24166 rtx target = operands[0];
24167 rtx op0 = operands[1];
24168 rtx op1 = operands[2];
24169 rtx mask = operands[3];
24170 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24171 machine_mode mode = GET_MODE (op0);
24172 machine_mode maskmode = GET_MODE (mask);
24173 int w, e, i;
24174 bool one_operand_shuffle = rtx_equal_p (op0, op1);
24176 /* Number of elements in the vector. */
24177 w = GET_MODE_NUNITS (mode);
24178 e = GET_MODE_UNIT_SIZE (mode);
24179 gcc_assert (w <= 64);
24181 if (TARGET_AVX512F && one_operand_shuffle)
24183 rtx (*gen) (rtx, rtx, rtx) = NULL;
24184 switch (mode)
24186 case E_V16SImode:
24187 gen = gen_avx512f_permvarv16si;
24188 break;
24189 case E_V16SFmode:
24190 gen = gen_avx512f_permvarv16sf;
24191 break;
24192 case E_V8DImode:
24193 gen = gen_avx512f_permvarv8di;
24194 break;
24195 case E_V8DFmode:
24196 gen = gen_avx512f_permvarv8df;
24197 break;
24198 default:
24199 break;
24201 if (gen != NULL)
24203 emit_insn (gen (target, op0, mask));
24204 return;
24208 if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
24209 return;
24211 if (TARGET_AVX2)
24213 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24215 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24216 a constant shuffle operand. With a tiny bit of effort we can
24217 use VPERMD instead. A re-interpretation stall for V4DFmode is
24218 unfortunate but there's no avoiding it.
24219 Similarly for V16HImode we don't have instructions for variable
24220 shuffling, while for V32QImode we can use
24221 vpshufb; vpshufb; vpermq; vpor after preparing suitable masks. */
24223 if (mode == V16HImode)
24225 maskmode = mode = V32QImode;
24226 w = 32;
24227 e = 1;
24229 else
24231 maskmode = mode = V8SImode;
24232 w = 8;
24233 e = 4;
24235 t1 = gen_reg_rtx (maskmode);
24237 /* Replicate the low bits of the V4DImode mask into V8SImode:
24238 mask = { A B C D }
24239 t1 = { A A B B C C D D }. */
24240 for (i = 0; i < w / 2; ++i)
24241 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24242 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24243 vt = force_reg (maskmode, vt);
24244 mask = gen_lowpart (maskmode, mask);
24245 if (maskmode == V8SImode)
24246 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
24247 else
24248 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
24250 /* Multiply the shuffle indices by two. */
24251 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
24252 OPTAB_DIRECT);
24254 /* Add one to the odd shuffle indices:
24255 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
24256 for (i = 0; i < w / 2; ++i)
24258 vec[i * 2] = const0_rtx;
24259 vec[i * 2 + 1] = const1_rtx;
24261 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24262 vt = validize_mem (force_const_mem (maskmode, vt));
24263 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
24264 OPTAB_DIRECT);
24266 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
24267 operands[3] = mask = t1;
24268 target = gen_reg_rtx (mode);
24269 op0 = gen_lowpart (mode, op0);
24270 op1 = gen_lowpart (mode, op1);
24273 switch (mode)
24275 case E_V8SImode:
24276 /* The VPERMD and VPERMPS instructions already properly ignore
24277 the high bits of the shuffle elements. No need for us to
24278 perform an AND ourselves. */
24279 if (one_operand_shuffle)
24281 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
24282 if (target != operands[0])
24283 emit_move_insn (operands[0],
24284 gen_lowpart (GET_MODE (operands[0]), target));
24286 else
24288 t1 = gen_reg_rtx (V8SImode);
24289 t2 = gen_reg_rtx (V8SImode);
24290 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
24291 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
24292 goto merge_two;
24294 return;
24296 case E_V8SFmode:
24297 mask = gen_lowpart (V8SImode, mask);
24298 if (one_operand_shuffle)
24299 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
24300 else
24302 t1 = gen_reg_rtx (V8SFmode);
24303 t2 = gen_reg_rtx (V8SFmode);
24304 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
24305 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
24306 goto merge_two;
24308 return;
24310 case E_V4SImode:
24311 /* By combining the two 128-bit input vectors into one 256-bit
24312 input vector, we can use VPERMD and VPERMPS for the full
24313 two-operand shuffle. */
24314 t1 = gen_reg_rtx (V8SImode);
24315 t2 = gen_reg_rtx (V8SImode);
24316 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
24317 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24318 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
24319 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
24320 return;
24322 case E_V4SFmode:
24323 t1 = gen_reg_rtx (V8SFmode);
24324 t2 = gen_reg_rtx (V8SImode);
24325 mask = gen_lowpart (V4SImode, mask);
24326 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
24327 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
24328 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
24329 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
24330 return;
24332 case E_V32QImode:
24333 t1 = gen_reg_rtx (V32QImode);
24334 t2 = gen_reg_rtx (V32QImode);
24335 t3 = gen_reg_rtx (V32QImode);
24336 vt2 = GEN_INT (-128);
24337 vt = gen_const_vec_duplicate (V32QImode, vt2);
24338 vt = force_reg (V32QImode, vt);
24339 for (i = 0; i < 32; i++)
24340 vec[i] = i < 16 ? vt2 : const0_rtx;
24341 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
24342 vt2 = force_reg (V32QImode, vt2);
24343 /* From mask create two adjusted masks, which contain the same
24344 bits as mask in the low 7 bits of each vector element.
24345 The first mask will have the most significant bit clear
24346 if it requests element from the same 128-bit lane
24347 and MSB set if it requests element from the other 128-bit lane.
24348 The second mask will have the opposite values of the MSB,
24349 and additionally will have its 128-bit lanes swapped.
24350 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
24351 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
24352 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
24353 stands for the other 12 bytes. */
24354 /* The bit that tells whether an element comes from the same lane or from
24355 the other lane is bit 4, so shift it up by 3 to the MSB position. */
24356 t5 = gen_reg_rtx (V4DImode);
24357 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
24358 GEN_INT (3)));
24359 /* Clear MSB bits from the mask just in case it had them set. */
24360 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
24361 /* After this t1 will have MSB set for elements from other lane. */
24362 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
24363 /* Clear bits other than MSB. */
24364 emit_insn (gen_andv32qi3 (t1, t1, vt));
24365 /* Or in the lower bits from mask into t3. */
24366 emit_insn (gen_iorv32qi3 (t3, t1, t2));
24367 /* And invert MSB bits in t1, so MSB is set for elements from the same
24368 lane. */
24369 emit_insn (gen_xorv32qi3 (t1, t1, vt));
24370 /* Swap 128-bit lanes in t3. */
24371 t6 = gen_reg_rtx (V4DImode);
24372 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
24373 const2_rtx, GEN_INT (3),
24374 const0_rtx, const1_rtx));
24375 /* And or in the lower bits from mask into t1. */
24376 emit_insn (gen_iorv32qi3 (t1, t1, t2));
24377 if (one_operand_shuffle)
24379 /* Each of these shuffles will put 0s in places where
24380 element from the other 128-bit lane is needed, otherwise
24381 will shuffle in the requested value. */
24382 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
24383 gen_lowpart (V32QImode, t6)));
24384 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
24385 /* For t3 the 128-bit lanes are swapped again. */
24386 t7 = gen_reg_rtx (V4DImode);
24387 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
24388 const2_rtx, GEN_INT (3),
24389 const0_rtx, const1_rtx));
24390 /* And oring both together leads to the result. */
24391 emit_insn (gen_iorv32qi3 (target, t1,
24392 gen_lowpart (V32QImode, t7)));
24393 if (target != operands[0])
24394 emit_move_insn (operands[0],
24395 gen_lowpart (GET_MODE (operands[0]), target));
24396 return;
24399 t4 = gen_reg_rtx (V32QImode);
24400 /* Similar to the above one_operand_shuffle code, just
24401 repeated twice for each operand. The code at the merge_two:
24402 label will merge the two results together. */
24403 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
24404 gen_lowpart (V32QImode, t6)));
24405 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
24406 gen_lowpart (V32QImode, t6)));
24407 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
24408 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
24409 t7 = gen_reg_rtx (V4DImode);
24410 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
24411 const2_rtx, GEN_INT (3),
24412 const0_rtx, const1_rtx));
24413 t8 = gen_reg_rtx (V4DImode);
24414 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
24415 const2_rtx, GEN_INT (3),
24416 const0_rtx, const1_rtx));
24417 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
24418 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
24419 t1 = t4;
24420 t2 = t3;
24421 goto merge_two;
24423 default:
24424 gcc_assert (GET_MODE_SIZE (mode) <= 16);
24425 break;
24429 if (TARGET_XOP)
24431 /* The XOP VPPERM insn supports three inputs. By ignoring the
24432 one_operand_shuffle special case, we avoid creating another
24433 set of constant vectors in memory. */
24434 one_operand_shuffle = false;
24436 /* mask = mask & {2*w-1, ...} */
24437 vt = GEN_INT (2*w - 1);
24439 else
24441 /* mask = mask & {w-1, ...} */
24442 vt = GEN_INT (w - 1);
24445 vt = gen_const_vec_duplicate (maskmode, vt);
24446 mask = expand_simple_binop (maskmode, AND, mask, vt,
24447 NULL_RTX, 0, OPTAB_DIRECT);
24449 /* For non-QImode operations, convert the word permutation control
24450 into a byte permutation control. */
24451 if (mode != V16QImode)
24453 mask = expand_simple_binop (maskmode, ASHIFT, mask,
24454 GEN_INT (exact_log2 (e)),
24455 NULL_RTX, 0, OPTAB_DIRECT);
24457 /* Convert mask to vector of chars. */
24458 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
24460 /* Replicate each of the input bytes into byte positions:
24461 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
24462 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
24463 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
24464 for (i = 0; i < 16; ++i)
24465 vec[i] = GEN_INT (i/e * e);
24466 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24467 vt = validize_mem (force_const_mem (V16QImode, vt));
24468 if (TARGET_XOP)
24469 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
24470 else
24471 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
24473 /* Convert it into the byte positions by doing
24474 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
24475 for (i = 0; i < 16; ++i)
24476 vec[i] = GEN_INT (i % e);
24477 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
24478 vt = validize_mem (force_const_mem (V16QImode, vt));
24479 emit_insn (gen_addv16qi3 (mask, mask, vt));
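/* Worked example (added commentary, V4SImode, e == 4): a word index of 2
   is first scaled by e (mask <<= 2) giving 8, replicated into every byte
   of its element by the pshufb with the { 0,0,0,0, 4,4,4,4, ... }
   control giving { 8, 8, 8, 8 }, and then the per-byte offsets
   { 0, 1, 2, 3 } are added, producing the byte indices { 8, 9, 10, 11 }
   that select element 2 in the final V16QImode shuffle.  */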
24482 /* The actual shuffle operations all operate on V16QImode. */
24483 op0 = gen_lowpart (V16QImode, op0);
24484 op1 = gen_lowpart (V16QImode, op1);
24486 if (TARGET_XOP)
24488 if (GET_MODE (target) != V16QImode)
24489 target = gen_reg_rtx (V16QImode);
24490 emit_insn (gen_xop_pperm (target, op0, op1, mask));
24491 if (target != operands[0])
24492 emit_move_insn (operands[0],
24493 gen_lowpart (GET_MODE (operands[0]), target));
24495 else if (one_operand_shuffle)
24497 if (GET_MODE (target) != V16QImode)
24498 target = gen_reg_rtx (V16QImode);
24499 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
24500 if (target != operands[0])
24501 emit_move_insn (operands[0],
24502 gen_lowpart (GET_MODE (operands[0]), target));
24504 else
24506 rtx xops[6];
24507 bool ok;
24509 /* Shuffle the two input vectors independently. */
24510 t1 = gen_reg_rtx (V16QImode);
24511 t2 = gen_reg_rtx (V16QImode);
24512 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
24513 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
24515 merge_two:
24516 /* Then merge them together. The key is whether any given control
24517 element contained a bit set that indicates the second word. */
24518 mask = operands[3];
24519 vt = GEN_INT (w);
24520 if (maskmode == V2DImode && !TARGET_SSE4_1)
24522 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
24523 more shuffle to convert the V2DI input mask into a V4SI
24524 input mask. At that point the masking that
24525 ix86_expand_int_vcond performs will work as desired. */
24526 rtx t3 = gen_reg_rtx (V4SImode);
24527 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
24528 const0_rtx, const0_rtx,
24529 const2_rtx, const2_rtx));
24530 mask = t3;
24531 maskmode = V4SImode;
24532 e = w = 4;
24535 vt = gen_const_vec_duplicate (maskmode, vt);
24536 vt = force_reg (maskmode, vt);
24537 mask = expand_simple_binop (maskmode, AND, mask, vt,
24538 NULL_RTX, 0, OPTAB_DIRECT);
24540 if (GET_MODE (target) != mode)
24541 target = gen_reg_rtx (mode);
24542 xops[0] = target;
24543 xops[1] = gen_lowpart (mode, t2);
24544 xops[2] = gen_lowpart (mode, t1);
24545 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
24546 xops[4] = mask;
24547 xops[5] = vt;
24548 ok = ix86_expand_int_vcond (xops);
24549 gcc_assert (ok);
24550 if (target != operands[0])
24551 emit_move_insn (operands[0],
24552 gen_lowpart (GET_MODE (operands[0]), target));
24556 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
24557 true if we should do zero extension, else sign extension. HIGH_P is
24558 true if we want the N/2 high elements, else the low elements. */
24560 void
24561 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
24563 machine_mode imode = GET_MODE (src);
24564 rtx tmp;
24566 if (TARGET_SSE4_1)
24568 rtx (*unpack)(rtx, rtx);
24569 rtx (*extract)(rtx, rtx) = NULL;
24570 machine_mode halfmode = BLKmode;
24572 switch (imode)
24574 case E_V64QImode:
24575 if (unsigned_p)
24576 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
24577 else
24578 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
24579 halfmode = V32QImode;
24580 extract
24581 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
24582 break;
24583 case E_V32QImode:
24584 if (unsigned_p)
24585 unpack = gen_avx2_zero_extendv16qiv16hi2;
24586 else
24587 unpack = gen_avx2_sign_extendv16qiv16hi2;
24588 halfmode = V16QImode;
24589 extract
24590 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
24591 break;
24592 case E_V32HImode:
24593 if (unsigned_p)
24594 unpack = gen_avx512f_zero_extendv16hiv16si2;
24595 else
24596 unpack = gen_avx512f_sign_extendv16hiv16si2;
24597 halfmode = V16HImode;
24598 extract
24599 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
24600 break;
24601 case E_V16HImode:
24602 if (unsigned_p)
24603 unpack = gen_avx2_zero_extendv8hiv8si2;
24604 else
24605 unpack = gen_avx2_sign_extendv8hiv8si2;
24606 halfmode = V8HImode;
24607 extract
24608 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
24609 break;
24610 case E_V16SImode:
24611 if (unsigned_p)
24612 unpack = gen_avx512f_zero_extendv8siv8di2;
24613 else
24614 unpack = gen_avx512f_sign_extendv8siv8di2;
24615 halfmode = V8SImode;
24616 extract
24617 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
24618 break;
24619 case E_V8SImode:
24620 if (unsigned_p)
24621 unpack = gen_avx2_zero_extendv4siv4di2;
24622 else
24623 unpack = gen_avx2_sign_extendv4siv4di2;
24624 halfmode = V4SImode;
24625 extract
24626 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
24627 break;
24628 case E_V16QImode:
24629 if (unsigned_p)
24630 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
24631 else
24632 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
24633 break;
24634 case E_V8HImode:
24635 if (unsigned_p)
24636 unpack = gen_sse4_1_zero_extendv4hiv4si2;
24637 else
24638 unpack = gen_sse4_1_sign_extendv4hiv4si2;
24639 break;
24640 case E_V4SImode:
24641 if (unsigned_p)
24642 unpack = gen_sse4_1_zero_extendv2siv2di2;
24643 else
24644 unpack = gen_sse4_1_sign_extendv2siv2di2;
24645 break;
24646 default:
24647 gcc_unreachable ();
24650 if (GET_MODE_SIZE (imode) >= 32)
24652 tmp = gen_reg_rtx (halfmode);
24653 emit_insn (extract (tmp, src));
24655 else if (high_p)
24657 /* Shift higher 8 bytes to lower 8 bytes. */
24658 tmp = gen_reg_rtx (V1TImode);
24659 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
24660 GEN_INT (64)));
24661 tmp = gen_lowpart (imode, tmp);
24663 else
24664 tmp = src;
24666 emit_insn (unpack (dest, tmp));
24668 else
24670 rtx (*unpack)(rtx, rtx, rtx);
24672 switch (imode)
24674 case E_V16QImode:
24675 if (high_p)
24676 unpack = gen_vec_interleave_highv16qi;
24677 else
24678 unpack = gen_vec_interleave_lowv16qi;
24679 break;
24680 case E_V8HImode:
24681 if (high_p)
24682 unpack = gen_vec_interleave_highv8hi;
24683 else
24684 unpack = gen_vec_interleave_lowv8hi;
24685 break;
24686 case E_V4SImode:
24687 if (high_p)
24688 unpack = gen_vec_interleave_highv4si;
24689 else
24690 unpack = gen_vec_interleave_lowv4si;
24691 break;
24692 default:
24693 gcc_unreachable ();
24696 if (unsigned_p)
24697 tmp = force_reg (imode, CONST0_RTX (imode));
24698 else
24699 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24700 src, pc_rtx, pc_rtx);
24702 rtx tmp2 = gen_reg_rtx (imode);
24703 emit_insn (unpack (tmp2, src, tmp));
24704 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
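/* Added commentary on the pre-SSE4.1 path above: without pmovsx/pmovzx
   the widening is done by interleaving SRC with either a zero vector
   (zero extension) or with the result of the "0 > SRC" comparison,
   whose all-ones elements supply the sign bits (sign extension).  E.g.
   the V8HImode element 0xfffd (-3) interleaved with 0xffff forms the
   V4SImode element 0xfffffffd, still -3.  */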
24708 /* Expand conditional increment or decrement using adc/sbb instructions.
24709 The default case using setcc followed by the conditional move can be
24710 done by generic code. */
24711 bool
24712 ix86_expand_int_addcc (rtx operands[])
24714 enum rtx_code code = GET_CODE (operands[1]);
24715 rtx flags;
24716 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24717 rtx compare_op;
24718 rtx val = const0_rtx;
24719 bool fpcmp = false;
24720 machine_mode mode;
24721 rtx op0 = XEXP (operands[1], 0);
24722 rtx op1 = XEXP (operands[1], 1);
24724 if (operands[3] != const1_rtx
24725 && operands[3] != constm1_rtx)
24726 return false;
24727 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24728 return false;
24729 code = GET_CODE (compare_op);
24731 flags = XEXP (compare_op, 0);
24733 if (GET_MODE (flags) == CCFPmode)
24735 fpcmp = true;
24736 code = ix86_fp_compare_code_to_integer (code);
24739 if (code != LTU)
24741 val = constm1_rtx;
24742 if (fpcmp)
24743 PUT_CODE (compare_op,
24744 reverse_condition_maybe_unordered
24745 (GET_CODE (compare_op)));
24746 else
24747 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24750 mode = GET_MODE (operands[0]);
24752 /* Construct either adc or sbb insn. */
24753 if ((code == LTU) == (operands[3] == constm1_rtx))
24755 switch (mode)
24757 case E_QImode:
24758 insn = gen_subqi3_carry;
24759 break;
24760 case E_HImode:
24761 insn = gen_subhi3_carry;
24762 break;
24763 case E_SImode:
24764 insn = gen_subsi3_carry;
24765 break;
24766 case E_DImode:
24767 insn = gen_subdi3_carry;
24768 break;
24769 default:
24770 gcc_unreachable ();
24773 else
24775 switch (mode)
24777 case E_QImode:
24778 insn = gen_addqi3_carry;
24779 break;
24780 case E_HImode:
24781 insn = gen_addhi3_carry;
24782 break;
24783 case E_SImode:
24784 insn = gen_addsi3_carry;
24785 break;
24786 case E_DImode:
24787 insn = gen_adddi3_carry;
24788 break;
24789 default:
24790 gcc_unreachable ();
24793 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24795 return true;
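/* Illustrative example (added commentary): for unsigned operands,
   "r = r + (a < b ? 1 : 0)" can be expanded by the code above as

       cmp  a, b        ; CF is set when a < b (unsigned)
       adc  r, 0        ; r = r + 0 + CF

   and the decrement flavour uses sbb the same way; which of adc/sbb is
   chosen depends on the condition and on whether operands[3] is 1
   or -1.  */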
24799 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24800 but works for floating point parameters and non-offsettable memories.
24801 For pushes, it returns just stack offsets; the values will be saved
24802 in the right order. At most four parts are generated. */
24804 static int
24805 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24807 int size;
24809 if (!TARGET_64BIT)
24810 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24811 else
24812 size = (GET_MODE_SIZE (mode) + 4) / 8;
24814 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24815 gcc_assert (size >= 2 && size <= 4);
24817 /* Optimize constant pool reference to immediates. This is used by fp
24818 moves that force all constants to memory to allow combining. */
24819 if (MEM_P (operand) && MEM_READONLY_P (operand))
24820 operand = avoid_constant_pool_reference (operand);
24822 if (MEM_P (operand) && !offsettable_memref_p (operand))
24824 /* The only non-offsettable memories we handle are pushes. */
24825 int ok = push_operand (operand, VOIDmode);
24827 gcc_assert (ok);
24829 operand = copy_rtx (operand);
24830 PUT_MODE (operand, word_mode);
24831 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24832 return size;
24835 if (GET_CODE (operand) == CONST_VECTOR)
24837 scalar_int_mode imode = int_mode_for_mode (mode).require ();
24838 /* Caution: if we looked through a constant pool memory above,
24839 the operand may actually have a different mode now. That's
24840 ok, since we want to pun this all the way back to an integer. */
24841 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24842 gcc_assert (operand != NULL);
24843 mode = imode;
24846 if (!TARGET_64BIT)
24848 if (mode == DImode)
24849 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24850 else
24852 int i;
24854 if (REG_P (operand))
24856 gcc_assert (reload_completed);
24857 for (i = 0; i < size; i++)
24858 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24860 else if (offsettable_memref_p (operand))
24862 operand = adjust_address (operand, SImode, 0);
24863 parts[0] = operand;
24864 for (i = 1; i < size; i++)
24865 parts[i] = adjust_address (operand, SImode, 4 * i);
24867 else if (CONST_DOUBLE_P (operand))
24869 const REAL_VALUE_TYPE *r;
24870 long l[4];
24872 r = CONST_DOUBLE_REAL_VALUE (operand);
24873 switch (mode)
24875 case E_TFmode:
24876 real_to_target (l, r, mode);
24877 parts[3] = gen_int_mode (l[3], SImode);
24878 parts[2] = gen_int_mode (l[2], SImode);
24879 break;
24880 case E_XFmode:
24881 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24882 long double may not be 80-bit. */
24883 real_to_target (l, r, mode);
24884 parts[2] = gen_int_mode (l[2], SImode);
24885 break;
24886 case E_DFmode:
24887 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24888 break;
24889 default:
24890 gcc_unreachable ();
24892 parts[1] = gen_int_mode (l[1], SImode);
24893 parts[0] = gen_int_mode (l[0], SImode);
24895 else
24896 gcc_unreachable ();
24899 else
24901 if (mode == TImode)
24902 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24903 if (mode == XFmode || mode == TFmode)
24905 machine_mode upper_mode = mode == XFmode ? SImode : DImode;
24906 if (REG_P (operand))
24908 gcc_assert (reload_completed);
24909 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24910 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24912 else if (offsettable_memref_p (operand))
24914 operand = adjust_address (operand, DImode, 0);
24915 parts[0] = operand;
24916 parts[1] = adjust_address (operand, upper_mode, 8);
24918 else if (CONST_DOUBLE_P (operand))
24920 long l[4];
24922 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24924 /* real_to_target puts 32-bit pieces in each long. */
24925 parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
24926 | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
24927 << 32), DImode);
24929 if (upper_mode == SImode)
24930 parts[1] = gen_int_mode (l[2], SImode);
24931 else
24932 parts[1]
24933 = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
24934 | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
24935 << 32), DImode);
24937 else
24938 gcc_unreachable ();
24942 return size;
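/* Illustrative example (added commentary): on a 32-bit target a DFmode
   constant such as 1.0 splits into two SImode immediates (the two
   halves of its IEEE-754 bit pattern), an XFmode value splits into
   three SImode parts, and with -m64 an XFmode value splits into a
   DImode part plus an SImode part.  */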
24945 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24946 The operand is split into parts (see ix86_split_to_parts), the parts
24947 are ordered so that no source part is overwritten before it has been
24948 copied, and the required moves or pushes are emitted. */
24950 void
24951 ix86_split_long_move (rtx operands[])
24953 rtx part[2][4];
24954 int nparts, i, j;
24955 int push = 0;
24956 int collisions = 0;
24957 machine_mode mode = GET_MODE (operands[0]);
24958 bool collisionparts[4];
24960 /* The DFmode expanders may ask us to move a double.
24961 For a 64-bit target this is a single move. By hiding that fact
24962 here we simplify the i386.md splitters. */
24963 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24965 /* Optimize constant pool reference to immediates. This is used by
24966 fp moves that force all constants to memory to allow combining. */
24968 if (MEM_P (operands[1])
24969 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24970 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24971 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24972 if (push_operand (operands[0], VOIDmode))
24974 operands[0] = copy_rtx (operands[0]);
24975 PUT_MODE (operands[0], word_mode);
24977 else
24978 operands[0] = gen_lowpart (DImode, operands[0]);
24979 operands[1] = gen_lowpart (DImode, operands[1]);
24980 emit_move_insn (operands[0], operands[1]);
24981 return;
24984 /* The only non-offsettable memory we handle is push. */
24985 if (push_operand (operands[0], VOIDmode))
24986 push = 1;
24987 else
24988 gcc_assert (!MEM_P (operands[0])
24989 || offsettable_memref_p (operands[0]));
24991 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24992 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24994 /* When emitting a push, take care of source operands on the stack. */
24995 if (push && MEM_P (operands[1])
24996 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24998 rtx src_base = XEXP (part[1][nparts - 1], 0);
25000 /* Compensate for the stack decrement by 4. */
25001 if (!TARGET_64BIT && nparts == 3
25002 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25003 src_base = plus_constant (Pmode, src_base, 4);
25005 /* src_base refers to the stack pointer and is
25006 automatically decreased by emitted push. */
25007 for (i = 0; i < nparts; i++)
25008 part[1][i] = change_address (part[1][i],
25009 GET_MODE (part[1][i]), src_base);
25012 /* We need to do copy in the right order in case an address register
25013 of the source overlaps the destination. */
25014 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25016 rtx tmp;
25018 for (i = 0; i < nparts; i++)
25020 collisionparts[i]
25021 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25022 if (collisionparts[i])
25023 collisions++;
25026 /* Collision in the middle part can be handled by reordering. */
25027 if (collisions == 1 && nparts == 3 && collisionparts [1])
25029 std::swap (part[0][1], part[0][2]);
25030 std::swap (part[1][1], part[1][2]);
25032 else if (collisions == 1
25033 && nparts == 4
25034 && (collisionparts [1] || collisionparts [2]))
25036 if (collisionparts [1])
25038 std::swap (part[0][1], part[0][2]);
25039 std::swap (part[1][1], part[1][2]);
25041 else
25043 std::swap (part[0][2], part[0][3]);
25044 std::swap (part[1][2], part[1][3]);
25048 /* If there are more collisions, we can't handle it by reordering.
25049 Do an lea to the last part and use only one colliding move. */
25050 else if (collisions > 1)
25052 rtx base, addr;
25054 collisions = 1;
25056 base = part[0][nparts - 1];
25058 /* Handle the case when the last part isn't valid for lea.
25059 Happens in 64-bit mode storing the 12-byte XFmode. */
25060 if (GET_MODE (base) != Pmode)
25061 base = gen_rtx_REG (Pmode, REGNO (base));
25063 addr = XEXP (part[1][0], 0);
25064 if (TARGET_TLS_DIRECT_SEG_REFS)
25066 struct ix86_address parts;
25067 int ok = ix86_decompose_address (addr, &parts);
25068 gcc_assert (ok);
25069 /* It is not valid to use %gs: or %fs: in lea. */
25070 gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25072 emit_insn (gen_rtx_SET (base, addr));
25073 part[1][0] = replace_equiv_address (part[1][0], base);
25074 for (i = 1; i < nparts; i++)
25076 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25077 part[1][i] = replace_equiv_address (part[1][i], tmp);
25082 if (push)
25084 if (!TARGET_64BIT)
25086 if (nparts == 3)
25088 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25089 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25090 stack_pointer_rtx, GEN_INT (-4)));
25091 emit_move_insn (part[0][2], part[1][2]);
25093 else if (nparts == 4)
25095 emit_move_insn (part[0][3], part[1][3]);
25096 emit_move_insn (part[0][2], part[1][2]);
25099 else
25101 /* In 64-bit mode we don't have a 32-bit push available. If this is a
25102 register, that is OK - we will just use the larger counterpart. We also
25103 retype memory - this comes from an attempt to avoid a REX prefix on
25104 moving the second half of a TFmode value. */
25105 if (GET_MODE (part[1][1]) == SImode)
25107 switch (GET_CODE (part[1][1]))
25109 case MEM:
25110 part[1][1] = adjust_address (part[1][1], DImode, 0);
25111 break;
25113 case REG:
25114 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25115 break;
25117 default:
25118 gcc_unreachable ();
25121 if (GET_MODE (part[1][0]) == SImode)
25122 part[1][0] = part[1][1];
25125 emit_move_insn (part[0][1], part[1][1]);
25126 emit_move_insn (part[0][0], part[1][0]);
25127 return;
25130 /* Choose correct order to not overwrite the source before it is copied. */
25131 if ((REG_P (part[0][0])
25132 && REG_P (part[1][1])
25133 && (REGNO (part[0][0]) == REGNO (part[1][1])
25134 || (nparts == 3
25135 && REGNO (part[0][0]) == REGNO (part[1][2]))
25136 || (nparts == 4
25137 && REGNO (part[0][0]) == REGNO (part[1][3]))))
25138 || (collisions > 0
25139 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25141 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25143 operands[2 + i] = part[0][j];
25144 operands[6 + i] = part[1][j];
25147 else
25149 for (i = 0; i < nparts; i++)
25151 operands[2 + i] = part[0][i];
25152 operands[6 + i] = part[1][i];
25156 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
25157 if (optimize_insn_for_size_p ())
25159 for (j = 0; j < nparts - 1; j++)
25160 if (CONST_INT_P (operands[6 + j])
25161 && operands[6 + j] != const0_rtx
25162 && REG_P (operands[2 + j]))
25163 for (i = j; i < nparts - 1; i++)
25164 if (CONST_INT_P (operands[7 + i])
25165 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25166 operands[7 + i] = operands[2 + j];
25169 for (i = 0; i < nparts; i++)
25170 emit_move_insn (operands[2 + i], operands[6 + i]);
25172 return;
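/* Illustrative example (added commentary): when moving a DImode value
   located at the address held in %eax into the %eax/%edx pair on a
   32-bit target, the overlap check above reverses the copy order so
   that %edx is loaded from 4(%eax) first and %eax from (%eax) last;
   copying the low part first would clobber the address register before
   the high part is read.  */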
25175 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25176 left shift by a constant, either using a single shift or
25177 a sequence of add instructions. */
25179 static void
25180 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25182 rtx (*insn)(rtx, rtx, rtx);
25184 if (count == 1
25185 || (count * ix86_cost->add <= ix86_cost->shift_const
25186 && !optimize_insn_for_size_p ()))
25188 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25189 while (count-- > 0)
25190 emit_insn (insn (operand, operand, operand));
25192 else
25194 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25195 emit_insn (insn (operand, operand, GEN_INT (count)));
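/* Illustrative example (added commentary): when adds are cheap relative
   to a constant shift (and we are not optimising for size), a left
   shift by 2 may be emitted as two self-adds,

       add  r, r        ; r <<= 1
       add  r, r        ; r <<= 1

   while larger counts fall back to a single shift insn.  */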
25199 void
25200 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25202 rtx (*gen_ashl3)(rtx, rtx, rtx);
25203 rtx (*gen_shld)(rtx, rtx, rtx);
25204 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25206 rtx low[2], high[2];
25207 int count;
25209 if (CONST_INT_P (operands[2]))
25211 split_double_mode (mode, operands, 2, low, high);
25212 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25214 if (count >= half_width)
25216 emit_move_insn (high[0], low[1]);
25217 emit_move_insn (low[0], const0_rtx);
25219 if (count > half_width)
25220 ix86_expand_ashl_const (high[0], count - half_width, mode);
25222 else
25224 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25226 if (!rtx_equal_p (operands[0], operands[1]))
25227 emit_move_insn (operands[0], operands[1]);
25229 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25230 ix86_expand_ashl_const (low[0], count, mode);
25232 return;
25235 split_double_mode (mode, operands, 1, low, high);
25237 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25239 if (operands[1] == const1_rtx)
25241 /* Assuming we've chosen QImode-capable registers, 1 << N
25242 can be done with two 32/64-bit shifts, no branches, no cmoves. */
25243 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25245 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25247 ix86_expand_clear (low[0]);
25248 ix86_expand_clear (high[0]);
25249 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
25251 d = gen_lowpart (QImode, low[0]);
25252 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25253 s = gen_rtx_EQ (QImode, flags, const0_rtx);
25254 emit_insn (gen_rtx_SET (d, s));
25256 d = gen_lowpart (QImode, high[0]);
25257 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
25258 s = gen_rtx_NE (QImode, flags, const0_rtx);
25259 emit_insn (gen_rtx_SET (d, s));
25262 /* Otherwise, we can get the same results by manually performing
25263 a bit extract operation on bit 5/6, and then performing the two
25264 shifts. The two methods of getting 0/1 into low/high are exactly
25265 the same size. Avoiding the shift in the bit extract case helps
25266 pentium4 a bit; no one else seems to care much either way. */
25267 else
25269 machine_mode half_mode;
25270 rtx (*gen_lshr3)(rtx, rtx, rtx);
25271 rtx (*gen_and3)(rtx, rtx, rtx);
25272 rtx (*gen_xor3)(rtx, rtx, rtx);
25273 HOST_WIDE_INT bits;
25274 rtx x;
25276 if (mode == DImode)
25278 half_mode = SImode;
25279 gen_lshr3 = gen_lshrsi3;
25280 gen_and3 = gen_andsi3;
25281 gen_xor3 = gen_xorsi3;
25282 bits = 5;
25284 else
25286 half_mode = DImode;
25287 gen_lshr3 = gen_lshrdi3;
25288 gen_and3 = gen_anddi3;
25289 gen_xor3 = gen_xordi3;
25290 bits = 6;
25293 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
25294 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
25295 else
25296 x = gen_lowpart (half_mode, operands[2]);
25297 emit_insn (gen_rtx_SET (high[0], x));
25299 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
25300 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
25301 emit_move_insn (low[0], high[0]);
25302 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
25305 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25306 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
25307 return;
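/* Illustrative example (added commentary): for DImode "1 << n" on a
   32-bit target the code above sets low = ((n & 32) == 0) and
   high = ((n & 32) != 0), via setcc or the bit-5 extract, and then
   shifts both halves left by n.  Because the hardware masks the shift
   count to 5 bits, exactly one half ends up holding 1 << (n % 32) and
   the other stays zero.  */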
25310 if (operands[1] == constm1_rtx)
25312 /* For -1 << N, we can avoid the shld instruction, because we
25313 know that we're shifting 0...31/63 ones into a -1. */
25314 emit_move_insn (low[0], constm1_rtx);
25315 if (optimize_insn_for_size_p ())
25316 emit_move_insn (high[0], low[0]);
25317 else
25318 emit_move_insn (high[0], constm1_rtx);
25320 else
25322 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25324 if (!rtx_equal_p (operands[0], operands[1]))
25325 emit_move_insn (operands[0], operands[1]);
25327 split_double_mode (mode, operands, 1, low, high);
25328 emit_insn (gen_shld (high[0], low[0], operands[2]));
25331 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
25333 if (TARGET_CMOVE && scratch)
25335 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25336 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25338 ix86_expand_clear (scratch);
25339 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
25341 else
25343 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25344 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25346 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
25350 void
25351 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
25353 rtx (*gen_ashr3)(rtx, rtx, rtx)
25354 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
25355 rtx (*gen_shrd)(rtx, rtx, rtx);
25356 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25358 rtx low[2], high[2];
25359 int count;
25361 if (CONST_INT_P (operands[2]))
25363 split_double_mode (mode, operands, 2, low, high);
25364 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25366 if (count == GET_MODE_BITSIZE (mode) - 1)
25368 emit_move_insn (high[0], high[1]);
25369 emit_insn (gen_ashr3 (high[0], high[0],
25370 GEN_INT (half_width - 1)));
25371 emit_move_insn (low[0], high[0]);
25374 else if (count >= half_width)
25376 emit_move_insn (low[0], high[1]);
25377 emit_move_insn (high[0], low[0]);
25378 emit_insn (gen_ashr3 (high[0], high[0],
25379 GEN_INT (half_width - 1)));
25381 if (count > half_width)
25382 emit_insn (gen_ashr3 (low[0], low[0],
25383 GEN_INT (count - half_width)));
25385 else
25387 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25389 if (!rtx_equal_p (operands[0], operands[1]))
25390 emit_move_insn (operands[0], operands[1]);
25392 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25393 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
25396 else
25398 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25400 if (!rtx_equal_p (operands[0], operands[1]))
25401 emit_move_insn (operands[0], operands[1]);
25403 split_double_mode (mode, operands, 1, low, high);
25405 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25406 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
25408 if (TARGET_CMOVE && scratch)
25410 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25411 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25413 emit_move_insn (scratch, high[0]);
25414 emit_insn (gen_ashr3 (scratch, scratch,
25415 GEN_INT (half_width - 1)));
25416 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25417 scratch));
25419 else
25421 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
25422 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
25424 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
25429 void
25430 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
25432 rtx (*gen_lshr3)(rtx, rtx, rtx)
25433 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
25434 rtx (*gen_shrd)(rtx, rtx, rtx);
25435 int half_width = GET_MODE_BITSIZE (mode) >> 1;
25437 rtx low[2], high[2];
25438 int count;
25440 if (CONST_INT_P (operands[2]))
25442 split_double_mode (mode, operands, 2, low, high);
25443 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25445 if (count >= half_width)
25447 emit_move_insn (low[0], high[1]);
25448 ix86_expand_clear (high[0]);
25450 if (count > half_width)
25451 emit_insn (gen_lshr3 (low[0], low[0],
25452 GEN_INT (count - half_width)));
25454 else
25456 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25458 if (!rtx_equal_p (operands[0], operands[1]))
25459 emit_move_insn (operands[0], operands[1]);
25461 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
25462 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
25465 else
25467 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
25469 if (!rtx_equal_p (operands[0], operands[1]))
25470 emit_move_insn (operands[0], operands[1]);
25472 split_double_mode (mode, operands, 1, low, high);
25474 emit_insn (gen_shrd (low[0], high[0], operands[2]));
25475 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
25477 if (TARGET_CMOVE && scratch)
25479 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
25480 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
25482 ix86_expand_clear (scratch);
25483 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
25484 scratch));
25486 else
25488 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
25489 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
25491 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
25496 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
25497 static void
25498 predict_jump (int prob)
25500 rtx_insn *insn = get_last_insn ();
25501 gcc_assert (JUMP_P (insn));
25502 add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
25505 /* Helper function for the string operations below.  Test whether VARIABLE
25506    is aligned to VALUE bytes.  If true, jump to the label.  */
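/* For illustration (not the exact RTL): for VALUE == 4 the emitted
   test-and-branch typically combines into something like

	test	$4, %ecx	; is the 4 bit of the tested value set?
	je	.Lskip		; if not, skip the conditional chunk

   and callers place the chunk-sized move between this call and
   emit_label (label), so it only runs when the tested bit is set.  */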
25507 static rtx_code_label *
25508 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
25510 rtx_code_label *label = gen_label_rtx ();
25511 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
25512 if (GET_MODE (variable) == DImode)
25513 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
25514 else
25515 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
25516 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
25517 1, label);
25518 if (epilogue)
25519 predict_jump (REG_BR_PROB_BASE * 50 / 100);
25520 else
25521 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25522 return label;
25525 /* Adjust COUNTREG by VALUE.  */
25526 static void
25527 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
25529 rtx (*gen_add)(rtx, rtx, rtx)
25530 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
25532 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
25535 /* Zero extend possibly SImode EXP to Pmode register. */
25537 ix86_zero_extend_to_Pmode (rtx exp)
25539 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
25542 /* Divide COUNTREG by SCALE. */
25543 static rtx
25544 scale_counter (rtx countreg, int scale)
25546 rtx sc;
25548 if (scale == 1)
25549 return countreg;
25550 if (CONST_INT_P (countreg))
25551 return GEN_INT (INTVAL (countreg) / scale);
25552 gcc_assert (REG_P (countreg));
25554 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
25555 GEN_INT (exact_log2 (scale)),
25556 NULL, 1, OPTAB_DIRECT);
25557 return sc;
25560 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
25561 DImode for constant loop counts. */
25563 static machine_mode
25564 counter_mode (rtx count_exp)
25566 if (GET_MODE (count_exp) != VOIDmode)
25567 return GET_MODE (count_exp);
25568 if (!CONST_INT_P (count_exp))
25569 return Pmode;
25570 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
25571 return DImode;
25572 return SImode;
25575 /* Copy the address to a Pmode register. This is used for x32 to
25576 truncate DImode TLS address to a SImode register. */
25578 static rtx
25579 ix86_copy_addr_to_reg (rtx addr)
25581 rtx reg;
25582 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
25584 reg = copy_addr_to_reg (addr);
25585 REG_POINTER (reg) = 1;
25586 return reg;
25588 else
25590 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
25591 reg = copy_to_mode_reg (DImode, addr);
25592 REG_POINTER (reg) = 1;
25593 return gen_rtx_SUBREG (SImode, reg, 0);
25597 /* When ISSETMEM is FALSE, output a simple loop moving the memory pointed to
25598    by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
25599    size is COUNT bytes.  When ISSETMEM is TRUE, output the equivalent loop
25600    setting the memory to VALUE (supposed to be in MODE).
25602    The size is rounded down to a whole number of chunks moved at once.
25603    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
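/* Illustrative sketch of the generated structure (assuming a chunk of
   PIECE = GET_MODE_SIZE (MODE) * UNROLL bytes; details such as the zero
   test depend on the parameters):

	size = count & -PIECE;		// bytes handled by the loop
	if (size == 0) goto out;	// emitted only for 1-byte chunks
	iter = 0;
     top:
	copy (or store VALUE to) UNROLL chunks of MODE at dest+iter [, src+iter];
	iter += PIECE;
	if (iter < size) goto top;
	dest += iter;  src += iter;	// leave pointers past the copied area
     out:  */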
25606 static void
25607 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
25608 rtx destptr, rtx srcptr, rtx value,
25609 rtx count, machine_mode mode, int unroll,
25610 int expected_size, bool issetmem)
25612 rtx_code_label *out_label, *top_label;
25613 rtx iter, tmp;
25614 machine_mode iter_mode = counter_mode (count);
25615 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
25616 rtx piece_size = GEN_INT (piece_size_n);
25617 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
25618 rtx size;
25619 int i;
25621 top_label = gen_label_rtx ();
25622 out_label = gen_label_rtx ();
25623 iter = gen_reg_rtx (iter_mode);
25625 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
25626 NULL, 1, OPTAB_DIRECT);
25627 /* Those two should combine. */
25628 if (piece_size == const1_rtx)
25630 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
25631 true, out_label);
25632 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25634 emit_move_insn (iter, const0_rtx);
25636 emit_label (top_label);
25638 tmp = convert_modes (Pmode, iter_mode, iter, true);
25640 /* This assert could be relaxed; in that case we'd need to compute the
25641    smallest power of two containing PIECE_SIZE_N and pass it to
25642    offset_address.  */
25643 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
25644 destmem = offset_address (destmem, tmp, piece_size_n);
25645 destmem = adjust_address (destmem, mode, 0);
25647 if (!issetmem)
25649 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25650 srcmem = adjust_address (srcmem, mode, 0);
25652 /* When unrolling for chips that reorder memory reads and writes,
25653    we can save registers by using a single temporary.
25654    Also, using four temporaries is overkill in 32-bit mode.  */
25655 if (!TARGET_64BIT && 0)
25657 for (i = 0; i < unroll; i++)
25659 if (i)
25661 destmem =
25662 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25663 srcmem =
25664 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25666 emit_move_insn (destmem, srcmem);
25669 else
25671 rtx tmpreg[4];
25672 gcc_assert (unroll <= 4);
25673 for (i = 0; i < unroll; i++)
25675 tmpreg[i] = gen_reg_rtx (mode);
25676 if (i)
25678 srcmem =
25679 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25681 emit_move_insn (tmpreg[i], srcmem);
25683 for (i = 0; i < unroll; i++)
25685 if (i)
25687 destmem =
25688 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25690 emit_move_insn (destmem, tmpreg[i]);
25694 else
25695 for (i = 0; i < unroll; i++)
25697 if (i)
25698 destmem =
25699 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25700 emit_move_insn (destmem, value);
25703 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25704 true, OPTAB_LIB_WIDEN);
25705 if (tmp != iter)
25706 emit_move_insn (iter, tmp);
25708 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
25709 true, top_label);
25710 if (expected_size != -1)
25712 expected_size /= GET_MODE_SIZE (mode) * unroll;
25713 if (expected_size == 0)
25714 predict_jump (0);
25715 else if (expected_size > REG_BR_PROB_BASE)
25716 predict_jump (REG_BR_PROB_BASE - 1);
25717 else
25718 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25720 else
25721 predict_jump (REG_BR_PROB_BASE * 80 / 100);
25722 iter = ix86_zero_extend_to_Pmode (iter);
25723 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25724 true, OPTAB_LIB_WIDEN);
25725 if (tmp != destptr)
25726 emit_move_insn (destptr, tmp);
25727 if (!issetmem)
25729 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25730 true, OPTAB_LIB_WIDEN);
25731 if (tmp != srcptr)
25732 emit_move_insn (srcptr, tmp);
25734 emit_label (out_label);
25737 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25738 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25739 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25740    For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
25741 ORIG_VALUE is the original value passed to memset to fill the memory with.
25742 Other arguments have same meaning as for previous function. */
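/* Roughly, for a memcpy with MODE == SImode this expands to the equivalent of
   (illustrative only; the actual registers are fixed by the insn patterns):

	mov	count, %ecx
	shr	$2, %ecx	; number of 4-byte chunks
	rep movsl		; %esi/%edi advance by 4 * the original %ecx

   and the setmem variant uses rep stosl with the promoted VALUE in %eax.  */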
25744 static void
25745 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25746 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25747 rtx count,
25748 machine_mode mode, bool issetmem)
25750 rtx destexp;
25751 rtx srcexp;
25752 rtx countreg;
25753 HOST_WIDE_INT rounded_count;
25755 /* If possible, it is shorter to use rep movs.
25756 TODO: Maybe it is better to move this logic to decide_alg. */
25757 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25758 && (!issetmem || orig_value == const0_rtx))
25759 mode = SImode;
25761 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25762 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
25764 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25765 GET_MODE_SIZE (mode)));
25766 if (mode != QImode)
25768 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25769 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25770 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25772 else
25773 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
25774 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25776 rounded_count
25777 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25778 destmem = shallow_copy_rtx (destmem);
25779 set_mem_size (destmem, rounded_count);
25781 else if (MEM_SIZE_KNOWN_P (destmem))
25782 clear_mem_size (destmem);
25784 if (issetmem)
25786 value = force_reg (mode, gen_lowpart (mode, value));
25787 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
25789 else
25791 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25792 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25793 if (mode != QImode)
25795 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25796 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25797 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25799 else
25800 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25801 if (CONST_INT_P (count))
25803 rounded_count
25804 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25805 srcmem = shallow_copy_rtx (srcmem);
25806 set_mem_size (srcmem, rounded_count);
25808 else
25810 if (MEM_SIZE_KNOWN_P (srcmem))
25811 clear_mem_size (srcmem);
25813 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25814 destexp, srcexp));
25818 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25819    DESTMEM.
25820    SRCMEM is passed by pointer and is updated on return.
25821    The return value is the updated DESTMEM.  */
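/* For example (a sketch; the exact modes depend on the enabled ISA): with
   SIZE_TO_MOVE == 32 and SSE (but no AVX) enabled, MOVE_MODE becomes a
   16-byte vector mode and two load/store pairs through a temporary register
   are emitted; without SSE it falls back to word_mode and emits
   32 / UNITS_PER_WORD moves, bumping DESTPTR/SRCPTR by PIECE_SIZE after
   each pair.  */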
25822 static rtx
25823 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25824 HOST_WIDE_INT size_to_move)
25826 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25827 enum insn_code code;
25828 machine_mode move_mode;
25829 int piece_size, i;
25831 /* Find the widest mode in which we could perform moves.
25832    Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
25833    it until a move of such size is supported.  */
25834 piece_size = 1 << floor_log2 (size_to_move);
25835 while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
25836 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25838 gcc_assert (piece_size > 1);
25839 piece_size >>= 1;
25842 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25843 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25844 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25846 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25847 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
25848 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
25850 move_mode = word_mode;
25851 piece_size = GET_MODE_SIZE (move_mode);
25852 code = optab_handler (mov_optab, move_mode);
25855 gcc_assert (code != CODE_FOR_nothing);
25857 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25858 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25860 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25861 gcc_assert (size_to_move % piece_size == 0);
25862 adjust = GEN_INT (piece_size);
25863 for (i = 0; i < size_to_move; i += piece_size)
25865 /* We move from memory to memory, so we'll need to do it via
25866 a temporary register. */
25867 tempreg = gen_reg_rtx (move_mode);
25868 emit_insn (GEN_FCN (code) (tempreg, src));
25869 emit_insn (GEN_FCN (code) (dst, tempreg));
25871 emit_move_insn (destptr,
25872 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25873 emit_move_insn (srcptr,
25874 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25876 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25877 piece_size);
25878 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25879 piece_size);
25882 /* Update DST and SRC rtx. */
25883 *srcmem = src;
25884 return dst;
25887 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
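/* For example, with a constant COUNT == 23 and MAX_SIZE == 16 the epilogue
   has to copy 23 % 16 == 7 bytes, which decomposes into the set bits of 7:
   one 4-byte, one 2-byte and one 1-byte emit_memmov.  (Illustrative only;
   the non-constant cases below use a loop or the aligntest jump tree.)  */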
25888 static void
25889 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25890 rtx destptr, rtx srcptr, rtx count, int max_size)
25892 rtx src, dest;
25893 if (CONST_INT_P (count))
25895 HOST_WIDE_INT countval = INTVAL (count);
25896 HOST_WIDE_INT epilogue_size = countval % max_size;
25897 int i;
25899 /* For now MAX_SIZE should be a power of 2. This assert could be
25900 relaxed, but it'll require a bit more complicated epilogue
25901 expanding. */
25902 gcc_assert ((max_size & (max_size - 1)) == 0);
25903 for (i = max_size; i >= 1; i >>= 1)
25905 if (epilogue_size & i)
25906 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
25908 return;
25910 if (max_size > 8)
25912 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25913 count, 1, OPTAB_DIRECT);
25914 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25915 count, QImode, 1, 4, false);
25916 return;
25919 /* When single-insn stringops are available, we can cheaply advance the dest
25920    and src pointers.  Otherwise we save code size by maintaining an offset
25921    (zero is readily available from the preceding rep operation) and using x86 addressing modes.
25923 if (TARGET_SINGLE_STRINGOP)
25925 if (max_size > 4)
25927 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25928 src = change_address (srcmem, SImode, srcptr);
25929 dest = change_address (destmem, SImode, destptr);
25930 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25931 emit_label (label);
25932 LABEL_NUSES (label) = 1;
25934 if (max_size > 2)
25936 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25937 src = change_address (srcmem, HImode, srcptr);
25938 dest = change_address (destmem, HImode, destptr);
25939 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25940 emit_label (label);
25941 LABEL_NUSES (label) = 1;
25943 if (max_size > 1)
25945 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25946 src = change_address (srcmem, QImode, srcptr);
25947 dest = change_address (destmem, QImode, destptr);
25948 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25949 emit_label (label);
25950 LABEL_NUSES (label) = 1;
25953 else
25955 rtx offset = force_reg (Pmode, const0_rtx);
25956 rtx tmp;
25958 if (max_size > 4)
25960 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25961 src = change_address (srcmem, SImode, srcptr);
25962 dest = change_address (destmem, SImode, destptr);
25963 emit_move_insn (dest, src);
25964 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25965 true, OPTAB_LIB_WIDEN);
25966 if (tmp != offset)
25967 emit_move_insn (offset, tmp);
25968 emit_label (label);
25969 LABEL_NUSES (label) = 1;
25971 if (max_size > 2)
25973 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25974 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25975 src = change_address (srcmem, HImode, tmp);
25976 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25977 dest = change_address (destmem, HImode, tmp);
25978 emit_move_insn (dest, src);
25979 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25980 true, OPTAB_LIB_WIDEN);
25981 if (tmp != offset)
25982 emit_move_insn (offset, tmp);
25983 emit_label (label);
25984 LABEL_NUSES (label) = 1;
25986 if (max_size > 1)
25988 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25989 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25990 src = change_address (srcmem, QImode, tmp);
25991 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25992 dest = change_address (destmem, QImode, tmp);
25993 emit_move_insn (dest, src);
25994 emit_label (label);
25995 LABEL_NUSES (label) = 1;
26000 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
26001    with value PROMOTED_VAL.
26003    The return value is the updated DESTMEM.  */
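/* For example (a sketch): if PROMOTED_VAL is the SImode constant 0x2a2a2a2a
   and SIZE_TO_MOVE == 2, MOVE_MODE is narrowed to HImode, the value becomes
   its low part 0x2a2a, and a single 2-byte store is emitted, advancing
   DESTPTR by 2.  */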
26004 static rtx
26005 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26006 HOST_WIDE_INT size_to_move)
26008 rtx dst = destmem, adjust;
26009 enum insn_code code;
26010 machine_mode move_mode;
26011 int piece_size, i;
26013 /* Find the widest mode in which we could perform moves.
26014    Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
26015    it until a move of such size is supported.  */
26016 move_mode = GET_MODE (promoted_val);
26017 if (move_mode == VOIDmode)
26018 move_mode = QImode;
26019 if (size_to_move < GET_MODE_SIZE (move_mode))
26021 unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26022 move_mode = int_mode_for_size (move_bits, 0).require ();
26023 promoted_val = gen_lowpart (move_mode, promoted_val);
26025 piece_size = GET_MODE_SIZE (move_mode);
26026 code = optab_handler (mov_optab, move_mode);
26027 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26029 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26031 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
26032 gcc_assert (size_to_move % piece_size == 0);
26033 adjust = GEN_INT (piece_size);
26034 for (i = 0; i < size_to_move; i += piece_size)
26036 if (piece_size <= GET_MODE_SIZE (word_mode))
26038 emit_insn (gen_strset (destptr, dst, promoted_val));
26039 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26040 piece_size);
26041 continue;
26044 emit_insn (GEN_FCN (code) (dst, promoted_val));
26046 emit_move_insn (destptr,
26047 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26049 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26050 piece_size);
26053 /* Update DST rtx. */
26054 return dst;
26056 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
26057 static void
26058 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26059 rtx count, int max_size)
26061 count =
26062 expand_simple_binop (counter_mode (count), AND, count,
26063 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26064 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26065 gen_lowpart (QImode, value), count, QImode,
26066 1, max_size / 2, true);
26069 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
26070 static void
26071 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26072 rtx count, int max_size)
26074 rtx dest;
26076 if (CONST_INT_P (count))
26078 HOST_WIDE_INT countval = INTVAL (count);
26079 HOST_WIDE_INT epilogue_size = countval % max_size;
26080 int i;
26082 /* For now MAX_SIZE should be a power of 2. This assert could be
26083 relaxed, but it'll require a bit more complicated epilogue
26084 expanding. */
26085 gcc_assert ((max_size & (max_size - 1)) == 0);
26086 for (i = max_size; i >= 1; i >>= 1)
26088 if (epilogue_size & i)
26090 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26091 destmem = emit_memset (destmem, destptr, vec_value, i);
26092 else
26093 destmem = emit_memset (destmem, destptr, value, i);
26096 return;
26098 if (max_size > 32)
26100 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26101 return;
26103 if (max_size > 16)
26105 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26106 if (TARGET_64BIT)
26108 dest = change_address (destmem, DImode, destptr);
26109 emit_insn (gen_strset (destptr, dest, value));
26110 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26111 emit_insn (gen_strset (destptr, dest, value));
26113 else
26115 dest = change_address (destmem, SImode, destptr);
26116 emit_insn (gen_strset (destptr, dest, value));
26117 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26118 emit_insn (gen_strset (destptr, dest, value));
26119 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26120 emit_insn (gen_strset (destptr, dest, value));
26121 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26122 emit_insn (gen_strset (destptr, dest, value));
26124 emit_label (label);
26125 LABEL_NUSES (label) = 1;
26127 if (max_size > 8)
26129 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26130 if (TARGET_64BIT)
26132 dest = change_address (destmem, DImode, destptr);
26133 emit_insn (gen_strset (destptr, dest, value));
26135 else
26137 dest = change_address (destmem, SImode, destptr);
26138 emit_insn (gen_strset (destptr, dest, value));
26139 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26140 emit_insn (gen_strset (destptr, dest, value));
26142 emit_label (label);
26143 LABEL_NUSES (label) = 1;
26145 if (max_size > 4)
26147 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26148 dest = change_address (destmem, SImode, destptr);
26149 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26150 emit_label (label);
26151 LABEL_NUSES (label) = 1;
26153 if (max_size > 2)
26155 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26156 dest = change_address (destmem, HImode, destptr);
26157 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26158 emit_label (label);
26159 LABEL_NUSES (label) = 1;
26161 if (max_size > 1)
26163 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26164 dest = change_address (destmem, QImode, destptr);
26165 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26166 emit_label (label);
26167 LABEL_NUSES (label) = 1;
26171 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
26172 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
26173 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
26174 ignored.
26175 Return value is updated DESTMEM. */
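/* For example (illustrative): with ALIGN == 1 and DESIRED_ALIGNMENT == 8
   this emits three conditional steps testing bits 1, 2 and 4 of DESTPTR;
   each one copies (or stores) that many bytes and decreases COUNT, so on
   exit DESTPTR is 8-byte aligned and COUNT reflects the remaining bytes.  */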
26176 static rtx
26177 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26178 rtx destptr, rtx srcptr, rtx value,
26179 rtx vec_value, rtx count, int align,
26180 int desired_alignment, bool issetmem)
26182 int i;
26183 for (i = 1; i < desired_alignment; i <<= 1)
26185 if (align <= i)
26187 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26188 if (issetmem)
26190 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26191 destmem = emit_memset (destmem, destptr, vec_value, i);
26192 else
26193 destmem = emit_memset (destmem, destptr, value, i);
26195 else
26196 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26197 ix86_adjust_counter (count, i);
26198 emit_label (label);
26199 LABEL_NUSES (label) = 1;
26200 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26203 return destmem;
26206 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
26207    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
26208    and jump to DONE_LABEL.  */
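/* For example (a sketch): for SIZE == 4 and !ISSETMEM, when the COUNT & 4
   test passes (COUNT is 4..7 in the contexts this is used for), this copies
   the first 4 bytes and then the last 4 bytes ending at DESTPTR + COUNT;
   the two stores may overlap, which is harmless, and control then jumps to
   DONE_LABEL.  */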
26209 static void
26210 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
26211 rtx destptr, rtx srcptr,
26212 rtx value, rtx vec_value,
26213 rtx count, int size,
26214 rtx done_label, bool issetmem)
26216 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
26217 machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
26218 rtx modesize;
26219 int n;
26221 /* If we do not have vector value to copy, we must reduce size. */
26222 if (issetmem)
26224 if (!vec_value)
26226 if (GET_MODE (value) == VOIDmode && size > 8)
26227 mode = Pmode;
26228 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26229 mode = GET_MODE (value);
26231 else
26232 mode = GET_MODE (vec_value), value = vec_value;
26234 else
26236 /* Choose appropriate vector mode. */
26237 if (size >= 32)
26238 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26239 else if (size >= 16)
26240 mode = TARGET_SSE ? V16QImode : DImode;
26241 srcmem = change_address (srcmem, mode, srcptr);
26243 destmem = change_address (destmem, mode, destptr);
26244 modesize = GEN_INT (GET_MODE_SIZE (mode));
26245 gcc_assert (GET_MODE_SIZE (mode) <= size);
26246 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26248 if (issetmem)
26249 emit_move_insn (destmem, gen_lowpart (mode, value));
26250 else
26252 emit_move_insn (destmem, srcmem);
26253 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26255 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26258 destmem = offset_address (destmem, count, 1);
26259 destmem = offset_address (destmem, GEN_INT (-2 * size),
26260 GET_MODE_SIZE (mode));
26261 if (!issetmem)
26263 srcmem = offset_address (srcmem, count, 1);
26264 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
26265 GET_MODE_SIZE (mode));
26267 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
26269 if (issetmem)
26270 emit_move_insn (destmem, gen_lowpart (mode, value));
26271 else
26273 emit_move_insn (destmem, srcmem);
26274 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26276 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26278 emit_jump_insn (gen_jump (done_label));
26279 emit_barrier ();
26281 emit_label (label);
26282 LABEL_NUSES (label) = 1;
26285 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
26286    and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
26287    bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
26288    proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
26289    DONE_LABEL is a label after the whole copying sequence.  The label is created
26290    on demand if *DONE_LABEL is NULL.
26291    MIN_SIZE is the minimal size of the copied block.  This value gets adjusted for new
26292    bounds after the initial copies.
26294    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
26295    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
26296    we will dispatch to a library call for large blocks.
26298 In pseudocode we do:
26300 if (COUNT < SIZE)
26302 Assume that SIZE is 4. Bigger sizes are handled analogously
26303 if (COUNT & 4)
26305 copy 4 bytes from SRCPTR to DESTPTR
26306 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
26307 goto done_label
26309 if (!COUNT)
26310 goto done_label;
26311 copy 1 byte from SRCPTR to DESTPTR
26312 if (COUNT & 2)
26314 copy 2 bytes from SRCPTR to DESTPTR
26315 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
26318 else
26320 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
26321 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
26323 OLD_DESTPTR = DESTPTR;
26324 Align DESTPTR up to DESIRED_ALIGN
26325 SRCPTR += DESTPTR - OLD_DESTPTR
26326 COUNT -= DESTPTR - OLD_DESTPTR
26327 if (DYNAMIC_CHECK)
26328 Round COUNT down to multiple of SIZE
26329 << optional caller supplied zero size guard is here >>
26330 << optional caller supplied dynamic check is here >>
26331 << caller supplied main copy loop is here >>
26333 done_label:
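/* For example (a sketch of the small-block path above): with SIZE == 16 and
   a runtime COUNT of 10, the "COUNT & 8" test fires, so bytes 0-7 and bytes
   2-9 (the last 8) are copied by expand_small_movmem_or_setmem and control
   jumps straight to done_label; the main copy loop is never entered.  */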
26335 static void
26336 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
26337 rtx *destptr, rtx *srcptr,
26338 machine_mode mode,
26339 rtx value, rtx vec_value,
26340 rtx *count,
26341 rtx_code_label **done_label,
26342 int size,
26343 int desired_align,
26344 int align,
26345 unsigned HOST_WIDE_INT *min_size,
26346 bool dynamic_check,
26347 bool issetmem)
26349 rtx_code_label *loop_label = NULL, *label;
26350 int n;
26351 rtx modesize;
26352 int prolog_size = 0;
26353 rtx mode_value;
26355 /* Choose the proper value to copy.  */
26356 if (issetmem && VECTOR_MODE_P (mode))
26357 mode_value = vec_value;
26358 else
26359 mode_value = value;
26360 gcc_assert (GET_MODE_SIZE (mode) <= size);
26362 /* See if block is big or small, handle small blocks. */
26363 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
26365 int size2 = size;
26366 loop_label = gen_label_rtx ();
26368 if (!*done_label)
26369 *done_label = gen_label_rtx ();
26371 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
26372 1, loop_label);
26373 size2 >>= 1;
26375 /* Handle sizes > 3. */
26376 for (;size2 > 2; size2 >>= 1)
26377 expand_small_movmem_or_setmem (destmem, srcmem,
26378 *destptr, *srcptr,
26379 value, vec_value,
26380 *count,
26381 size2, *done_label, issetmem);
26382 /* Nothing to copy? Jump to DONE_LABEL if so */
26383 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
26384 1, *done_label);
26386 /* Do a byte copy. */
26387 destmem = change_address (destmem, QImode, *destptr);
26388 if (issetmem)
26389 emit_move_insn (destmem, gen_lowpart (QImode, value));
26390 else
26392 srcmem = change_address (srcmem, QImode, *srcptr);
26393 emit_move_insn (destmem, srcmem);
26396 /* Handle sizes 2 and 3. */
26397 label = ix86_expand_aligntest (*count, 2, false);
26398 destmem = change_address (destmem, HImode, *destptr);
26399 destmem = offset_address (destmem, *count, 1);
26400 destmem = offset_address (destmem, GEN_INT (-2), 2);
26401 if (issetmem)
26402 emit_move_insn (destmem, gen_lowpart (HImode, value));
26403 else
26405 srcmem = change_address (srcmem, HImode, *srcptr);
26406 srcmem = offset_address (srcmem, *count, 1);
26407 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
26408 emit_move_insn (destmem, srcmem);
26411 emit_label (label);
26412 LABEL_NUSES (label) = 1;
26413 emit_jump_insn (gen_jump (*done_label));
26414 emit_barrier ();
26416 else
26417 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
26418 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
26420 /* Start memcpy for COUNT >= SIZE. */
26421 if (loop_label)
26423 emit_label (loop_label);
26424 LABEL_NUSES (loop_label) = 1;
26427 /* Copy first desired_align bytes. */
26428 if (!issetmem)
26429 srcmem = change_address (srcmem, mode, *srcptr);
26430 destmem = change_address (destmem, mode, *destptr);
26431 modesize = GEN_INT (GET_MODE_SIZE (mode));
26432 for (n = 0; prolog_size < desired_align - align; n++)
26434 if (issetmem)
26435 emit_move_insn (destmem, mode_value);
26436 else
26438 emit_move_insn (destmem, srcmem);
26439 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
26441 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
26442 prolog_size += GET_MODE_SIZE (mode);
26446 /* Copy last SIZE bytes. */
26447 destmem = offset_address (destmem, *count, 1);
26448 destmem = offset_address (destmem,
26449 GEN_INT (-size - prolog_size),
26451 if (issetmem)
26452 emit_move_insn (destmem, mode_value);
26453 else
26455 srcmem = offset_address (srcmem, *count, 1);
26456 srcmem = offset_address (srcmem,
26457 GEN_INT (-size - prolog_size),
26459 emit_move_insn (destmem, srcmem);
26461 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
26463 destmem = offset_address (destmem, modesize, 1);
26464 if (issetmem)
26465 emit_move_insn (destmem, mode_value);
26466 else
26468 srcmem = offset_address (srcmem, modesize, 1);
26469 emit_move_insn (destmem, srcmem);
26473 /* Align destination. */
26474 if (desired_align > 1 && desired_align > align)
26476 rtx saveddest = *destptr;
26478 gcc_assert (desired_align <= size);
26479 /* Align destptr up, place it to new register. */
26480 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
26481 GEN_INT (prolog_size),
26482 NULL_RTX, 1, OPTAB_DIRECT);
26483 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
26484 REG_POINTER (*destptr) = 1;
26485 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
26486 GEN_INT (-desired_align),
26487 *destptr, 1, OPTAB_DIRECT);
26488 /* See how many bytes we skipped. */
26489 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
26490 *destptr,
26491 saveddest, 1, OPTAB_DIRECT);
26492 /* Adjust srcptr and count. */
26493 if (!issetmem)
26494 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
26495 saveddest, *srcptr, 1, OPTAB_DIRECT);
26496 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26497 saveddest, *count, 1, OPTAB_DIRECT);
26498 /* We copied at most size + prolog_size. */
26499 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
26500 *min_size
26501 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
26502 else
26503 *min_size = 0;
26505 /* Our loops always round down the block size, but for the dispatch to the
26506    library call we need the precise value.  */
26507 if (dynamic_check)
26508 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
26509 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
26511 else
26513 gcc_assert (prolog_size == 0);
26514 /* Decrease count, so we won't end up copying last word twice. */
26515 if (!CONST_INT_P (*count))
26516 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
26517 constm1_rtx, *count, 1, OPTAB_DIRECT);
26518 else
26519 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
26520 (unsigned HOST_WIDE_INT)size));
26521 if (*min_size)
26522 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
26527 /* This function is like the previous one, except here we know how many bytes
26528 need to be copied. That allows us to update alignment not only of DST, which
26529 is returned, but also of SRC, which is passed as a pointer for that
26530 reason. */
26531 static rtx
26532 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
26533 rtx srcreg, rtx value, rtx vec_value,
26534 int desired_align, int align_bytes,
26535 bool issetmem)
26537 rtx src = NULL;
26538 rtx orig_dst = dst;
26539 rtx orig_src = NULL;
26540 int piece_size = 1;
26541 int copied_bytes = 0;
26543 if (!issetmem)
26545 gcc_assert (srcp != NULL);
26546 src = *srcp;
26547 orig_src = src;
26550 for (piece_size = 1;
26551 piece_size <= desired_align && copied_bytes < align_bytes;
26552 piece_size <<= 1)
26554 if (align_bytes & piece_size)
26556 if (issetmem)
26558 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
26559 dst = emit_memset (dst, destreg, vec_value, piece_size);
26560 else
26561 dst = emit_memset (dst, destreg, value, piece_size);
26563 else
26564 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
26565 copied_bytes += piece_size;
26568 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
26569 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26570 if (MEM_SIZE_KNOWN_P (orig_dst))
26571 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
26573 if (!issetmem)
26575 int src_align_bytes = get_mem_align_offset (src, desired_align
26576 * BITS_PER_UNIT);
26577 if (src_align_bytes >= 0)
26578 src_align_bytes = desired_align - src_align_bytes;
26579 if (src_align_bytes >= 0)
26581 unsigned int src_align;
26582 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
26584 if ((src_align_bytes & (src_align - 1))
26585 == (align_bytes & (src_align - 1)))
26586 break;
26588 if (src_align > (unsigned int) desired_align)
26589 src_align = desired_align;
26590 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
26591 set_mem_align (src, src_align * BITS_PER_UNIT);
26593 if (MEM_SIZE_KNOWN_P (orig_src))
26594 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
26595 *srcp = src;
26598 return dst;
26601 /* Return true if ALG can be used in current context.
26602 Assume we expand memset if MEMSET is true. */
26603 static bool
26604 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
26606 if (alg == no_stringop)
26607 return false;
26608 if (alg == vector_loop)
26609 return TARGET_SSE || TARGET_AVX;
26610 /* Algorithms using the rep prefix want at least edi and ecx;
26611 additionally, memset wants eax and memcpy wants esi. Don't
26612 consider such algorithms if the user has appropriated those
26613 registers for their own purposes, or if we have a non-default
26614 address space, since some string insns cannot override the segment. */
26615 if (alg == rep_prefix_1_byte
26616 || alg == rep_prefix_4_byte
26617 || alg == rep_prefix_8_byte)
26619 if (have_as)
26620 return false;
26621 if (fixed_regs[CX_REG]
26622 || fixed_regs[DI_REG]
26623 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
26624 return false;
26626 return true;
26629 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
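/* Illustrative example (a hypothetical tuning entry, not one of the real
   tables in x86-tune-costs.h): a stringop_algs value such as

	{libcall, {{256, loop, false}, {8192, rep_prefix_4_byte, false},
		   {-1, libcall, false}}}

   would make decide_alg pick the simple loop for expected sizes up to 256
   bytes, "rep movsl"/"rep stosl" up to 8192 bytes, and the library call for
   larger or unknown sizes (subject to alg_usable_p and the overrides
   below).  */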
26630 static enum stringop_alg
26631 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26632 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26633 bool memset, bool zero_memset, bool have_as,
26634 int *dynamic_check, bool *noalign, bool recur)
26636 const struct stringop_algs *algs;
26637 bool optimize_for_speed;
26638 int max = 0;
26639 const struct processor_costs *cost;
26640 int i;
26641 bool any_alg_usable_p = false;
26643 *noalign = false;
26644 *dynamic_check = -1;
26646 /* Even if the string operation call is cold, we still might spend a lot
26647 of time processing large blocks. */
26648 if (optimize_function_for_size_p (cfun)
26649 || (optimize_insn_for_size_p ()
26650 && (max_size < 256
26651 || (expected_size != -1 && expected_size < 256))))
26652 optimize_for_speed = false;
26653 else
26654 optimize_for_speed = true;
26656 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26657 if (memset)
26658 algs = &cost->memset[TARGET_64BIT != 0];
26659 else
26660 algs = &cost->memcpy[TARGET_64BIT != 0];
26662 /* Find the maximal size handled by a usable, non-libcall algorithm.  */
26663 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26665 enum stringop_alg candidate = algs->size[i].alg;
26666 bool usable = alg_usable_p (candidate, memset, have_as);
26667 any_alg_usable_p |= usable;
26669 if (candidate != libcall && candidate && usable)
26670 max = algs->size[i].max;
26673 /* If the expected size is not known but the max size is small enough
26674    that the inline version is a win, set the expected size into
26675    the range.  */
26676 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26677 && expected_size == -1)
26678 expected_size = min_size / 2 + max_size / 2;
26680 /* If user specified the algorithm, honor it if possible. */
26681 if (ix86_stringop_alg != no_stringop
26682 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26683 return ix86_stringop_alg;
26684 /* rep; movq or rep; movl is the smallest variant. */
26685 else if (!optimize_for_speed)
26687 *noalign = true;
26688 if (!count || (count & 3) || (memset && !zero_memset))
26689 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26690 ? rep_prefix_1_byte : loop_1_byte;
26691 else
26692 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26693 ? rep_prefix_4_byte : loop;
26695 /* Very tiny blocks are best handled via the loop; REP is expensive to
26696    set up.  */
26697 else if (expected_size != -1 && expected_size < 4)
26698 return loop_1_byte;
26699 else if (expected_size != -1)
26701 enum stringop_alg alg = libcall;
26702 bool alg_noalign = false;
26703 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26705 /* We get here if the algorithms that were not libcall-based
26706 were rep-prefix based and we are unable to use rep prefixes
26707 based on global register usage. Break out of the loop and
26708 use the heuristic below. */
26709 if (algs->size[i].max == 0)
26710 break;
26711 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26713 enum stringop_alg candidate = algs->size[i].alg;
26715 if (candidate != libcall
26716 && alg_usable_p (candidate, memset, have_as))
26718 alg = candidate;
26719 alg_noalign = algs->size[i].noalign;
26721 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26722 last non-libcall inline algorithm. */
26723 if (TARGET_INLINE_ALL_STRINGOPS)
26725 /* When the current size is best to be copied by a libcall,
26726 but we are still forced to inline, run the heuristic below
26727 that will pick code for medium sized blocks. */
26728 if (alg != libcall)
26730 *noalign = alg_noalign;
26731 return alg;
26733 else if (!any_alg_usable_p)
26734 break;
26736 else if (alg_usable_p (candidate, memset, have_as))
26738 *noalign = algs->size[i].noalign;
26739 return candidate;
26744 /* When asked to inline the call anyway, try to pick a meaningful choice.
26745    We look for the maximal size of block that is faster to copy by hand and
26746    take blocks of at most that size, guessing that the average size will
26747    be roughly half of the block.
26749    If this turns out to be bad, we might simply specify the preferred
26750    choice in ix86_costs.  */
26751 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26752 && (algs->unknown_size == libcall
26753 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26755 enum stringop_alg alg;
26756 HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
26758 /* If there aren't any usable algorithms or if recursing already,
26759 then recursing on smaller sizes or same size isn't going to
26760 find anything. Just return the simple byte-at-a-time copy loop. */
26761 if (!any_alg_usable_p || recur)
26763 /* Pick something reasonable. */
26764 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
26765 *dynamic_check = 128;
26766 return loop_1_byte;
26768 alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
26769 zero_memset, have_as, dynamic_check, noalign, true);
26770 gcc_assert (*dynamic_check == -1);
26771 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26772 *dynamic_check = max;
26773 else
26774 gcc_assert (alg != libcall);
26775 return alg;
26777 return (alg_usable_p (algs->unknown_size, memset, have_as)
26778 ? algs->unknown_size : libcall);
26781 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26782 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26783 static int
26784 decide_alignment (int align,
26785 enum stringop_alg alg,
26786 int expected_size,
26787 machine_mode move_mode)
26789 int desired_align = 0;
26791 gcc_assert (alg != no_stringop);
26793 if (alg == libcall)
26794 return 0;
26795 if (move_mode == VOIDmode)
26796 return 0;
26798 desired_align = GET_MODE_SIZE (move_mode);
26799 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
26800    copying a whole cache line at once.  */
26801 if (TARGET_PENTIUMPRO
26802 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26803 desired_align = 8;
26805 if (optimize_size)
26806 desired_align = 1;
26807 if (desired_align < align)
26808 desired_align = align;
26809 if (expected_size != -1 && expected_size < 4)
26810 desired_align = align;
26812 return desired_align;
26816 /* Helper function for memset.  For a QImode value 0xXY produce
26817    0xXYXYXYXY of the width specified by MODE.  This is essentially
26818    a * 0x01010101, but we can do slightly better than
26819    synth_mult by unwinding the sequence by hand on CPUs with
26820    slow multiply.  */
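/* For example, promoting the constant 0x5a to SImode yields 0x5a5a5a5a
   directly.  For a non-constant byte in REG the expansion on slow-multiply
   CPUs is roughly (SImode case, illustrative only):

	reg |= reg << 8;	// 0x000000XY -> 0x0000XYXY
	reg |= reg << 16;	// 0x0000XYXY -> 0xXYXYXYXY

   with one more "reg |= reg << 32" step for DImode.  */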
26821 static rtx
26822 promote_duplicated_reg (machine_mode mode, rtx val)
26824 machine_mode valmode = GET_MODE (val);
26825 rtx tmp;
26826 int nops = mode == DImode ? 3 : 2;
26828 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26829 if (val == const0_rtx)
26830 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26831 if (CONST_INT_P (val))
26833 HOST_WIDE_INT v = INTVAL (val) & 255;
26835 v |= v << 8;
26836 v |= v << 16;
26837 if (mode == DImode)
26838 v |= (v << 16) << 16;
26839 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26842 if (valmode == VOIDmode)
26843 valmode = QImode;
26844 if (valmode != QImode)
26845 val = gen_lowpart (QImode, val);
26846 if (mode == QImode)
26847 return val;
26848 if (!TARGET_PARTIAL_REG_STALL)
26849 nops--;
26850 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26851 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26852 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26853 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26855 rtx reg = convert_modes (mode, QImode, val, true);
26856 tmp = promote_duplicated_reg (mode, const1_rtx);
26857 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26858 OPTAB_DIRECT);
26860 else
26862 rtx reg = convert_modes (mode, QImode, val, true);
26864 if (!TARGET_PARTIAL_REG_STALL)
26865 if (mode == SImode)
26866 emit_insn (gen_insvsi_1 (reg, reg));
26867 else
26868 emit_insn (gen_insvdi_1 (reg, reg));
26869 else
26871 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26872 NULL, 1, OPTAB_DIRECT);
26873 reg =
26874 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26876 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26877 NULL, 1, OPTAB_DIRECT);
26878 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26879 if (mode == SImode)
26880 return reg;
26881 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26882 NULL, 1, OPTAB_DIRECT);
26883 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26884 return reg;
26888 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size that
26889    will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
26890    getting alignment from ALIGN to DESIRED_ALIGN.  */
26891 static rtx
26892 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
26893 int align)
26895 rtx promoted_val;
26897 if (TARGET_64BIT
26898 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26899 promoted_val = promote_duplicated_reg (DImode, val);
26900 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26901 promoted_val = promote_duplicated_reg (SImode, val);
26902 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26903 promoted_val = promote_duplicated_reg (HImode, val);
26904 else
26905 promoted_val = val;
26907 return promoted_val;
26910 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
26911 operations when profitable. The code depends upon architecture, block size
26912 and alignment, but always has one of the following overall structures:
26914 Aligned move sequence:
26916 1) Prologue guard: Conditional that jumps up to epilogues for small
26917 blocks that can be handled by epilogue alone. This is faster
26918    but also needed for correctness, since the prologue assumes the block
26919 is larger than the desired alignment.
26921 Optional dynamic check for size and libcall for large
26922 blocks is emitted here too, with -minline-stringops-dynamically.
26924 2) Prologue: copy first few bytes in order to get destination
26925 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26926 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26927 copied. We emit either a jump tree on power of two sized
26928 blocks, or a byte loop.
26930 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26931 with specified algorithm.
26933 4) Epilogue: code copying tail of the block that is too small to be
26934 handled by main body (or up to size guarded by prologue guard).
26936 Misaligned move sequence
26938    1) misaligned move prologue/epilogue containing:
26939 a) Prologue handling small memory blocks and jumping to done_label
26940 (skipped if blocks are known to be large enough)
26941    b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26942 needed by single possibly misaligned move
26943 (skipped if alignment is not needed)
26944 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26946 2) Zero size guard dispatching to done_label, if needed
26948 3) dispatch to library call, if needed,
26950    4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26951 with specified algorithm. */
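/* Illustrative asm-level sketch (not the exact generated code) of the aligned
   sequence above, for memset (dst, 0, n) expanded with rep_prefix_4_byte on
   32-bit:

	cmp	$4, n		; 1) small n goes straight to the epilogue
	jb	.Lepilogue
	...			; 2) store 1-3 bytes until %edi is 4-byte
				;    aligned, adjusting n accordingly
	mov	n, %ecx		; 3) main body
	shr	$2, %ecx
	xor	%eax, %eax
	rep stosl
     .Lepilogue:
	...			; 4) store the remaining n & 3 tail bytes  */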
26952 bool
26953 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26954 rtx align_exp, rtx expected_align_exp,
26955 rtx expected_size_exp, rtx min_size_exp,
26956 rtx max_size_exp, rtx probable_max_size_exp,
26957 bool issetmem)
26959 rtx destreg;
26960 rtx srcreg = NULL;
26961 rtx_code_label *label = NULL;
26962 rtx tmp;
26963 rtx_code_label *jump_around_label = NULL;
26964 HOST_WIDE_INT align = 1;
26965 unsigned HOST_WIDE_INT count = 0;
26966 HOST_WIDE_INT expected_size = -1;
26967 int size_needed = 0, epilogue_size_needed;
26968 int desired_align = 0, align_bytes = 0;
26969 enum stringop_alg alg;
26970 rtx promoted_val = NULL;
26971 rtx vec_promoted_val = NULL;
26972 bool force_loopy_epilogue = false;
26973 int dynamic_check;
26974 bool need_zero_guard = false;
26975 bool noalign;
26976 machine_mode move_mode = VOIDmode;
26977 machine_mode wider_mode;
26978 int unroll_factor = 1;
26979 /* TODO: Once value ranges are available, fill in proper data. */
26980 unsigned HOST_WIDE_INT min_size = 0;
26981 unsigned HOST_WIDE_INT max_size = -1;
26982 unsigned HOST_WIDE_INT probable_max_size = -1;
26983 bool misaligned_prologue_used = false;
26984 bool have_as;
26986 if (CONST_INT_P (align_exp))
26987 align = INTVAL (align_exp);
26988 /* i386 can do misaligned access at a reasonable increase in cost.  */
26989 if (CONST_INT_P (expected_align_exp)
26990 && INTVAL (expected_align_exp) > align)
26991 align = INTVAL (expected_align_exp);
26992 /* ALIGN is the minimum of destination and source alignment, but we care here
26993 just about destination alignment. */
26994 else if (!issetmem
26995 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26996 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26998 if (CONST_INT_P (count_exp))
27000 min_size = max_size = probable_max_size = count = expected_size
27001 = INTVAL (count_exp);
27002 /* When COUNT is 0, there is nothing to do. */
27003 if (!count)
27004 return true;
27006 else
27008 if (min_size_exp)
27009 min_size = INTVAL (min_size_exp);
27010 if (max_size_exp)
27011 max_size = INTVAL (max_size_exp);
27012 if (probable_max_size_exp)
27013 probable_max_size = INTVAL (probable_max_size_exp);
27014 if (CONST_INT_P (expected_size_exp))
27015 expected_size = INTVAL (expected_size_exp);
27018 /* Make sure we don't need to care about overflow later on. */
27019 if (count > (HOST_WIDE_INT_1U << 30))
27020 return false;
27022 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27023 if (!issetmem)
27024 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27026 /* Step 0: Decide on preferred algorithm, desired alignment and
27027 size of chunks to be copied by main loop. */
27028 alg = decide_alg (count, expected_size, min_size, probable_max_size,
27029 issetmem,
27030 issetmem && val_exp == const0_rtx, have_as,
27031 &dynamic_check, &noalign, false);
27032 if (alg == libcall)
27033 return false;
27034 gcc_assert (alg != no_stringop);
27036 /* For now the vector version of memset is generated only for memory zeroing, as
27037    creating the promoted vector value is very cheap in this case.  */
27038 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27039 alg = unrolled_loop;
27041 if (!count)
27042 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27043 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27044 if (!issetmem)
27045 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27047 unroll_factor = 1;
27048 move_mode = word_mode;
27049 switch (alg)
27051 case libcall:
27052 case no_stringop:
27053 case last_alg:
27054 gcc_unreachable ();
27055 case loop_1_byte:
27056 need_zero_guard = true;
27057 move_mode = QImode;
27058 break;
27059 case loop:
27060 need_zero_guard = true;
27061 break;
27062 case unrolled_loop:
27063 need_zero_guard = true;
27064 unroll_factor = (TARGET_64BIT ? 4 : 2);
27065 break;
27066 case vector_loop:
27067 need_zero_guard = true;
27068 unroll_factor = 4;
27069 /* Find the widest supported mode. */
27070 move_mode = word_mode;
27071 while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27072 && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27073 move_mode = wider_mode;
27075 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27076 move_mode = TImode;
27078 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27079 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27080 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27082 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27083 if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27084 || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27085 move_mode = word_mode;
27087 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27088 break;
27089 case rep_prefix_8_byte:
27090 move_mode = DImode;
27091 break;
27092 case rep_prefix_4_byte:
27093 move_mode = SImode;
27094 break;
27095 case rep_prefix_1_byte:
27096 move_mode = QImode;
27097 break;
27099 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27100 epilogue_size_needed = size_needed;
27102 /* If we are going to emit any library calls conditionally, make sure any
27103    pending stack adjustments happen before the first conditional branch;
27104    otherwise they will be emitted before the library call only and won't
27105    happen on the other branches.  */
27106 if (dynamic_check != -1)
27107 do_pending_stack_adjust ();
27109 desired_align = decide_alignment (align, alg, expected_size, move_mode);
27110 if (!TARGET_ALIGN_STRINGOPS || noalign)
27111 align = desired_align;
27113 /* Step 1: Prologue guard. */
27115 /* Alignment code needs count to be in register. */
27116 if (CONST_INT_P (count_exp) && desired_align > align)
27118 if (INTVAL (count_exp) > desired_align
27119 && INTVAL (count_exp) > size_needed)
27121 align_bytes
27122 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27123 if (align_bytes <= 0)
27124 align_bytes = 0;
27125 else
27126 align_bytes = desired_align - align_bytes;
27128 if (align_bytes == 0)
27129 count_exp = force_reg (counter_mode (count_exp), count_exp);
27131 gcc_assert (desired_align >= 1 && align >= 1);
27133 /* Misaligned move sequences handle both prologue and epilogue at once.
27134    Default code generation results in smaller code for large alignments
27135    and also avoids redundant work when sizes are known precisely.  */
27136 misaligned_prologue_used
27137 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27138 && MAX (desired_align, epilogue_size_needed) <= 32
27139 && desired_align <= epilogue_size_needed
27140 && ((desired_align > align && !align_bytes)
27141 || (!count && epilogue_size_needed > 1)));
27143 /* Do the cheap promotion to allow better CSE across the
27144    main loop and epilogue (i.e. one load of the big constant in
27145    front of all code).
27146    For now the misaligned move sequences do not have a fast path
27147    without broadcasting.  */
27148 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27150 if (alg == vector_loop)
27152 gcc_assert (val_exp == const0_rtx);
27153 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27154 promoted_val = promote_duplicated_reg_to_size (val_exp,
27155 GET_MODE_SIZE (word_mode),
27156 desired_align, align);
27158 else
27160 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27161 desired_align, align);
27164 /* Misaligned move sequences handle both prologues and epilogues at once.
27165    Default code generation results in smaller code for large alignments and
27166    also avoids redundant work when sizes are known precisely.  */
27167 if (misaligned_prologue_used)
27169 /* The misaligned move prologue handles small blocks by itself.  */
27170 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27171 (dst, src, &destreg, &srcreg,
27172 move_mode, promoted_val, vec_promoted_val,
27173 &count_exp,
27174 &jump_around_label,
27175 desired_align < align
27176 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27177 desired_align, align, &min_size, dynamic_check, issetmem);
27178 if (!issetmem)
27179 src = change_address (src, BLKmode, srcreg);
27180 dst = change_address (dst, BLKmode, destreg);
27181 set_mem_align (dst, desired_align * BITS_PER_UNIT);
27182 epilogue_size_needed = 0;
27183 if (need_zero_guard
27184 && min_size < (unsigned HOST_WIDE_INT) size_needed)
27186 /* It is possible that we copied enough so the main loop will not
27187 execute. */
27188 gcc_assert (size_needed > 1);
27189 if (jump_around_label == NULL_RTX)
27190 jump_around_label = gen_label_rtx ();
27191 emit_cmp_and_jump_insns (count_exp,
27192 GEN_INT (size_needed),
27193 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27194 if (expected_size == -1
27195 || expected_size < (desired_align - align) / 2 + size_needed)
27196 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27197 else
27198 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27201 /* Ensure that alignment prologue won't copy past end of block. */
27202 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27204 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27205 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
27206    Make sure EPILOGUE_SIZE_NEEDED is a power of 2.  */
27207 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
27209 /* To improve performance of small blocks, we jump around the VAL
27210 promoting code. This means that if the promoted VAL is not constant,
27211 we might not use it in the epilogue and have to use the byte
27212 loop variant. */
27213 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27214 force_loopy_epilogue = true;
27215 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27216 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27218 /* If the main algorithm works on QImode, no epilogue is needed.
27219 For small sizes just don't align anything. */
27220 if (size_needed == 1)
27221 desired_align = align;
27222 else
27223 goto epilogue;
27225 else if (!count
27226 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27228 label = gen_label_rtx ();
27229 emit_cmp_and_jump_insns (count_exp,
27230 GEN_INT (epilogue_size_needed),
27231 LTU, 0, counter_mode (count_exp), 1, label);
27232 if (expected_size == -1 || expected_size < epilogue_size_needed)
27233 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27234 else
27235 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27239 /* Emit code to decide at runtime whether a library call or inline code
27240 should be used. */
27241 if (dynamic_check != -1)
27243 if (!issetmem && CONST_INT_P (count_exp))
27245 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
27247 emit_block_copy_via_libcall (dst, src, count_exp);
27248 count_exp = const0_rtx;
27249 goto epilogue;
27252 else
27254 rtx_code_label *hot_label = gen_label_rtx ();
27255 if (jump_around_label == NULL_RTX)
27256 jump_around_label = gen_label_rtx ();
27257 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
27258 LEU, 0, counter_mode (count_exp),
27259 1, hot_label);
27260 predict_jump (REG_BR_PROB_BASE * 90 / 100);
27261 if (issetmem)
27262 set_storage_via_libcall (dst, count_exp, val_exp);
27263 else
27264 emit_block_copy_via_libcall (dst, src, count_exp);
27265 emit_jump (jump_around_label);
27266 emit_label (hot_label);
27270 /* Step 2: Alignment prologue. */
27271 /* Do the expensive promotion once we branched off the small blocks. */
27272 if (issetmem && !promoted_val)
27273 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27274 desired_align, align);
27276 if (desired_align > align && !misaligned_prologue_used)
27278 if (align_bytes == 0)
27280 /* Except for the first move in the prologue, we no longer know
27281 the constant offset in the aliasing info. It doesn't seem worth
27282 the pain to maintain it for the first move, so throw away
27283 the info early. */
27284 dst = change_address (dst, BLKmode, destreg);
27285 if (!issetmem)
27286 src = change_address (src, BLKmode, srcreg);
27287 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
27288 promoted_val, vec_promoted_val,
27289 count_exp, align, desired_align,
27290 issetmem);
27291 /* At most desired_align - align bytes are copied. */
27292 if (min_size < (unsigned)(desired_align - align))
27293 min_size = 0;
27294 else
27295 min_size -= desired_align - align;
27297 else
27299 /* If we know how many bytes need to be stored before dst is
27300 sufficiently aligned, maintain aliasing info accurately. */
27301 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
27302 srcreg,
27303 promoted_val,
27304 vec_promoted_val,
27305 desired_align,
27306 align_bytes,
27307 issetmem);
27309 count_exp = plus_constant (counter_mode (count_exp),
27310 count_exp, -align_bytes);
27311 count -= align_bytes;
27312 min_size -= align_bytes;
27313 max_size -= align_bytes;
27315 if (need_zero_guard
27316 && min_size < (unsigned HOST_WIDE_INT) size_needed
27317 && (count < (unsigned HOST_WIDE_INT) size_needed
27318 || (align_bytes == 0
27319 && count < ((unsigned HOST_WIDE_INT) size_needed
27320 + desired_align - align))))
27322 /* It is possible that we copied enough so the main loop will not
27323 execute. */
27324 gcc_assert (size_needed > 1);
27325 if (label == NULL_RTX)
27326 label = gen_label_rtx ();
27327 emit_cmp_and_jump_insns (count_exp,
27328 GEN_INT (size_needed),
27329 LTU, 0, counter_mode (count_exp), 1, label);
27330 if (expected_size == -1
27331 || expected_size < (desired_align - align) / 2 + size_needed)
27332 predict_jump (REG_BR_PROB_BASE * 20 / 100);
27333 else
27334 predict_jump (REG_BR_PROB_BASE * 60 / 100);
27337 if (label && size_needed == 1)
27339 emit_label (label);
27340 LABEL_NUSES (label) = 1;
27341 label = NULL;
27342 epilogue_size_needed = 1;
27343 if (issetmem)
27344 promoted_val = val_exp;
27346 else if (label == NULL_RTX && !misaligned_prologue_used)
27347 epilogue_size_needed = size_needed;
27349 /* Step 3: Main loop. */
27351 switch (alg)
27353 case libcall:
27354 case no_stringop:
27355 case last_alg:
27356 gcc_unreachable ();
27357 case loop_1_byte:
27358 case loop:
27359 case unrolled_loop:
27360 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
27361 count_exp, move_mode, unroll_factor,
27362 expected_size, issetmem);
27363 break;
27364 case vector_loop:
27365 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
27366 vec_promoted_val, count_exp, move_mode,
27367 unroll_factor, expected_size, issetmem);
27368 break;
27369 case rep_prefix_8_byte:
27370 case rep_prefix_4_byte:
27371 case rep_prefix_1_byte:
27372 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
27373 val_exp, count_exp, move_mode, issetmem);
27374 break;
27376 /* Properly adjust the offset of the src and dest memory for aliasing. */
27377 if (CONST_INT_P (count_exp))
27379 if (!issetmem)
27380 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
27381 (count / size_needed) * size_needed);
27382 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
27383 (count / size_needed) * size_needed);
27385 else
27387 if (!issetmem)
27388 src = change_address (src, BLKmode, srcreg);
27389 dst = change_address (dst, BLKmode, destreg);
27392 /* Step 4: Epilogue to copy the remaining bytes. */
27393 epilogue:
27394 if (label)
27396 /* When the main loop is done, COUNT_EXP might hold original count,
27397 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
27398 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
27399 bytes. Compensate if needed. */
27401 if (size_needed < epilogue_size_needed)
27403 tmp =
27404 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
27405 GEN_INT (size_needed - 1), count_exp, 1,
27406 OPTAB_DIRECT);
27407 if (tmp != count_exp)
27408 emit_move_insn (count_exp, tmp);
27410 emit_label (label);
27411 LABEL_NUSES (label) = 1;
27414 if (count_exp != const0_rtx && epilogue_size_needed > 1)
27416 if (force_loopy_epilogue)
27417 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
27418 epilogue_size_needed);
27419 else
27421 if (issetmem)
27422 expand_setmem_epilogue (dst, destreg, promoted_val,
27423 vec_promoted_val, count_exp,
27424 epilogue_size_needed);
27425 else
27426 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
27427 epilogue_size_needed);
27430 if (jump_around_label)
27431 emit_label (jump_around_label);
27432 return true;
27436 /* Expand the appropriate insns for doing strlen if not just doing
27437 repnz; scasb
27439 out = result, initialized with the start address
27440 align_rtx = alignment of the address.
27441 scratch = scratch register, initialized with the start address when
27442 not aligned, otherwise undefined
27444 This is just the body. It needs the initializations mentioned above and
27445 some address computing at the end. These things are done in i386.md. */
27447 static void
27448 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
27450 int align;
27451 rtx tmp;
27452 rtx_code_label *align_2_label = NULL;
27453 rtx_code_label *align_3_label = NULL;
27454 rtx_code_label *align_4_label = gen_label_rtx ();
27455 rtx_code_label *end_0_label = gen_label_rtx ();
27456 rtx mem;
27457 rtx tmpreg = gen_reg_rtx (SImode);
27458 rtx scratch = gen_reg_rtx (SImode);
27459 rtx cmp;
27461 align = 0;
27462 if (CONST_INT_P (align_rtx))
27463 align = INTVAL (align_rtx);
27465 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
27467 /* Is there a known alignment and is it less than 4? */
27468 if (align < 4)
27470 rtx scratch1 = gen_reg_rtx (Pmode);
27471 emit_move_insn (scratch1, out);
27472 /* Is there a known alignment and is it not 2? */
27473 if (align != 2)
27475 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
27476 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
27478 /* Leave just the 3 lower bits. */
27479 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
27480 NULL_RTX, 0, OPTAB_WIDEN);
27482 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27483 Pmode, 1, align_4_label);
27484 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
27485 Pmode, 1, align_2_label);
27486 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
27487 Pmode, 1, align_3_label);
27489 else
27491 /* Since the alignment is 2, we have to check 2 or 0 bytes;
27492 check whether it is aligned to a 4-byte boundary. */
27494 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
27495 NULL_RTX, 0, OPTAB_WIDEN);
27497 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
27498 Pmode, 1, align_4_label);
27501 mem = change_address (src, QImode, out);
27503 /* Now compare the bytes. */
27505 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
27506 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
27507 QImode, 1, end_0_label);
27509 /* Increment the address. */
27510 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27512 /* Not needed with an alignment of 2 */
27513 if (align != 2)
27515 emit_label (align_2_label);
27517 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27518 end_0_label);
27520 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27522 emit_label (align_3_label);
27525 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
27526 end_0_label);
27528 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
27531 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
27532 align this loop; doing so only bloats the code and does not help
27533 speed. */
27534 emit_label (align_4_label);
27536 mem = change_address (src, SImode, out);
27537 emit_move_insn (scratch, mem);
27538 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
27540 /* This formula yields a nonzero result iff one of the bytes is zero.
27541 This saves three branches inside the loop and many cycles. */
27543 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
27544 emit_insn (gen_one_cmplsi2 (scratch, scratch));
27545 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
27546 emit_insn (gen_andsi3 (tmpreg, tmpreg,
27547 gen_int_mode (0x80808080, SImode)));
27548 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
27549 align_4_label);
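 /* Worked example of the zero-byte test computed above: with X = 0x63636300
    (low byte zero),
      (X - 0x01010101) & ~X & 0x80808080
      = 0x626261FF & 0x9C9C9CFF & 0x80808080 = 0x00000080,
    which is nonzero; with X = 0x01010101 (no zero byte) the result is 0. */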
27551 if (TARGET_CMOVE)
27553 rtx reg = gen_reg_rtx (SImode);
27554 rtx reg2 = gen_reg_rtx (Pmode);
27555 emit_move_insn (reg, tmpreg);
27556 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
27558 /* If zero is not in the first two bytes, move two bytes forward. */
27559 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27560 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27561 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27562 emit_insn (gen_rtx_SET (tmpreg,
27563 gen_rtx_IF_THEN_ELSE (SImode, tmp,
27564 reg,
27565 tmpreg)));
27566 /* Emit lea manually to avoid clobbering of flags. */
27567 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
27569 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27570 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
27571 emit_insn (gen_rtx_SET (out,
27572 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
27573 reg2,
27574 out)));
27576 else
27578 rtx_code_label *end_2_label = gen_label_rtx ();
27579 /* Is zero in the first two bytes? */
27581 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
27582 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27583 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
27584 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27585 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
27586 pc_rtx);
27587 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
27588 JUMP_LABEL (tmp) = end_2_label;
27590 /* Not in the first two. Move two bytes forward. */
27591 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
27592 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
27594 emit_label (end_2_label);
27598 /* Avoid branch in fixing the byte. */
27599 tmpreg = gen_lowpart (QImode, tmpreg);
27600 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
27601 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
27602 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
27603 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
27605 emit_label (end_0_label);
27608 /* Expand strlen. */
27610 bool
27611 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
27613 rtx addr, scratch1, scratch2, scratch3, scratch4;
27615 /* The generic case of the strlen expander is long. Avoid expanding it
27616 unless TARGET_INLINE_ALL_STRINGOPS. */
27618 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27619 && !TARGET_INLINE_ALL_STRINGOPS
27620 && !optimize_insn_for_size_p ()
27621 && (!CONST_INT_P (align) || INTVAL (align) < 4))
27622 return false;
27624 addr = force_reg (Pmode, XEXP (src, 0));
27625 scratch1 = gen_reg_rtx (Pmode);
27627 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
27628 && !optimize_insn_for_size_p ())
27630 /* Well, it seems that some optimizer does not combine a call like
27631 foo(strlen(bar), strlen(bar));
27632 when the move and the subtraction are done here. It does calculate
27633 the length just once when these instructions are done inside of
27634 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
27635 often used and I use one fewer register for the lifetime of
27636 output_strlen_unroll(), this is better. */
27638 emit_move_insn (out, addr);
27640 ix86_expand_strlensi_unroll_1 (out, src, align);
27642 /* strlensi_unroll_1 returns the address of the zero at the end of
27643 the string, like memchr(), so compute the length by subtracting
27644 the start address. */
27645 emit_insn (ix86_gen_sub3 (out, out, addr));
27647 else
27649 rtx unspec;
27651 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27652 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27653 return false;
27654 /* Can't use this for non-default address spaces. */
27655 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27656 return false;
27658 scratch2 = gen_reg_rtx (Pmode);
27659 scratch3 = gen_reg_rtx (Pmode);
27660 scratch4 = force_reg (Pmode, constm1_rtx);
27662 emit_move_insn (scratch3, addr);
27663 eoschar = force_reg (QImode, eoschar);
27665 src = replace_equiv_address_nv (src, scratch3);
27667 /* If .md starts supporting :P, this can be done in .md. */
27668 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27669 scratch4), UNSPEC_SCAS);
27670 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
27671 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27672 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
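 /* A sketch of the arithmetic above, assuming the scas pattern leaves the
    decremented count register in SCRATCH1: starting from -1, repnz scasb
    scans length + 1 bytes (including the terminator), so SCRATCH1 ends up
    as -(length + 2); then ~SCRATCH1 - 1 = (length + 1) - 1 = length. */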
27674 return true;
27677 /* For a given symbol (function), construct code to compute the address of
27678 its PLT entry in the large x86-64 PIC model. */
27679 static rtx
27680 construct_plt_address (rtx symbol)
27682 rtx tmp, unspec;
27684 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27685 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27686 gcc_assert (Pmode == DImode);
27688 tmp = gen_reg_rtx (Pmode);
27689 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27691 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27692 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
27693 return tmp;
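 /* In assembly terms this amounts to something like the following (a sketch,
    assuming the usual large-PIC sequence with the GOT base in the PIC
    register):
      movabs $symbol@PLTOFF, %tmp
      add    %<pic_reg>, %tmp  */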
27697 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27698 rtx callarg2,
27699 rtx pop, bool sibcall)
27701 rtx vec[3];
27702 rtx use = NULL, call;
27703 unsigned int vec_len = 0;
27704 tree fndecl;
27706 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27708 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
27709 if (fndecl
27710 && (lookup_attribute ("interrupt",
27711 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
27712 error ("interrupt service routine can't be called directly");
27714 else
27715 fndecl = NULL_TREE;
27717 if (pop == const0_rtx)
27718 pop = NULL;
27719 gcc_assert (!TARGET_64BIT || !pop);
27721 if (TARGET_MACHO && !TARGET_64BIT)
27723 #if TARGET_MACHO
27724 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27725 fnaddr = machopic_indirect_call_target (fnaddr);
27726 #endif
27728 else
27730 /* Static functions and indirect calls don't need the PIC register. Also,
27731 check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
27732 attribute, making it an indirect call. */
27733 rtx addr = XEXP (fnaddr, 0);
27734 if (flag_pic
27735 && GET_CODE (addr) == SYMBOL_REF
27736 && !SYMBOL_REF_LOCAL_P (addr))
27738 if (flag_plt
27739 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27740 || !lookup_attribute ("noplt",
27741 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27743 if (!TARGET_64BIT
27744 || (ix86_cmodel == CM_LARGE_PIC
27745 && DEFAULT_ABI != MS_ABI))
27747 use_reg (&use, gen_rtx_REG (Pmode,
27748 REAL_PIC_OFFSET_TABLE_REGNUM));
27749 if (ix86_use_pseudo_pic_reg ())
27750 emit_move_insn (gen_rtx_REG (Pmode,
27751 REAL_PIC_OFFSET_TABLE_REGNUM),
27752 pic_offset_table_rtx);
27755 else if (!TARGET_PECOFF && !TARGET_MACHO)
27757 if (TARGET_64BIT)
27759 fnaddr = gen_rtx_UNSPEC (Pmode,
27760 gen_rtvec (1, addr),
27761 UNSPEC_GOTPCREL);
27762 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27764 else
27766 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27767 UNSPEC_GOT);
27768 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27769 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27770 fnaddr);
27772 fnaddr = gen_const_mem (Pmode, fnaddr);
27773 /* Pmode may not be the same as word_mode for x32, which
27774 doesn't support an indirect branch via a 32-bit memory slot.
27775 Since the x32 GOT slot is 64 bits with the upper 32 bits zero,
27776 an indirect branch via the x32 GOT slot is OK. */
27777 if (GET_MODE (fnaddr) != word_mode)
27778 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27779 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27784 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27785 parameters passed in vector registers. */
27786 if (TARGET_64BIT
27787 && (INTVAL (callarg2) > 0
27788 || (INTVAL (callarg2) == 0
27789 && (TARGET_SSE || !flag_skip_rax_setup))))
27791 rtx al = gen_rtx_REG (QImode, AX_REG);
27792 emit_move_insn (al, callarg2);
27793 use_reg (&use, al);
27796 if (ix86_cmodel == CM_LARGE_PIC
27797 && !TARGET_PECOFF
27798 && MEM_P (fnaddr)
27799 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27800 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27801 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27802 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27803 branch via x32 GOT slot is OK. */
27804 else if (!(TARGET_X32
27805 && MEM_P (fnaddr)
27806 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27807 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27808 && (sibcall
27809 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27810 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
27812 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27813 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27816 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27818 if (retval)
27820 /* We should add the bounds registers as destinations in case a
27821 pointer with bounds may be returned. */
27822 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27824 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27825 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27826 if (GET_CODE (retval) == PARALLEL)
27828 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27829 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27830 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27831 retval = chkp_join_splitted_slot (retval, par);
27833 else
27835 retval = gen_rtx_PARALLEL (VOIDmode,
27836 gen_rtvec (3, retval, b0, b1));
27837 chkp_put_regs_to_expr_list (retval);
27841 call = gen_rtx_SET (retval, call);
27843 vec[vec_len++] = call;
27845 if (pop)
27847 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27848 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27849 vec[vec_len++] = pop;
27852 if (cfun->machine->no_caller_saved_registers
27853 && (!fndecl
27854 || (!TREE_THIS_VOLATILE (fndecl)
27855 && !lookup_attribute ("no_caller_saved_registers",
27856 TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
27858 static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
27859 bool is_64bit_ms_abi = (TARGET_64BIT
27860 && ix86_function_abi (fndecl) == MS_ABI);
27861 char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
27863 /* If there are no caller-saved registers, add all registers
27864 that are clobbered by the call which returns. */
27865 for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27866 if (!fixed_regs[i]
27867 && (ix86_call_used_regs[i] == 1
27868 || (ix86_call_used_regs[i] & c_mask))
27869 && !STACK_REGNO_P (i)
27870 && !MMX_REGNO_P (i))
27871 clobber_reg (&use,
27872 gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
27874 else if (TARGET_64BIT_MS_ABI
27875 && (!callarg2 || INTVAL (callarg2) != -2))
27877 unsigned i;
27879 for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
27881 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27882 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27884 clobber_reg (&use, gen_rtx_REG (mode, regno));
27887 /* Set here, but it may get cleared later. */
27888 if (TARGET_CALL_MS2SYSV_XLOGUES)
27890 if (!TARGET_SSE)
27893 /* Don't break hot-patched functions. */
27894 else if (ix86_function_ms_hook_prologue (current_function_decl))
27897 /* TODO: Cases not yet examined. */
27898 else if (flag_split_stack)
27899 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
27901 else
27903 gcc_assert (!reload_completed);
27904 cfun->machine->call_ms2sysv = true;
27909 if (vec_len > 1)
27910 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27911 call = emit_call_insn (call);
27912 if (use)
27913 CALL_INSN_FUNCTION_USAGE (call) = use;
27915 return call;
27918 /* Return true if the function being called was marked with attribute
27919 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
27920 to handle the non-PIC case in the backend because there is no easy
27921 interface for the front-end to force non-PLT calls to use the GOT.
27922 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
27923 to call the function marked "noplt" indirectly. */
27925 static bool
27926 ix86_nopic_noplt_attribute_p (rtx call_op)
27928 if (flag_pic || ix86_cmodel == CM_LARGE
27929 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
27930 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27931 || SYMBOL_REF_LOCAL_P (call_op))
27932 return false;
27934 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27936 if (!flag_plt
27937 || (symbol_decl != NULL_TREE
27938 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27939 return true;
27941 return false;
27944 /* Output the assembly for a call instruction. */
27946 const char *
27947 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27949 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27950 bool seh_nop_p = false;
27951 const char *xasm;
27953 if (SIBLING_CALL_P (insn))
27955 if (direct_p)
27957 if (ix86_nopic_noplt_attribute_p (call_op))
27959 if (TARGET_64BIT)
27960 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
27961 else
27962 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
27964 else
27965 xasm = "%!jmp\t%P0";
27967 /* SEH epilogue detection requires the indirect branch case
27968 to include REX.W. */
27969 else if (TARGET_SEH)
27970 xasm = "%!rex.W jmp\t%A0";
27971 else
27972 xasm = "%!jmp\t%A0";
27974 output_asm_insn (xasm, &call_op);
27975 return "";
27978 /* SEH unwinding can require an extra nop to be emitted in several
27979 circumstances. Determine if we have one of those. */
27980 if (TARGET_SEH)
27982 rtx_insn *i;
27984 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27986 /* If we get to another real insn, we don't need the nop. */
27987 if (INSN_P (i))
27988 break;
27990 /* If we get to the epilogue note, prevent a catch region from
27991 being adjacent to the standard epilogue sequence. If non-
27992 call-exceptions, we'll have done this during epilogue emission. */
27993 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27994 && !flag_non_call_exceptions
27995 && !can_throw_internal (insn))
27997 seh_nop_p = true;
27998 break;
28002 /* If we didn't find a real insn following the call, prevent the
28003 unwinder from looking into the next function. */
28004 if (i == NULL)
28005 seh_nop_p = true;
28008 if (direct_p)
28010 if (ix86_nopic_noplt_attribute_p (call_op))
28012 if (TARGET_64BIT)
28013 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
28014 else
28015 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
28017 else
28018 xasm = "%!call\t%P0";
28020 else
28021 xasm = "%!call\t%A0";
28023 output_asm_insn (xasm, &call_op);
28025 if (seh_nop_p)
28026 return "nop";
28028 return "";
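 /* For illustration: on x86-64 a direct call is normally emitted as
    "call foo", while the -fno-plt / "noplt" path above produces an indirect
    call through the GOT entry, e.g. "call *foo@GOTPCREL(%rip)". */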
28031 /* Clear stack slot assignments remembered from previous functions.
28032 This is called from INIT_EXPANDERS once before RTL is emitted for each
28033 function. */
28035 static struct machine_function *
28036 ix86_init_machine_status (void)
28038 struct machine_function *f;
28040 f = ggc_cleared_alloc<machine_function> ();
28041 f->call_abi = ix86_abi;
28043 return f;
28046 /* Return a MEM corresponding to a stack slot with mode MODE.
28047 Allocate a new slot if necessary.
28049 The RTL for a function can have several slots available: N is
28050 which slot to use. */
28053 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
28055 struct stack_local_entry *s;
28057 gcc_assert (n < MAX_386_STACK_LOCALS);
28059 for (s = ix86_stack_locals; s; s = s->next)
28060 if (s->mode == mode && s->n == n)
28061 return validize_mem (copy_rtx (s->rtl));
28063 s = ggc_alloc<stack_local_entry> ();
28064 s->n = n;
28065 s->mode = mode;
28066 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
28068 s->next = ix86_stack_locals;
28069 ix86_stack_locals = s;
28070 return validize_mem (copy_rtx (s->rtl));
28073 static void
28074 ix86_instantiate_decls (void)
28076 struct stack_local_entry *s;
28078 for (s = ix86_stack_locals; s; s = s->next)
28079 if (s->rtl != NULL_RTX)
28080 instantiate_decl_rtl (s->rtl);
28083 /* Return the number used for encoding REG, in the range 0..7. */
28085 static int
28086 reg_encoded_number (rtx reg)
28088 unsigned regno = REGNO (reg);
28089 switch (regno)
28091 case AX_REG:
28092 return 0;
28093 case CX_REG:
28094 return 1;
28095 case DX_REG:
28096 return 2;
28097 case BX_REG:
28098 return 3;
28099 case SP_REG:
28100 return 4;
28101 case BP_REG:
28102 return 5;
28103 case SI_REG:
28104 return 6;
28105 case DI_REG:
28106 return 7;
28107 default:
28108 break;
28110 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
28111 return regno - FIRST_STACK_REG;
28112 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
28113 return regno - FIRST_SSE_REG;
28114 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
28115 return regno - FIRST_MMX_REG;
28116 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
28117 return regno - FIRST_REX_SSE_REG;
28118 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
28119 return regno - FIRST_REX_INT_REG;
28120 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
28121 return regno - FIRST_MASK_REG;
28122 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
28123 return regno - FIRST_BND_REG;
28124 return -1;
28127 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
28128 in its encoding if it could be relevant for ROP mitigation, otherwise
28129 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
28130 used for calculating it into them. */
28132 static int
28133 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
28134 int *popno0 = 0, int *popno1 = 0)
28136 if (asm_noperands (PATTERN (insn)) >= 0)
28137 return -1;
28138 int has_modrm = get_attr_modrm (insn);
28139 if (!has_modrm)
28140 return -1;
28141 enum attr_modrm_class cls = get_attr_modrm_class (insn);
28142 rtx op0, op1;
28143 switch (cls)
28145 case MODRM_CLASS_OP02:
28146 gcc_assert (noperands >= 3);
28147 if (popno0)
28149 *popno0 = 0;
28150 *popno1 = 2;
28152 op0 = operands[0];
28153 op1 = operands[2];
28154 break;
28155 case MODRM_CLASS_OP01:
28156 gcc_assert (noperands >= 2);
28157 if (popno0)
28159 *popno0 = 0;
28160 *popno1 = 1;
28162 op0 = operands[0];
28163 op1 = operands[1];
28164 break;
28165 default:
28166 return -1;
28168 if (REG_P (op0) && REG_P (op1))
28170 int enc0 = reg_encoded_number (op0);
28171 int enc1 = reg_encoded_number (op1);
28172 return 0xc0 + (enc1 << 3) + enc0;
28174 return -1;
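 /* Illustrative example: for a register-to-register insn with
    operands[0] = %eax (encoding 0) and operands[1] = %ecx (encoding 1),
    the value returned above is 0xc0 + (1 << 3) + 0 = 0xc8. */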
28177 /* Check whether x86 address PARTS is a pc-relative address. */
28179 bool
28180 ix86_rip_relative_addr_p (struct ix86_address *parts)
28182 rtx base, index, disp;
28184 base = parts->base;
28185 index = parts->index;
28186 disp = parts->disp;
28188 if (disp && !base && !index)
28190 if (TARGET_64BIT)
28192 rtx symbol = disp;
28194 if (GET_CODE (disp) == CONST)
28195 symbol = XEXP (disp, 0);
28196 if (GET_CODE (symbol) == PLUS
28197 && CONST_INT_P (XEXP (symbol, 1)))
28198 symbol = XEXP (symbol, 0);
28200 if (GET_CODE (symbol) == LABEL_REF
28201 || (GET_CODE (symbol) == SYMBOL_REF
28202 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
28203 || (GET_CODE (symbol) == UNSPEC
28204 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
28205 || XINT (symbol, 1) == UNSPEC_PCREL
28206 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
28207 return true;
28210 return false;
28213 /* Calculate the length of the memory address in the instruction encoding.
28214 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
28215 or other prefixes. We never generate the addr32 prefix for LEA insns. */
28218 memory_address_length (rtx addr, bool lea)
28220 struct ix86_address parts;
28221 rtx base, index, disp;
28222 int len;
28223 int ok;
28225 if (GET_CODE (addr) == PRE_DEC
28226 || GET_CODE (addr) == POST_INC
28227 || GET_CODE (addr) == PRE_MODIFY
28228 || GET_CODE (addr) == POST_MODIFY)
28229 return 0;
28231 ok = ix86_decompose_address (addr, &parts);
28232 gcc_assert (ok);
28234 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
28236 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
28237 if (TARGET_64BIT && !lea
28238 && (SImode_address_operand (addr, VOIDmode)
28239 || (parts.base && GET_MODE (parts.base) == SImode)
28240 || (parts.index && GET_MODE (parts.index) == SImode)))
28241 len++;
28243 base = parts.base;
28244 index = parts.index;
28245 disp = parts.disp;
28247 if (base && SUBREG_P (base))
28248 base = SUBREG_REG (base);
28249 if (index && SUBREG_P (index))
28250 index = SUBREG_REG (index);
28252 gcc_assert (base == NULL_RTX || REG_P (base));
28253 gcc_assert (index == NULL_RTX || REG_P (index));
28255 /* Rule of thumb:
28256 - esp as the base always wants an index,
28257 - ebp as the base always wants a displacement,
28258 - r12 as the base always wants an index,
28259 - r13 as the base always wants a displacement. */
28261 /* Register Indirect. */
28262 if (base && !index && !disp)
28264 /* esp (for its index) and ebp (for its displacement) need
28265 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
28266 code. */
28267 if (base == arg_pointer_rtx
28268 || base == frame_pointer_rtx
28269 || REGNO (base) == SP_REG
28270 || REGNO (base) == BP_REG
28271 || REGNO (base) == R12_REG
28272 || REGNO (base) == R13_REG)
28273 len++;
28276 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
28277 is not disp32, but disp32(%rip), so for disp32
28278 SIB byte is needed, unless print_operand_address
28279 optimizes it into disp32(%rip) or (%rip) is implied
28280 by UNSPEC. */
28281 else if (disp && !base && !index)
28283 len += 4;
28284 if (!ix86_rip_relative_addr_p (&parts))
28285 len++;
28287 else
28289 /* Find the length of the displacement constant. */
28290 if (disp)
28292 if (base && satisfies_constraint_K (disp))
28293 len += 1;
28294 else
28295 len += 4;
28297 /* ebp always wants a displacement. Similarly r13. */
28298 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
28299 len++;
28301 /* An index requires the two-byte modrm form.... */
28302 if (index
28303 /* ...like esp (or r12), which always wants an index. */
28304 || base == arg_pointer_rtx
28305 || base == frame_pointer_rtx
28306 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
28307 len++;
28310 return len;
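 /* A few illustrative results of the rules above (64-bit mode): a plain
    register base such as (%rax) gives 0; (%rsp) needs a SIB byte, giving 1;
    8(%rbp) takes a one-byte displacement, giving 1; and a RIP-relative
    symbol reference costs the 4-byte displacement. */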
28313 /* Compute the default value for the "length_immediate" attribute. When
28314 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
28316 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
28318 int len = 0;
28319 int i;
28320 extract_insn_cached (insn);
28321 for (i = recog_data.n_operands - 1; i >= 0; --i)
28322 if (CONSTANT_P (recog_data.operand[i]))
28324 enum attr_mode mode = get_attr_mode (insn);
28326 gcc_assert (!len);
28327 if (shortform && CONST_INT_P (recog_data.operand[i]))
28329 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
28330 switch (mode)
28332 case MODE_QI:
28333 len = 1;
28334 continue;
28335 case MODE_HI:
28336 ival = trunc_int_for_mode (ival, HImode);
28337 break;
28338 case MODE_SI:
28339 ival = trunc_int_for_mode (ival, SImode);
28340 break;
28341 default:
28342 break;
28344 if (IN_RANGE (ival, -128, 127))
28346 len = 1;
28347 continue;
28350 switch (mode)
28352 case MODE_QI:
28353 len = 1;
28354 break;
28355 case MODE_HI:
28356 len = 2;
28357 break;
28358 case MODE_SI:
28359 len = 4;
28360 break;
28361 /* Immediates for DImode instructions are encoded
28362 as 32-bit sign-extended values. */
28363 case MODE_DI:
28364 len = 4;
28365 break;
28366 default:
28367 fatal_insn ("unknown insn mode", insn);
28370 return len;
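 /* For example: with SHORTFORM, "add $100, %eax" fits the 8-bit immediate
    alternative and counts as 1 byte, while "add $1000, %eax" needs the full
    4-byte immediate; DImode immediates likewise count as 4 bytes since they
    are encoded sign-extended. */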
28373 /* Compute default value for "length_address" attribute. */
28375 ix86_attr_length_address_default (rtx_insn *insn)
28377 int i;
28379 if (get_attr_type (insn) == TYPE_LEA)
28381 rtx set = PATTERN (insn), addr;
28383 if (GET_CODE (set) == PARALLEL)
28384 set = XVECEXP (set, 0, 0);
28386 gcc_assert (GET_CODE (set) == SET);
28388 addr = SET_SRC (set);
28390 return memory_address_length (addr, true);
28393 extract_insn_cached (insn);
28394 for (i = recog_data.n_operands - 1; i >= 0; --i)
28396 rtx op = recog_data.operand[i];
28397 if (MEM_P (op))
28399 constrain_operands_cached (insn, reload_completed);
28400 if (which_alternative != -1)
28402 const char *constraints = recog_data.constraints[i];
28403 int alt = which_alternative;
28405 while (*constraints == '=' || *constraints == '+')
28406 constraints++;
28407 while (alt-- > 0)
28408 while (*constraints++ != ',')
28410 /* Skip ignored operands. */
28411 if (*constraints == 'X')
28412 continue;
28415 int len = memory_address_length (XEXP (op, 0), false);
28417 /* Account for segment prefix for non-default addr spaces. */
28418 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
28419 len++;
28421 return len;
28424 return 0;
28427 /* Compute the default value for the "length_vex" attribute. It includes
28428 the 2- or 3-byte VEX prefix and 1 opcode byte. */
28431 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
28432 bool has_vex_w)
28434 int i;
28436 /* Only insns with the 0f opcode can use the 2-byte VEX prefix, and the
28437 VEX.W bit requires the 3-byte VEX prefix. */
28438 if (!has_0f_opcode || has_vex_w)
28439 return 3 + 1;
28441 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
28442 if (!TARGET_64BIT)
28443 return 2 + 1;
28445 extract_insn_cached (insn);
28447 for (i = recog_data.n_operands - 1; i >= 0; --i)
28448 if (REG_P (recog_data.operand[i]))
28450 /* REX.W bit uses 3 byte VEX prefix. */
28451 if (GET_MODE (recog_data.operand[i]) == DImode
28452 && GENERAL_REG_P (recog_data.operand[i]))
28453 return 3 + 1;
28455 else
28457 /* REX.X or REX.B bits use 3 byte VEX prefix. */
28458 if (MEM_P (recog_data.operand[i])
28459 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
28460 return 3 + 1;
28463 return 2 + 1;
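 /* Illustrative results: an insn that qualifies for the 2-byte VEX prefix
    (0f opcode, no VEX.W, no REX-like bits needed) yields 2 + 1 = 3, while a
    DImode general-register operand or a memory operand that mentions an
    extended register forces the 3-byte prefix, yielding 3 + 1 = 4. */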
28467 static bool
28468 ix86_class_likely_spilled_p (reg_class_t);
28470 /* Return true if the lhs of INSN is a HW function argument register, and
28471 set IS_SPILLED to true if it is a likely-spilled HW register. */
28472 static bool
28473 insn_is_function_arg (rtx insn, bool* is_spilled)
28475 rtx dst;
28477 if (!NONDEBUG_INSN_P (insn))
28478 return false;
28479 /* Call instructions are not movable; ignore them. */
28480 if (CALL_P (insn))
28481 return false;
28482 insn = PATTERN (insn);
28483 if (GET_CODE (insn) == PARALLEL)
28484 insn = XVECEXP (insn, 0, 0);
28485 if (GET_CODE (insn) != SET)
28486 return false;
28487 dst = SET_DEST (insn);
28488 if (REG_P (dst) && HARD_REGISTER_P (dst)
28489 && ix86_function_arg_regno_p (REGNO (dst)))
28491 /* Is it a likely-spilled HW register? */
28492 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28493 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28494 *is_spilled = true;
28495 return true;
28497 return false;
28500 /* Add output dependencies for a chain of adjacent function-argument moves
28501 if there is a move to a likely-spilled HW register. Return the first
28502 argument if at least one dependence was added, or NULL otherwise. */
28503 static rtx_insn *
28504 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28506 rtx_insn *insn;
28507 rtx_insn *last = call;
28508 rtx_insn *first_arg = NULL;
28509 bool is_spilled = false;
28511 head = PREV_INSN (head);
28513 /* Find the argument-passing instruction nearest to the call. */
28514 while (true)
28516 last = PREV_INSN (last);
28517 if (last == head)
28518 return NULL;
28519 if (!NONDEBUG_INSN_P (last))
28520 continue;
28521 if (insn_is_function_arg (last, &is_spilled))
28522 break;
28523 return NULL;
28526 first_arg = last;
28527 while (true)
28529 insn = PREV_INSN (last);
28530 if (!INSN_P (insn))
28531 break;
28532 if (insn == head)
28533 break;
28534 if (!NONDEBUG_INSN_P (insn))
28536 last = insn;
28537 continue;
28539 if (insn_is_function_arg (insn, &is_spilled))
28541 /* Add an output dependence between two function arguments if the chain
28542 of output arguments contains likely-spilled HW registers. */
28543 if (is_spilled)
28544 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28545 first_arg = last = insn;
28547 else
28548 break;
28550 if (!is_spilled)
28551 return NULL;
28552 return first_arg;
28555 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
28556 code motion. */
28557 static void
28558 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28560 rtx set;
28561 rtx tmp;
28563 /* Add anti dependencies for bounds stores. */
28564 if (INSN_P (insn)
28565 && GET_CODE (PATTERN (insn)) == PARALLEL
28566 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28567 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28569 add_dependence (first_arg, insn, REG_DEP_ANTI);
28570 return;
28573 set = single_set (insn);
28574 if (!set)
28575 return;
28576 tmp = SET_DEST (set);
28577 if (REG_P (tmp))
28579 /* Add output dependency to the first function argument. */
28580 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28581 return;
28583 /* Add anti dependency. */
28584 add_dependence (first_arg, insn, REG_DEP_ANTI);
28587 /* Avoid cross-block motion of a function argument by adding a dependency
28588 from the first non-jump instruction in BB. */
28589 static void
28590 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28592 rtx_insn *insn = BB_END (bb);
28594 while (insn)
28596 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28598 rtx set = single_set (insn);
28599 if (set)
28601 avoid_func_arg_motion (arg, insn);
28602 return;
28605 if (insn == BB_HEAD (bb))
28606 return;
28607 insn = PREV_INSN (insn);
28611 /* Hook for pre-reload schedule - avoid motion of function arguments
28612 passed in likely spilled HW registers. */
28613 static void
28614 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28616 rtx_insn *insn;
28617 rtx_insn *first_arg = NULL;
28618 if (reload_completed)
28619 return;
28620 while (head != tail && DEBUG_INSN_P (head))
28621 head = NEXT_INSN (head);
28622 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28623 if (INSN_P (insn) && CALL_P (insn))
28625 first_arg = add_parameter_dependencies (insn, head);
28626 if (first_arg)
28628 /* Add a dependee for the first argument to predecessors if the
28629 region contains more than one block. */
28630 basic_block bb = BLOCK_FOR_INSN (insn);
28631 int rgn = CONTAINING_RGN (bb->index);
28632 int nr_blks = RGN_NR_BLOCKS (rgn);
28633 /* Skip trivial regions and region head blocks that can have
28634 predecessors outside of region. */
28635 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28637 edge e;
28638 edge_iterator ei;
28640 /* Regions are SCCs with the exception of selective
28641 scheduling with pipelining of outer blocks enabled.
28642 So also check that immediate predecessors of a non-head
28643 block are in the same region. */
28644 FOR_EACH_EDGE (e, ei, bb->preds)
28646 /* Avoid creating loop-carried dependencies by using the
28647 topological ordering of the region. */
28648 if (rgn == CONTAINING_RGN (e->src->index)
28649 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28650 add_dependee_for_func_arg (first_arg, e->src);
28653 insn = first_arg;
28654 if (insn == head)
28655 break;
28658 else if (first_arg)
28659 avoid_func_arg_motion (first_arg, insn);
28662 /* Hook for the pre-reload scheduler - set the priority of moves from likely
28663 spilled HW registers to the maximum, to schedule them as soon as possible.
28664 These are moves from function argument registers at the top of the function
28665 entry and moves from function return value registers after a call. */
28666 static int
28667 ix86_adjust_priority (rtx_insn *insn, int priority)
28669 rtx set;
28671 if (reload_completed)
28672 return priority;
28674 if (!NONDEBUG_INSN_P (insn))
28675 return priority;
28677 set = single_set (insn);
28678 if (set)
28680 rtx tmp = SET_SRC (set);
28681 if (REG_P (tmp)
28682 && HARD_REGISTER_P (tmp)
28683 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28684 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28685 return current_sched_info->sched_max_insns_priority;
28688 return priority;
28691 /* Prepare for scheduling pass. */
28692 static void
28693 ix86_sched_init_global (FILE *, int, int)
28695 /* Install scheduling hooks for current CPU. Some of these hooks are used
28696 in time-critical parts of the scheduler, so we only set them up when
28697 they are actually used. */
28698 switch (ix86_tune)
28700 case PROCESSOR_CORE2:
28701 case PROCESSOR_NEHALEM:
28702 case PROCESSOR_SANDYBRIDGE:
28703 case PROCESSOR_HASWELL:
28704 /* Do not perform multipass scheduling for pre-reload schedule
28705 to save compile time. */
28706 if (reload_completed)
28708 ix86_core2i7_init_hooks ();
28709 break;
28711 /* Fall through. */
28712 default:
28713 targetm.sched.dfa_post_advance_cycle = NULL;
28714 targetm.sched.first_cycle_multipass_init = NULL;
28715 targetm.sched.first_cycle_multipass_begin = NULL;
28716 targetm.sched.first_cycle_multipass_issue = NULL;
28717 targetm.sched.first_cycle_multipass_backtrack = NULL;
28718 targetm.sched.first_cycle_multipass_end = NULL;
28719 targetm.sched.first_cycle_multipass_fini = NULL;
28720 break;
28725 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
28727 static HOST_WIDE_INT
28728 ix86_static_rtx_alignment (machine_mode mode)
28730 if (mode == DFmode)
28731 return 64;
28732 if (ALIGN_MODE_128 (mode))
28733 return MAX (128, GET_MODE_ALIGNMENT (mode));
28734 return GET_MODE_ALIGNMENT (mode);
28737 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28739 static HOST_WIDE_INT
28740 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28742 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28743 || TREE_CODE (exp) == INTEGER_CST)
28745 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
28746 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
28747 return MAX (mode_align, align);
28749 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28750 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28751 return BITS_PER_WORD;
28753 return align;
28756 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28757 the data type, and ALIGN is the alignment that the object would
28758 ordinarily have. */
28760 static int
28761 iamcu_alignment (tree type, int align)
28763 machine_mode mode;
28765 if (align < 32 || TYPE_USER_ALIGN (type))
28766 return align;
28768 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
28769 aligned to 4 bytes. */
28770 mode = TYPE_MODE (strip_array_types (type));
28771 switch (GET_MODE_CLASS (mode))
28773 case MODE_INT:
28774 case MODE_COMPLEX_INT:
28775 case MODE_COMPLEX_FLOAT:
28776 case MODE_FLOAT:
28777 case MODE_DECIMAL_FLOAT:
28778 return 32;
28779 default:
28780 return align;
28784 /* Compute the alignment for a static variable.
28785 TYPE is the data type, and ALIGN is the alignment that
28786 the object would ordinarily have. The value of this function is used
28787 instead of that alignment to align the object. */
28790 ix86_data_alignment (tree type, int align, bool opt)
28792 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
28793 for symbols from other compilation units or symbols that don't need
28794 to bind locally. In order to preserve some ABI compatibility with
28795 those compilers, ensure we don't decrease alignment from what we
28796 used to assume. */
28798 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
28800 /* A data structure equal to or greater than the size of a cache line
28801 (64 bytes in the Pentium 4 and other recent Intel processors, including
28802 processors based on the Intel Core microarchitecture) should be aligned
28803 so that its base address is a multiple of the cache line size. */
28805 int max_align
28806 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
28808 if (max_align < BITS_PER_WORD)
28809 max_align = BITS_PER_WORD;
28811 switch (ix86_align_data_type)
28813 case ix86_align_data_type_abi: opt = false; break;
28814 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
28815 case ix86_align_data_type_cacheline: break;
28818 if (TARGET_IAMCU)
28819 align = iamcu_alignment (type, align);
28821 if (opt
28822 && AGGREGATE_TYPE_P (type)
28823 && TYPE_SIZE (type)
28824 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
28826 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
28827 && align < max_align_compat)
28828 align = max_align_compat;
28829 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
28830 && align < max_align)
28831 align = max_align;
28834 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
28835 to a 16-byte boundary. */
28836 if (TARGET_64BIT)
28838 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
28839 && TYPE_SIZE (type)
28840 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28841 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
28842 && align < 128)
28843 return 128;
28846 if (!opt)
28847 return align;
28849 if (TREE_CODE (type) == ARRAY_TYPE)
28851 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28852 return 64;
28853 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28854 return 128;
28856 else if (TREE_CODE (type) == COMPLEX_TYPE)
28859 if (TYPE_MODE (type) == DCmode && align < 64)
28860 return 64;
28861 if ((TYPE_MODE (type) == XCmode
28862 || TYPE_MODE (type) == TCmode) && align < 128)
28863 return 128;
28865 else if ((TREE_CODE (type) == RECORD_TYPE
28866 || TREE_CODE (type) == UNION_TYPE
28867 || TREE_CODE (type) == QUAL_UNION_TYPE)
28868 && TYPE_FIELDS (type))
28870 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28871 return 64;
28872 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28873 return 128;
28875 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28876 || TREE_CODE (type) == INTEGER_TYPE)
28878 if (TYPE_MODE (type) == DFmode && align < 64)
28879 return 64;
28880 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28881 return 128;
28884 return align;
28887 /* Compute the alignment for a local variable or a stack slot. EXP is
28888 the data type or decl itself, MODE is the widest mode available and
28889 ALIGN is the alignment that the object would ordinarily have. The
28890 value of this macro is used instead of that alignment to align the
28891 object. */
28893 unsigned int
28894 ix86_local_alignment (tree exp, machine_mode mode,
28895 unsigned int align)
28897 tree type, decl;
28899 if (exp && DECL_P (exp))
28901 type = TREE_TYPE (exp);
28902 decl = exp;
28904 else
28906 type = exp;
28907 decl = NULL;
28910 /* Don't do dynamic stack realignment for long long objects with
28911 -mpreferred-stack-boundary=2. */
28912 if (!TARGET_64BIT
28913 && align == 64
28914 && ix86_preferred_stack_boundary < 64
28915 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
28916 && (!type || !TYPE_USER_ALIGN (type))
28917 && (!decl || !DECL_USER_ALIGN (decl)))
28918 align = 32;
28920 /* If TYPE is NULL, we are allocating a stack slot for caller-save
28921 register in MODE. We will return the largest alignment of XF
28922 and DF. */
28923 if (!type)
28925 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
28926 align = GET_MODE_ALIGNMENT (DFmode);
28927 return align;
28930 /* Don't increase alignment for Intel MCU psABI. */
28931 if (TARGET_IAMCU)
28932 return align;
28934 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
28935 to a 16-byte boundary. The exact wording is:
28937 An array uses the same alignment as its elements, except that a local or
28938 global array variable of length at least 16 bytes or
28939 a C99 variable-length array variable always has alignment of at least 16 bytes.
28941 This was added to allow use of aligned SSE instructions on arrays. This
28942 rule is meant for static storage (where the compiler cannot do the analysis
28943 by itself). We follow it for automatic variables only when convenient.
28944 We fully control everything in the function being compiled, and functions
28945 from other units cannot rely on the alignment.
28947 Exclude the va_list type. It is the common case of a local array where
28948 we cannot benefit from the alignment.
28950 TODO: Probably one should optimize for size only when the variable does not escape. */
28951 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
28952 && TARGET_SSE)
28954 if (AGGREGATE_TYPE_P (type)
28955 && (va_list_type_node == NULL_TREE
28956 || (TYPE_MAIN_VARIANT (type)
28957 != TYPE_MAIN_VARIANT (va_list_type_node)))
28958 && TYPE_SIZE (type)
28959 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
28960 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
28961 && align < 128)
28962 return 128;
28964 if (TREE_CODE (type) == ARRAY_TYPE)
28966 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
28967 return 64;
28968 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
28969 return 128;
28971 else if (TREE_CODE (type) == COMPLEX_TYPE)
28973 if (TYPE_MODE (type) == DCmode && align < 64)
28974 return 64;
28975 if ((TYPE_MODE (type) == XCmode
28976 || TYPE_MODE (type) == TCmode) && align < 128)
28977 return 128;
28979 else if ((TREE_CODE (type) == RECORD_TYPE
28980 || TREE_CODE (type) == UNION_TYPE
28981 || TREE_CODE (type) == QUAL_UNION_TYPE)
28982 && TYPE_FIELDS (type))
28984 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
28985 return 64;
28986 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
28987 return 128;
28989 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
28990 || TREE_CODE (type) == INTEGER_TYPE)
28993 if (TYPE_MODE (type) == DFmode && align < 64)
28994 return 64;
28995 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
28996 return 128;
28998 return align;
29001 /* Compute the minimum required alignment for dynamic stack realignment
29002 purposes for a local variable, parameter or a stack slot. EXP is
29003 the data type or decl itself, MODE is its mode and ALIGN is the
29004 alignment that the object would ordinarily have. */
29006 unsigned int
29007 ix86_minimum_alignment (tree exp, machine_mode mode,
29008 unsigned int align)
29010 tree type, decl;
29012 if (exp && DECL_P (exp))
29014 type = TREE_TYPE (exp);
29015 decl = exp;
29017 else
29019 type = exp;
29020 decl = NULL;
29023 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29024 return align;
29026 /* Don't do dynamic stack realignment for long long objects with
29027 -mpreferred-stack-boundary=2. */
29028 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29029 && (!type || !TYPE_USER_ALIGN (type))
29030 && (!decl || !DECL_USER_ALIGN (decl)))
29032 gcc_checking_assert (!TARGET_STV);
29033 return 32;
29036 return align;
29039 /* Find a location for the static chain incoming to a nested function.
29040 This is a register, unless all free registers are used by arguments. */
29042 static rtx
29043 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29045 unsigned regno;
29047 /* While this function won't be called by the middle-end when a static
29048 chain isn't needed, it's also used throughout the backend so it's
29049 easiest to keep this check centralized. */
29050 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29051 return NULL;
29053 if (TARGET_64BIT)
29055 /* We always use R10 in 64-bit mode. */
29056 regno = R10_REG;
29058 else
29060 const_tree fntype, fndecl;
29061 unsigned int ccvt;
29063 /* By default in 32-bit mode we use ECX to pass the static chain. */
29064 regno = CX_REG;
29066 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29068 fntype = TREE_TYPE (fndecl_or_type);
29069 fndecl = fndecl_or_type;
29071 else
29073 fntype = fndecl_or_type;
29074 fndecl = NULL;
29077 ccvt = ix86_get_callcvt (fntype);
29078 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29080 /* Fastcall functions use ecx/edx for arguments, which leaves
29081 us with EAX for the static chain.
29082 Thiscall functions use ecx for arguments, which also
29083 leaves us with EAX for the static chain. */
29084 regno = AX_REG;
29086 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29088 /* Thiscall functions use ecx for arguments, which leaves
29089 us with EAX and EDX for the static chain.
29090 We use EAX for ABI compatibility. */
29091 regno = AX_REG;
29093 else if (ix86_function_regparm (fntype, fndecl) == 3)
29095 /* For regparm 3, we have no free call-clobbered registers in
29096 which to store the static chain. In order to implement this,
29097 we have the trampoline push the static chain to the stack.
29098 However, we can't push a value below the return address when
29099 we call the nested function directly, so we have to use an
29100 alternate entry point. For this we use ESI, and have the
29101 alternate entry point push ESI, so that things appear the
29102 same once we're executing the nested function. */
29103 if (incoming_p)
29105 if (fndecl == current_function_decl
29106 && !ix86_static_chain_on_stack)
29108 gcc_assert (!reload_completed);
29109 ix86_static_chain_on_stack = true;
29111 return gen_frame_mem (SImode,
29112 plus_constant (Pmode,
29113 arg_pointer_rtx, -8));
29115 regno = SI_REG;
29119 return gen_rtx_REG (Pmode, regno);
29122 /* Emit RTL insns to initialize the variable parts of a trampoline.
29123 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29124 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29125 to be passed to the target function. */
29127 static void
29128 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29130 rtx mem, fnaddr;
29131 int opcode;
29132 int offset = 0;
29134 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29136 if (TARGET_64BIT)
29138 int size;
29140 /* Load the function address into r11. Try to load the address using
29141 the shorter movl instead of movabs. We may want to support
29142 movq for kernel mode, but the kernel does not use trampolines at
29143 the moment. FNADDR is a 32-bit address and may not be in
29144 DImode when ptr_mode == SImode. Always use movl in this
29145 case. */
29146 if (ptr_mode == SImode
29147 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29149 fnaddr = copy_addr_to_reg (fnaddr);
29151 mem = adjust_address (m_tramp, HImode, offset);
29152 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29154 mem = adjust_address (m_tramp, SImode, offset + 2);
29155 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29156 offset += 6;
29158 else
29160 mem = adjust_address (m_tramp, HImode, offset);
29161 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29163 mem = adjust_address (m_tramp, DImode, offset + 2);
29164 emit_move_insn (mem, fnaddr);
29165 offset += 10;
29168 /* Load static chain using movabs to r10. Use the shorter movl
29169 instead of movabs when ptr_mode == SImode. */
29170 if (ptr_mode == SImode)
29172 opcode = 0xba41;
29173 size = 6;
29175 else
29177 opcode = 0xba49;
29178 size = 10;
29181 mem = adjust_address (m_tramp, HImode, offset);
29182 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29184 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29185 emit_move_insn (mem, chain_value);
29186 offset += size;
29188 /* Jump to r11; the last (unused) byte is a nop, only there to
29189 pad the write out to a single 32-bit store. */
29190 mem = adjust_address (m_tramp, SImode, offset);
29191 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29192 offset += 4;
29194 else
29196 rtx disp, chain;
29198 /* Depending on the static chain location, either load a register
29199 with a constant, or push the constant to the stack. All of the
29200 instructions are the same size. */
29201 chain = ix86_static_chain (fndecl, true);
29202 if (REG_P (chain))
29204 switch (REGNO (chain))
29206 case AX_REG:
29207 opcode = 0xb8; break;
29208 case CX_REG:
29209 opcode = 0xb9; break;
29210 default:
29211 gcc_unreachable ();
29214 else
29215 opcode = 0x68;
29217 mem = adjust_address (m_tramp, QImode, offset);
29218 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29220 mem = adjust_address (m_tramp, SImode, offset + 1);
29221 emit_move_insn (mem, chain_value);
29222 offset += 5;
29224 mem = adjust_address (m_tramp, QImode, offset);
29225 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29227 mem = adjust_address (m_tramp, SImode, offset + 1);
29229 /* Compute offset from the end of the jmp to the target function.
29230 In the case in which the trampoline stores the static chain on
29231 the stack, we need to skip the first insn which pushes the
29232 (call-saved) register static chain; this push is 1 byte. */
29233 offset += 5;
29234 disp = expand_binop (SImode, sub_optab, fnaddr,
29235 plus_constant (Pmode, XEXP (m_tramp, 0),
29236 offset - (MEM_P (chain) ? 1 : 0)),
29237 NULL_RTX, 1, OPTAB_DIRECT);
29238 emit_move_insn (mem, disp);
29241 gcc_assert (offset <= TRAMPOLINE_SIZE);
29243 #ifdef HAVE_ENABLE_EXECUTE_STACK
29244 #ifdef CHECK_EXECUTE_STACK_ENABLED
29245 if (CHECK_EXECUTE_STACK_ENABLED)
29246 #endif
29247 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29248 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
29249 #endif
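/* Illustrative sketch (not in the original source): for a 64-bit
   target with a full 64-bit FNADDR, the code above writes the
   following 24-byte sequence into the trampoline (<fnaddr> and
   <chain> are 8-byte immediates):

     49 bb <fnaddr>      movabs $fnaddr, %r11
     49 ba <chain>       movabs $chain,  %r10
     49 ff e3            jmp    *%r11
     90                  nop            ; pads the final 4-byte store

   The 32-bit variant instead emits a mov or push of the chain value
   (opcode 0xb8/0xb9/0x68) followed by a relative jmp (0xe9).  */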
29252 static bool
29253 ix86_allocate_stack_slots_for_args (void)
29255 /* Naked functions should not allocate stack slots for arguments. */
29256 return !ix86_function_naked (current_function_decl);
29259 static bool
29260 ix86_warn_func_return (tree decl)
29262 /* Naked functions are implemented entirely in assembly, including the
29263 return sequence, so suppress warnings about this. */
29264 return !ix86_function_naked (decl);
29267 /* The following file contains several enumerations and data structures
29268 built from the definitions in i386-builtin-types.def. */
29270 #include "i386-builtin-types.inc"
29272 /* Table for the ix86 builtin non-function types. */
29273 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29275 /* Retrieve an element from the above table, building some of
29276 the types lazily. */
29278 static tree
29279 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29281 unsigned int index;
29282 tree type, itype;
29284 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
29286 type = ix86_builtin_type_tab[(int) tcode];
29287 if (type != NULL)
29288 return type;
29290 gcc_assert (tcode > IX86_BT_LAST_PRIM);
29291 if (tcode <= IX86_BT_LAST_VECT)
29293 machine_mode mode;
29295 index = tcode - IX86_BT_LAST_PRIM - 1;
29296 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29297 mode = ix86_builtin_type_vect_mode[index];
29299 type = build_vector_type_for_mode (itype, mode);
29301 else
29303 int quals;
29305 index = tcode - IX86_BT_LAST_VECT - 1;
29306 if (tcode <= IX86_BT_LAST_PTR)
29307 quals = TYPE_UNQUALIFIED;
29308 else
29309 quals = TYPE_QUAL_CONST;
29311 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29312 if (quals != TYPE_UNQUALIFIED)
29313 itype = build_qualified_type (itype, quals);
29315 type = build_pointer_type (itype);
29318 ix86_builtin_type_tab[(int) tcode] = type;
29319 return type;
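/* Illustrative sketch (not in the original source): for a vector type
   code generated from i386-builtin-types.def -- say IX86_BT_V4SF,
   assuming those enumerator names -- the lookup above boils down to

     itype = ix86_get_builtin_type (IX86_BT_FLOAT);
     type  = build_vector_type_for_mode (itype, V4SFmode);

   with the result cached in ix86_builtin_type_tab for later calls.  */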
29322 /* Table for the ix86 builtin function types. */
29323 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29325 /* Retrieve an element from the above table, building some of
29326 the types lazily. */
29328 static tree
29329 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29331 tree type;
29333 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
29335 type = ix86_builtin_func_type_tab[(int) tcode];
29336 if (type != NULL)
29337 return type;
29339 if (tcode <= IX86_BT_LAST_FUNC)
29341 unsigned start = ix86_builtin_func_start[(int) tcode];
29342 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29343 tree rtype, atype, args = void_list_node;
29344 unsigned i;
29346 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
29347 for (i = after - 1; i > start; --i)
29349 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29350 args = tree_cons (NULL, atype, args);
29353 type = build_function_type (rtype, args);
29355 else
29357 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29358 enum ix86_builtin_func_type icode;
29360 icode = ix86_builtin_func_alias_base[index];
29361 type = ix86_get_builtin_func_type (icode);
29364 ix86_builtin_func_type_tab[(int) tcode] = type;
29365 return type;
29369 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
29370 bdesc_* arrays below should come first, then builtins for each bdesc_*
29371 array in ascending order, so that we can use direct array accesses. */
29372 enum ix86_builtins
29374 IX86_BUILTIN_MASKMOVQ,
29375 IX86_BUILTIN_LDMXCSR,
29376 IX86_BUILTIN_STMXCSR,
29377 IX86_BUILTIN_MASKMOVDQU,
29378 IX86_BUILTIN_PSLLDQ128,
29379 IX86_BUILTIN_CLFLUSH,
29380 IX86_BUILTIN_MONITOR,
29381 IX86_BUILTIN_MWAIT,
29382 IX86_BUILTIN_CLZERO,
29383 IX86_BUILTIN_VEC_INIT_V2SI,
29384 IX86_BUILTIN_VEC_INIT_V4HI,
29385 IX86_BUILTIN_VEC_INIT_V8QI,
29386 IX86_BUILTIN_VEC_EXT_V2DF,
29387 IX86_BUILTIN_VEC_EXT_V2DI,
29388 IX86_BUILTIN_VEC_EXT_V4SF,
29389 IX86_BUILTIN_VEC_EXT_V4SI,
29390 IX86_BUILTIN_VEC_EXT_V8HI,
29391 IX86_BUILTIN_VEC_EXT_V2SI,
29392 IX86_BUILTIN_VEC_EXT_V4HI,
29393 IX86_BUILTIN_VEC_EXT_V16QI,
29394 IX86_BUILTIN_VEC_SET_V2DI,
29395 IX86_BUILTIN_VEC_SET_V4SF,
29396 IX86_BUILTIN_VEC_SET_V4SI,
29397 IX86_BUILTIN_VEC_SET_V8HI,
29398 IX86_BUILTIN_VEC_SET_V4HI,
29399 IX86_BUILTIN_VEC_SET_V16QI,
29400 IX86_BUILTIN_GATHERSIV2DF,
29401 IX86_BUILTIN_GATHERSIV4DF,
29402 IX86_BUILTIN_GATHERDIV2DF,
29403 IX86_BUILTIN_GATHERDIV4DF,
29404 IX86_BUILTIN_GATHERSIV4SF,
29405 IX86_BUILTIN_GATHERSIV8SF,
29406 IX86_BUILTIN_GATHERDIV4SF,
29407 IX86_BUILTIN_GATHERDIV8SF,
29408 IX86_BUILTIN_GATHERSIV2DI,
29409 IX86_BUILTIN_GATHERSIV4DI,
29410 IX86_BUILTIN_GATHERDIV2DI,
29411 IX86_BUILTIN_GATHERDIV4DI,
29412 IX86_BUILTIN_GATHERSIV4SI,
29413 IX86_BUILTIN_GATHERSIV8SI,
29414 IX86_BUILTIN_GATHERDIV4SI,
29415 IX86_BUILTIN_GATHERDIV8SI,
29416 IX86_BUILTIN_VFMSUBSD3_MASK3,
29417 IX86_BUILTIN_VFMSUBSS3_MASK3,
29418 IX86_BUILTIN_GATHER3SIV8SF,
29419 IX86_BUILTIN_GATHER3SIV4SF,
29420 IX86_BUILTIN_GATHER3SIV4DF,
29421 IX86_BUILTIN_GATHER3SIV2DF,
29422 IX86_BUILTIN_GATHER3DIV8SF,
29423 IX86_BUILTIN_GATHER3DIV4SF,
29424 IX86_BUILTIN_GATHER3DIV4DF,
29425 IX86_BUILTIN_GATHER3DIV2DF,
29426 IX86_BUILTIN_GATHER3SIV8SI,
29427 IX86_BUILTIN_GATHER3SIV4SI,
29428 IX86_BUILTIN_GATHER3SIV4DI,
29429 IX86_BUILTIN_GATHER3SIV2DI,
29430 IX86_BUILTIN_GATHER3DIV8SI,
29431 IX86_BUILTIN_GATHER3DIV4SI,
29432 IX86_BUILTIN_GATHER3DIV4DI,
29433 IX86_BUILTIN_GATHER3DIV2DI,
29434 IX86_BUILTIN_SCATTERSIV8SF,
29435 IX86_BUILTIN_SCATTERSIV4SF,
29436 IX86_BUILTIN_SCATTERSIV4DF,
29437 IX86_BUILTIN_SCATTERSIV2DF,
29438 IX86_BUILTIN_SCATTERDIV8SF,
29439 IX86_BUILTIN_SCATTERDIV4SF,
29440 IX86_BUILTIN_SCATTERDIV4DF,
29441 IX86_BUILTIN_SCATTERDIV2DF,
29442 IX86_BUILTIN_SCATTERSIV8SI,
29443 IX86_BUILTIN_SCATTERSIV4SI,
29444 IX86_BUILTIN_SCATTERSIV4DI,
29445 IX86_BUILTIN_SCATTERSIV2DI,
29446 IX86_BUILTIN_SCATTERDIV8SI,
29447 IX86_BUILTIN_SCATTERDIV4SI,
29448 IX86_BUILTIN_SCATTERDIV4DI,
29449 IX86_BUILTIN_SCATTERDIV2DI,
29450 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
29451 where all operands are 32-byte or 64-byte wide respectively. */
29452 IX86_BUILTIN_GATHERALTSIV4DF,
29453 IX86_BUILTIN_GATHERALTDIV8SF,
29454 IX86_BUILTIN_GATHERALTSIV4DI,
29455 IX86_BUILTIN_GATHERALTDIV8SI,
29456 IX86_BUILTIN_GATHER3ALTDIV16SF,
29457 IX86_BUILTIN_GATHER3ALTDIV16SI,
29458 IX86_BUILTIN_GATHER3ALTSIV4DF,
29459 IX86_BUILTIN_GATHER3ALTDIV8SF,
29460 IX86_BUILTIN_GATHER3ALTSIV4DI,
29461 IX86_BUILTIN_GATHER3ALTDIV8SI,
29462 IX86_BUILTIN_GATHER3ALTSIV8DF,
29463 IX86_BUILTIN_GATHER3ALTSIV8DI,
29464 IX86_BUILTIN_GATHER3DIV16SF,
29465 IX86_BUILTIN_GATHER3DIV16SI,
29466 IX86_BUILTIN_GATHER3DIV8DF,
29467 IX86_BUILTIN_GATHER3DIV8DI,
29468 IX86_BUILTIN_GATHER3SIV16SF,
29469 IX86_BUILTIN_GATHER3SIV16SI,
29470 IX86_BUILTIN_GATHER3SIV8DF,
29471 IX86_BUILTIN_GATHER3SIV8DI,
29472 IX86_BUILTIN_SCATTERALTSIV8DF,
29473 IX86_BUILTIN_SCATTERALTDIV16SF,
29474 IX86_BUILTIN_SCATTERALTSIV8DI,
29475 IX86_BUILTIN_SCATTERALTDIV16SI,
29476 IX86_BUILTIN_SCATTERDIV16SF,
29477 IX86_BUILTIN_SCATTERDIV16SI,
29478 IX86_BUILTIN_SCATTERDIV8DF,
29479 IX86_BUILTIN_SCATTERDIV8DI,
29480 IX86_BUILTIN_SCATTERSIV16SF,
29481 IX86_BUILTIN_SCATTERSIV16SI,
29482 IX86_BUILTIN_SCATTERSIV8DF,
29483 IX86_BUILTIN_SCATTERSIV8DI,
29484 IX86_BUILTIN_GATHERPFQPD,
29485 IX86_BUILTIN_GATHERPFDPS,
29486 IX86_BUILTIN_GATHERPFDPD,
29487 IX86_BUILTIN_GATHERPFQPS,
29488 IX86_BUILTIN_SCATTERPFDPD,
29489 IX86_BUILTIN_SCATTERPFDPS,
29490 IX86_BUILTIN_SCATTERPFQPD,
29491 IX86_BUILTIN_SCATTERPFQPS,
29492 IX86_BUILTIN_CLWB,
29493 IX86_BUILTIN_CLFLUSHOPT,
29494 IX86_BUILTIN_INFQ,
29495 IX86_BUILTIN_HUGE_VALQ,
29496 IX86_BUILTIN_NANQ,
29497 IX86_BUILTIN_NANSQ,
29498 IX86_BUILTIN_XABORT,
29499 IX86_BUILTIN_ADDCARRYX32,
29500 IX86_BUILTIN_ADDCARRYX64,
29501 IX86_BUILTIN_SBB32,
29502 IX86_BUILTIN_SBB64,
29503 IX86_BUILTIN_RDRAND16_STEP,
29504 IX86_BUILTIN_RDRAND32_STEP,
29505 IX86_BUILTIN_RDRAND64_STEP,
29506 IX86_BUILTIN_RDSEED16_STEP,
29507 IX86_BUILTIN_RDSEED32_STEP,
29508 IX86_BUILTIN_RDSEED64_STEP,
29509 IX86_BUILTIN_MONITORX,
29510 IX86_BUILTIN_MWAITX,
29511 IX86_BUILTIN_CFSTRING,
29512 IX86_BUILTIN_CPU_INIT,
29513 IX86_BUILTIN_CPU_IS,
29514 IX86_BUILTIN_CPU_SUPPORTS,
29515 IX86_BUILTIN_READ_FLAGS,
29516 IX86_BUILTIN_WRITE_FLAGS,
29518 /* All the remaining builtins are tracked in bdesc_* arrays in
29519 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
29520 this point. */
29521 #define BDESC(mask, icode, name, code, comparison, flag) \
29522 code,
29523 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29524 code, \
29525 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
29526 #define BDESC_END(kind, next_kind)
29528 #include "i386-builtin.def"
29530 #undef BDESC
29531 #undef BDESC_FIRST
29532 #undef BDESC_END
29534 IX86_BUILTIN_MAX,
29536 IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
29538 /* Now just the aliases for bdesc_* start/end. */
29539 #define BDESC(mask, icode, name, code, comparison, flag)
29540 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
29541 #define BDESC_END(kind, next_kind) \
29542 IX86_BUILTIN__BDESC_##kind##_LAST \
29543 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
29545 #include "i386-builtin.def"
29547 #undef BDESC
29548 #undef BDESC_FIRST
29549 #undef BDESC_END
29551 /* Just to make sure there is no comma after the last enumerator. */
29552 IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
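/* Illustrative expansion (not in the original source): a hypothetical
   i386-builtin.def entry such as

     BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_example",
            IX86_BUILTIN_EXAMPLE, UNKNOWN, 0)

   contributes only "IX86_BUILTIN_EXAMPLE," to the enum above; the same
   .def file is included again further down to build the bdesc_* arrays
   of struct builtin_description.  */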
29555 /* Table for the ix86 builtin decls. */
29556 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29558 /* Table of all of the builtin functions that are possible with different ISAs
29559 but are waiting to be built until a function is declared to use that
29560 ISA. */
29561 struct builtin_isa {
29562 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29563 HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */
29564 const char *name; /* function name */
29565 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29566 unsigned char const_p:1; /* true if the declaration is constant */
29567 unsigned char pure_p:1; /* true if the declaration has pure attribute */
29568 bool leaf_p; /* true if the declaration has leaf attribute */
29569 bool nothrow_p; /* true if the declaration has nothrow attribute */
29570 bool set_and_not_built_p;
29573 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29575 /* Bits that can still enable any inclusion of a builtin. */
29576 static HOST_WIDE_INT deferred_isa_values = 0;
29577 static HOST_WIDE_INT deferred_isa_values2 = 0;
29579 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29580 of which isa_flags to use in the ix86_builtins_isa array. Store the
29581 function decl in the ix86_builtins array. Return the function decl or
29582 NULL_TREE if the builtin was not added.
29584 If the front end has a special hook for builtin functions, delay adding
29585 builtin functions that aren't in the current ISA until the ISA is changed
29586 with function specific optimization. Doing so can save about 300K for the
29587 default compiler. When the builtin is expanded, check at that time whether
29588 it is valid.
29590 If the front end doesn't have a special hook, record all builtins, even if
29591 they aren't enabled in the current ISA, in case the user uses
29592 function specific options for a different ISA, so that we don't get scope
29593 errors if a builtin is added in the middle of a function scope. */
29595 static inline tree
29596 def_builtin (HOST_WIDE_INT mask, const char *name,
29597 enum ix86_builtin_func_type tcode,
29598 enum ix86_builtins code)
29600 tree decl = NULL_TREE;
29602 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29604 ix86_builtins_isa[(int) code].isa = mask;
29606 /* OPTION_MASK_ISA_AVX512VL has a special meaning. Unlike the generic case,
29607 where any bit set means that the built-in is enabled, this bit must be *and-ed*
29608 with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
29609 means that *both* cpuid bits must be set for the built-in to be available.
29610 Handle this here. */
29611 if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
29612 mask &= ~OPTION_MASK_ISA_AVX512VL;
29614 mask &= ~OPTION_MASK_ISA_64BIT;
29615 if (mask == 0
29616 || (mask & ix86_isa_flags) != 0
29617 || (lang_hooks.builtin_function
29618 == lang_hooks.builtin_function_ext_scope))
29621 tree type = ix86_get_builtin_func_type (tcode);
29622 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29623 NULL, NULL_TREE);
29624 ix86_builtins[(int) code] = decl;
29625 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29627 else
29629 /* Just a MASK where set_and_not_built_p == true can potentially
29630 include a builtin. */
29631 deferred_isa_values |= mask;
29632 ix86_builtins[(int) code] = NULL_TREE;
29633 ix86_builtins_isa[(int) code].tcode = tcode;
29634 ix86_builtins_isa[(int) code].name = name;
29635 ix86_builtins_isa[(int) code].leaf_p = false;
29636 ix86_builtins_isa[(int) code].nothrow_p = false;
29637 ix86_builtins_isa[(int) code].const_p = false;
29638 ix86_builtins_isa[(int) code].pure_p = false;
29639 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29643 return decl;
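/* Illustrative call (not in the original source; the builtin name and
   code are hypothetical, but the pattern mirrors real uses later in
   this file):

     def_builtin (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL,
                  "__builtin_ia32_example", INT_FTYPE_V4SF_V4SF,
                  IX86_BUILTIN_EXAMPLE);

   Per the AVX512VL special case above, such a builtin is only
   available when *both* AVX512DQ and AVX512VL are enabled. When a
   requested mask is not (yet) covered by ix86_isa_flags, it is
   recorded in deferred_isa_values and the decl is created lazily by
   ix86_add_new_builtins.  */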
29646 /* Like def_builtin, but also marks the function decl "const". */
29648 static inline tree
29649 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29650 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29652 tree decl = def_builtin (mask, name, tcode, code);
29653 if (decl)
29654 TREE_READONLY (decl) = 1;
29655 else
29656 ix86_builtins_isa[(int) code].const_p = true;
29658 return decl;
29661 /* Like def_builtin, but also marks the function decl "pure". */
29663 static inline tree
29664 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
29665 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29667 tree decl = def_builtin (mask, name, tcode, code);
29668 if (decl)
29669 DECL_PURE_P (decl) = 1;
29670 else
29671 ix86_builtins_isa[(int) code].pure_p = true;
29673 return decl;
29676 /* Like def_builtin, but for additional isa2 flags. */
29678 static inline tree
29679 def_builtin2 (HOST_WIDE_INT mask, const char *name,
29680 enum ix86_builtin_func_type tcode,
29681 enum ix86_builtins code)
29683 tree decl = NULL_TREE;
29685 ix86_builtins_isa[(int) code].isa2 = mask;
29687 if (mask == 0
29688 || (mask & ix86_isa_flags2) != 0
29689 || (lang_hooks.builtin_function
29690 == lang_hooks.builtin_function_ext_scope))
29693 tree type = ix86_get_builtin_func_type (tcode);
29694 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29695 NULL, NULL_TREE);
29696 ix86_builtins[(int) code] = decl;
29697 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29699 else
29701 /* Just a MASK where set_and_not_built_p == true can potentially
29702 include a builtin. */
29703 deferred_isa_values2 |= mask;
29704 ix86_builtins[(int) code] = NULL_TREE;
29705 ix86_builtins_isa[(int) code].tcode = tcode;
29706 ix86_builtins_isa[(int) code].name = name;
29707 ix86_builtins_isa[(int) code].leaf_p = false;
29708 ix86_builtins_isa[(int) code].nothrow_p = false;
29709 ix86_builtins_isa[(int) code].const_p = false;
29710 ix86_builtins_isa[(int) code].pure_p = false;
29711 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29714 return decl;
29717 /* Like def_builtin2, but also marks the function decl "const". */
29719 static inline tree
29720 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
29721 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29723 tree decl = def_builtin2 (mask, name, tcode, code);
29724 if (decl)
29725 TREE_READONLY (decl) = 1;
29726 else
29727 ix86_builtins_isa[(int) code].const_p = true;
29729 return decl;
29732 /* Like def_builtin2, but also marks the function decl "pure". */
29734 static inline tree
29735 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
29736 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29738 tree decl = def_builtin2 (mask, name, tcode, code);
29739 if (decl)
29740 DECL_PURE_P (decl) = 1;
29741 else
29742 ix86_builtins_isa[(int) code].pure_p = true;
29744 return decl;
29747 /* Add any new builtin functions for a given ISA that may not have been
29748 declared. This saves a bit of space compared to adding all of the
29749 declarations to the tree, even if we didn't use them. */
29751 static void
29752 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
29754 if ((isa & deferred_isa_values) == 0
29755 && (isa2 & deferred_isa_values2) == 0)
29756 return;
29758 /* Bits in ISA value can be removed from potential isa values. */
29759 deferred_isa_values &= ~isa;
29760 deferred_isa_values2 &= ~isa2;
29762 int i;
29763 tree saved_current_target_pragma = current_target_pragma;
29764 current_target_pragma = NULL_TREE;
29766 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29768 if (((ix86_builtins_isa[i].isa & isa) != 0
29769 || (ix86_builtins_isa[i].isa2 & isa2) != 0)
29770 && ix86_builtins_isa[i].set_and_not_built_p)
29772 tree decl, type;
29774 /* Don't define the builtin again. */
29775 ix86_builtins_isa[i].set_and_not_built_p = false;
29777 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29778 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29779 type, i, BUILT_IN_MD, NULL,
29780 NULL_TREE);
29782 ix86_builtins[i] = decl;
29783 if (ix86_builtins_isa[i].const_p)
29784 TREE_READONLY (decl) = 1;
29785 if (ix86_builtins_isa[i].pure_p)
29786 DECL_PURE_P (decl) = 1;
29787 if (ix86_builtins_isa[i].leaf_p)
29788 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
29789 NULL_TREE);
29790 if (ix86_builtins_isa[i].nothrow_p)
29791 TREE_NOTHROW (decl) = 1;
29795 current_target_pragma = saved_current_target_pragma;
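/* Illustration (not in the original source): if a single function is
   compiled with, say, __attribute__ ((target ("avx2"))) while -mavx2
   was not given on the command line, the AVX2 bits arrive here in ISA
   and any AVX2 builtins that def_builtin had deferred are declared at
   that point, in the external scope.  */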
29798 /* Bits for builtin_description.flag. */
29800 /* Set when we don't support the comparison natively, and should
29801 swap_comparison in order to support it. */
29802 #define BUILTIN_DESC_SWAP_OPERANDS 1
29804 struct builtin_description
29806 const HOST_WIDE_INT mask;
29807 const enum insn_code icode;
29808 const char *const name;
29809 const enum ix86_builtins code;
29810 const enum rtx_code comparison;
29811 const int flag;
29814 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
29815 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
29816 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
29817 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
29818 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
29819 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
29820 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
29821 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
29822 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
29823 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
29824 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
29825 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
29826 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
29827 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
29828 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
29829 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
29830 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
29831 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
29832 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
29833 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
29834 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
29835 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
29836 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
29837 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
29838 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
29839 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
29840 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
29841 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
29842 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
29843 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
29844 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
29845 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
29846 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
29847 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
29848 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
29849 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
29850 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
29851 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
29852 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
29853 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
29854 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
29855 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
29856 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
29857 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
29858 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
29859 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
29860 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
29861 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
29862 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
29863 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
29864 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
29865 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
29867 #define BDESC(mask, icode, name, code, comparison, flag) \
29868 { mask, icode, name, code, comparison, flag },
29869 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
29870 static const struct builtin_description bdesc_##kind[] = \
29872 BDESC (mask, icode, name, code, comparison, flag)
29873 #define BDESC_END(kind, next_kind) \
29876 #include "i386-builtin.def"
29878 #undef BDESC
29879 #undef BDESC_FIRST
29880 #undef BDESC_END
29882 /* TM vector builtins. */
29884 /* Reuse the existing x86-specific `struct builtin_description' because
29885 we're lazy. Add casts to make them fit. */
29886 static const struct builtin_description bdesc_tm[] =
29888 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29889 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29890 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
29891 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29892 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29893 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29894 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
29896 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29897 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29898 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
29899 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29900 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29901 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29902 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
29904 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29905 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29906 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
29907 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29908 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29909 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29910 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
29912 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
29913 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
29914 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
29917 /* Initialize the transactional memory vector load/store builtins. */
29919 static void
29920 ix86_init_tm_builtins (void)
29922 enum ix86_builtin_func_type ftype;
29923 const struct builtin_description *d;
29924 size_t i;
29925 tree decl;
29926 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
29927 tree attrs_log, attrs_type_log;
29929 if (!flag_tm)
29930 return;
29932 /* If there are no builtins defined, we must be compiling in a
29933 language without trans-mem support. */
29934 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
29935 return;
29937 /* Use whatever attributes a normal TM load has. */
29938 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
29939 attrs_load = DECL_ATTRIBUTES (decl);
29940 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29941 /* Use whatever attributes a normal TM store has. */
29942 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
29943 attrs_store = DECL_ATTRIBUTES (decl);
29944 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29945 /* Use whatever attributes a normal TM log has. */
29946 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
29947 attrs_log = DECL_ATTRIBUTES (decl);
29948 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
29950 for (i = 0, d = bdesc_tm;
29951 i < ARRAY_SIZE (bdesc_tm);
29952 i++, d++)
29954 if ((d->mask & ix86_isa_flags) != 0
29955 || (lang_hooks.builtin_function
29956 == lang_hooks.builtin_function_ext_scope))
29958 tree type, attrs, attrs_type;
29959 enum built_in_function code = (enum built_in_function) d->code;
29961 ftype = (enum ix86_builtin_func_type) d->flag;
29962 type = ix86_get_builtin_func_type (ftype);
29964 if (BUILTIN_TM_LOAD_P (code))
29966 attrs = attrs_load;
29967 attrs_type = attrs_type_load;
29969 else if (BUILTIN_TM_STORE_P (code))
29971 attrs = attrs_store;
29972 attrs_type = attrs_type_store;
29974 else
29976 attrs = attrs_log;
29977 attrs_type = attrs_type_log;
29979 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
29980 /* The builtin without the prefix for
29981 calling it directly. */
29982 d->name + strlen ("__builtin_"),
29983 attrs);
29984 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
29985 set the TYPE_ATTRIBUTES. */
29986 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
29988 set_builtin_decl (code, decl, false);
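/* For example (illustrative, not in the original source), the bdesc_tm
   entry "__builtin__ITM_WM128" above is registered with the builtin
   machinery under that name and, by skipping the "__builtin_" prefix,
   is also callable directly as "_ITM_WM128" with the attributes of a
   normal TM store.  */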
29993 /* Macros for verification of enum ix86_builtins order. */
29994 #define BDESC_VERIFY(x, y, z) \
29995 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
29996 #define BDESC_VERIFYS(x, y, z) \
29997 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
29999 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30000 IX86_BUILTIN__BDESC_COMI_LAST, 1);
30001 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30002 IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
30003 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30004 IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
30005 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
30006 IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
30007 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30008 IX86_BUILTIN__BDESC_ARGS_LAST, 1);
30009 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
30010 IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
30011 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
30012 IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
30013 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30014 IX86_BUILTIN__BDESC_MPX_LAST, 1);
30015 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30016 IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
30017 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
30018 IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
30019 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30020 IX86_BUILTIN__BDESC_CET_LAST, 1);
30021 BDESC_VERIFYS (IX86_BUILTIN_MAX,
30022 IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
30024 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30025 in the current target ISA to allow the user to compile particular modules
30026 with different target specific options that differ from the command line
30027 options. */
30028 static void
30029 ix86_init_mmx_sse_builtins (void)
30031 const struct builtin_description * d;
30032 enum ix86_builtin_func_type ftype;
30033 size_t i;
30035 /* Add all special builtins with variable number of operands. */
30036 for (i = 0, d = bdesc_special_args;
30037 i < ARRAY_SIZE (bdesc_special_args);
30038 i++, d++)
30040 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
30041 if (d->name == 0)
30042 continue;
30044 ftype = (enum ix86_builtin_func_type) d->flag;
30045 def_builtin (d->mask, d->name, ftype, d->code);
30047 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
30048 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
30049 ARRAY_SIZE (bdesc_special_args) - 1);
30051 /* Add all builtins with variable number of operands. */
30052 for (i = 0, d = bdesc_args;
30053 i < ARRAY_SIZE (bdesc_args);
30054 i++, d++)
30056 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
30057 if (d->name == 0)
30058 continue;
30060 ftype = (enum ix86_builtin_func_type) d->flag;
30061 def_builtin_const (d->mask, d->name, ftype, d->code);
30063 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
30064 IX86_BUILTIN__BDESC_ARGS_FIRST,
30065 ARRAY_SIZE (bdesc_args) - 1);
30067 /* Add all builtins with variable number of operands. */
30068 for (i = 0, d = bdesc_args2;
30069 i < ARRAY_SIZE (bdesc_args2);
30070 i++, d++)
30072 if (d->name == 0)
30073 continue;
30075 ftype = (enum ix86_builtin_func_type) d->flag;
30076 def_builtin_const2 (d->mask, d->name, ftype, d->code);
30079 /* Add all builtins with rounding. */
30080 for (i = 0, d = bdesc_round_args;
30081 i < ARRAY_SIZE (bdesc_round_args);
30082 i++, d++)
30084 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
30085 if (d->name == 0)
30086 continue;
30088 ftype = (enum ix86_builtin_func_type) d->flag;
30089 def_builtin_const (d->mask, d->name, ftype, d->code);
30091 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
30092 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
30093 ARRAY_SIZE (bdesc_round_args) - 1);
30095 /* pcmpestr[im] insns. */
30096 for (i = 0, d = bdesc_pcmpestr;
30097 i < ARRAY_SIZE (bdesc_pcmpestr);
30098 i++, d++)
30100 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
30101 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30102 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30103 else
30104 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30105 def_builtin_const (d->mask, d->name, ftype, d->code);
30107 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
30108 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
30109 ARRAY_SIZE (bdesc_pcmpestr) - 1);
30111 /* pcmpistr[im] insns. */
30112 for (i = 0, d = bdesc_pcmpistr;
30113 i < ARRAY_SIZE (bdesc_pcmpistr);
30114 i++, d++)
30116 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
30117 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30118 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30119 else
30120 ftype = INT_FTYPE_V16QI_V16QI_INT;
30121 def_builtin_const (d->mask, d->name, ftype, d->code);
30123 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
30124 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
30125 ARRAY_SIZE (bdesc_pcmpistr) - 1);
30127 /* comi/ucomi insns. */
30128 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30130 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
30131 if (d->mask == OPTION_MASK_ISA_SSE2)
30132 ftype = INT_FTYPE_V2DF_V2DF;
30133 else
30134 ftype = INT_FTYPE_V4SF_V4SF;
30135 def_builtin_const (d->mask, d->name, ftype, d->code);
30137 BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
30138 IX86_BUILTIN__BDESC_COMI_FIRST,
30139 ARRAY_SIZE (bdesc_comi) - 1);
30141 /* SSE */
30142 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30143 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30144 def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30145 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30147 /* SSE or 3DNow!A */
30148 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30149 /* As it uses V4HImode, we have to require -mmmx too. */
30150 | OPTION_MASK_ISA_MMX,
30151 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30152 IX86_BUILTIN_MASKMOVQ);
30154 /* SSE2 */
30155 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30156 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30159 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30160 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30161 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30163 /* SSE3. */
30164 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30165 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30166 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30167 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30169 /* AES */
30170 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30171 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30172 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30173 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30174 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30175 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30176 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30177 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30178 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30179 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30180 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30181 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30183 /* PCLMUL */
30184 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30185 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30187 /* RDRND */
30188 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30189 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30190 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30191 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30192 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30193 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30194 IX86_BUILTIN_RDRAND64_STEP);
30196 /* AVX2 */
30197 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30198 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30199 IX86_BUILTIN_GATHERSIV2DF);
30201 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30202 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30203 IX86_BUILTIN_GATHERSIV4DF);
30205 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30206 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30207 IX86_BUILTIN_GATHERDIV2DF);
30209 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30210 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30211 IX86_BUILTIN_GATHERDIV4DF);
30213 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30214 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30215 IX86_BUILTIN_GATHERSIV4SF);
30217 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30218 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30219 IX86_BUILTIN_GATHERSIV8SF);
30221 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30222 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30223 IX86_BUILTIN_GATHERDIV4SF);
30225 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30226 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30227 IX86_BUILTIN_GATHERDIV8SF);
30229 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30230 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30231 IX86_BUILTIN_GATHERSIV2DI);
30233 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30234 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30235 IX86_BUILTIN_GATHERSIV4DI);
30237 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30238 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30239 IX86_BUILTIN_GATHERDIV2DI);
30241 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30242 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30243 IX86_BUILTIN_GATHERDIV4DI);
30245 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30246 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30247 IX86_BUILTIN_GATHERSIV4SI);
30249 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30250 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30251 IX86_BUILTIN_GATHERSIV8SI);
30253 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30254 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30255 IX86_BUILTIN_GATHERDIV4SI);
30257 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30258 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30259 IX86_BUILTIN_GATHERDIV8SI);
30261 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
30262 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30263 IX86_BUILTIN_GATHERALTSIV4DF);
30265 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
30266 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30267 IX86_BUILTIN_GATHERALTDIV8SF);
30269 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
30270 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30271 IX86_BUILTIN_GATHERALTSIV4DI);
30273 def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
30274 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30275 IX86_BUILTIN_GATHERALTDIV8SI);
30277 /* AVX512F */
30278 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30279 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
30280 IX86_BUILTIN_GATHER3SIV16SF);
30282 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30283 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
30284 IX86_BUILTIN_GATHER3SIV8DF);
30286 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30287 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
30288 IX86_BUILTIN_GATHER3DIV16SF);
30290 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30291 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
30292 IX86_BUILTIN_GATHER3DIV8DF);
30294 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30295 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
30296 IX86_BUILTIN_GATHER3SIV16SI);
30298 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30299 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
30300 IX86_BUILTIN_GATHER3SIV8DI);
30302 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30303 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
30304 IX86_BUILTIN_GATHER3DIV16SI);
30306 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30307 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
30308 IX86_BUILTIN_GATHER3DIV8DI);
30310 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
30311 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30312 IX86_BUILTIN_GATHER3ALTSIV8DF);
30314 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
30315 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30316 IX86_BUILTIN_GATHER3ALTDIV16SF);
30318 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
30319 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30320 IX86_BUILTIN_GATHER3ALTSIV8DI);
30322 def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
30323 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30324 IX86_BUILTIN_GATHER3ALTDIV16SI);
30326 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30327 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
30328 IX86_BUILTIN_SCATTERSIV16SF);
30330 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30331 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
30332 IX86_BUILTIN_SCATTERSIV8DF);
30334 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
30335 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
30336 IX86_BUILTIN_SCATTERDIV16SF);
30338 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
30339 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
30340 IX86_BUILTIN_SCATTERDIV8DF);
30342 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
30343 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
30344 IX86_BUILTIN_SCATTERSIV16SI);
30346 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
30347 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
30348 IX86_BUILTIN_SCATTERSIV8DI);
30350 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
30351 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
30352 IX86_BUILTIN_SCATTERDIV16SI);
30354 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
30355 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
30356 IX86_BUILTIN_SCATTERDIV8DI);
30358 /* AVX512VL */
30359 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
30360 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
30361 IX86_BUILTIN_GATHER3SIV2DF);
30363 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
30364 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
30365 IX86_BUILTIN_GATHER3SIV4DF);
30367 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
30368 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
30369 IX86_BUILTIN_GATHER3DIV2DF);
30371 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
30372 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
30373 IX86_BUILTIN_GATHER3DIV4DF);
30375 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
30376 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
30377 IX86_BUILTIN_GATHER3SIV4SF);
30379 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
30380 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
30381 IX86_BUILTIN_GATHER3SIV8SF);
30383 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
30384 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
30385 IX86_BUILTIN_GATHER3DIV4SF);
30387 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
30388 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
30389 IX86_BUILTIN_GATHER3DIV8SF);
30391 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
30392 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
30393 IX86_BUILTIN_GATHER3SIV2DI);
30395 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
30396 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
30397 IX86_BUILTIN_GATHER3SIV4DI);
30399 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
30400 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
30401 IX86_BUILTIN_GATHER3DIV2DI);
30403 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
30404 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
30405 IX86_BUILTIN_GATHER3DIV4DI);
30407 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
30408 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
30409 IX86_BUILTIN_GATHER3SIV4SI);
30411 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
30412 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
30413 IX86_BUILTIN_GATHER3SIV8SI);
30415 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
30416 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
30417 IX86_BUILTIN_GATHER3DIV4SI);
30419 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
30420 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
30421 IX86_BUILTIN_GATHER3DIV8SI);
30423 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
30424 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
30425 IX86_BUILTIN_GATHER3ALTSIV4DF);
30427 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
30428 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
30429 IX86_BUILTIN_GATHER3ALTDIV8SF);
30431 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
30432 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
30433 IX86_BUILTIN_GATHER3ALTSIV4DI);
30435 def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
30436 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
30437 IX86_BUILTIN_GATHER3ALTDIV8SI);
30439 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
30440 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
30441 IX86_BUILTIN_SCATTERSIV8SF);
30443 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
30444 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
30445 IX86_BUILTIN_SCATTERSIV4SF);
30447 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
30448 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
30449 IX86_BUILTIN_SCATTERSIV4DF);
30451 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
30452 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
30453 IX86_BUILTIN_SCATTERSIV2DF);
30455 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
30456 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
30457 IX86_BUILTIN_SCATTERDIV8SF);
30459 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
30460 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
30461 IX86_BUILTIN_SCATTERDIV4SF);
30463 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
30464 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
30465 IX86_BUILTIN_SCATTERDIV4DF);
30467 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
30468 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
30469 IX86_BUILTIN_SCATTERDIV2DF);
30471 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
30472 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
30473 IX86_BUILTIN_SCATTERSIV8SI);
30475 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
30476 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
30477 IX86_BUILTIN_SCATTERSIV4SI);
30479 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
30480 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
30481 IX86_BUILTIN_SCATTERSIV4DI);
30483 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
30484 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
30485 IX86_BUILTIN_SCATTERSIV2DI);
30487 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
30488 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
30489 IX86_BUILTIN_SCATTERDIV8SI);
30491 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
30492 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
30493 IX86_BUILTIN_SCATTERDIV4SI);
30495 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
30496 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
30497 IX86_BUILTIN_SCATTERDIV4DI);
30499 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
30500 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
30501 IX86_BUILTIN_SCATTERDIV2DI);
30502 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
30503 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
30504 IX86_BUILTIN_SCATTERALTSIV8DF);
30506 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
30507 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
30508 IX86_BUILTIN_SCATTERALTDIV16SF);
30510 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
30511 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
30512 IX86_BUILTIN_SCATTERALTSIV8DI);
30514 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
30515 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
30516 IX86_BUILTIN_SCATTERALTDIV16SI);
30518 /* AVX512PF */
30519 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
30520 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30521 IX86_BUILTIN_GATHERPFDPD);
30522 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
30523 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30524 IX86_BUILTIN_GATHERPFDPS);
30525 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
30526 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30527 IX86_BUILTIN_GATHERPFQPD);
30528 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
30529 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30530 IX86_BUILTIN_GATHERPFQPS);
30531 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
30532 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
30533 IX86_BUILTIN_SCATTERPFDPD);
30534 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
30535 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
30536 IX86_BUILTIN_SCATTERPFDPS);
30537 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
30538 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30539 IX86_BUILTIN_SCATTERPFQPD);
30540 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
30541 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
30542 IX86_BUILTIN_SCATTERPFQPS);
30544 /* SHA */
30545 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
30546 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
30547 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
30548 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
30549 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
30550 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
30551 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
30552 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
30553 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
30554 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
30555 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
30556 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
30557 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
30558 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
30560 /* RTM. */
30561 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
30562 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
30564 /* MMX access to the vec_init patterns. */
30565 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
30566 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
30568 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
30569 V4HI_FTYPE_HI_HI_HI_HI,
30570 IX86_BUILTIN_VEC_INIT_V4HI);
30572 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
30573 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
30574 IX86_BUILTIN_VEC_INIT_V8QI);
30576 /* Access to the vec_extract patterns. */
30577 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
30578 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
30579 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
30580 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
30581 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
30582 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
30583 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
30584 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
30585 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
30586 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
30588 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30589 /* As it uses V4HImode, we have to require -mmmx too. */
30590 | OPTION_MASK_ISA_MMX,
30591 "__builtin_ia32_vec_ext_v4hi",
30592 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
30594 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
30595 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
30597 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
30598 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
30600 /* Access to the vec_set patterns. */
30601 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
30602 "__builtin_ia32_vec_set_v2di",
30603 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
30605 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
30606 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
30608 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
30609 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
30611 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
30612 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
30614 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
30615 /* As it uses V4HImode, we have to require -mmmx too. */
30616 | OPTION_MASK_ISA_MMX,
30617 "__builtin_ia32_vec_set_v4hi",
30618 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
30620 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
30621 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
30623 /* RDSEED */
30624 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
30625 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
30626 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
30627 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
30628 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
30629 "__builtin_ia32_rdseed_di_step",
30630 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
30632 /* ADCX */
30633 def_builtin (0, "__builtin_ia32_addcarryx_u32",
30634 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
30635 def_builtin (OPTION_MASK_ISA_64BIT,
30636 "__builtin_ia32_addcarryx_u64",
30637 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30638 IX86_BUILTIN_ADDCARRYX64);
30640 /* SBB */
30641 def_builtin (0, "__builtin_ia32_sbb_u32",
30642 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
30643 def_builtin (OPTION_MASK_ISA_64BIT,
30644 "__builtin_ia32_sbb_u64",
30645 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
30646 IX86_BUILTIN_SBB64);
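/* Illustrative use (user code; operand names are hypothetical): the
   add-with-carry builtins take a carry-in byte, two operands and a
   pointer for the sum, and return the carry-out, so a double-word add
   could be written as

     unsigned int lo, hi;
     unsigned char c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &lo);
     __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &hi);

   __builtin_ia32_sbb_u32 has the same shape for subtract-with-borrow.  */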
30648 /* Read/write FLAGS. */
30649 def_builtin (0, "__builtin_ia32_readeflags_u32",
30650 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30651 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
30652 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
30653 def_builtin (0, "__builtin_ia32_writeeflags_u32",
30654 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
30655 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
30656 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
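/* For illustration: these read and write the EFLAGS/RFLAGS register, e.g.

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags);

   per the UNSIGNED_FTYPE_VOID / VOID_FTYPE_UNSIGNED signatures above.  */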
30658 /* CLFLUSHOPT. */
30659 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
30660 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
30662 /* CLWB. */
30663 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
30664 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
30666 /* MONITORX and MWAITX. */
30667 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
30668 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
30669 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
30670 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
30672 /* CLZERO. */
30673 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
30674 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
30676 /* Add FMA4 multi-arg argument instructions */
30677 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30679 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
30680 if (d->name == 0)
30681 continue;
30683 ftype = (enum ix86_builtin_func_type) d->flag;
30684 def_builtin_const (d->mask, d->name, ftype, d->code);
30686 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
30687 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
30688 ARRAY_SIZE (bdesc_multi_arg) - 1);
30690 /* Add CET intrinsics. */
30691 for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
30693 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
30694 if (d->name == 0)
30695 continue;
30697 ftype = (enum ix86_builtin_func_type) d->flag;
30698 def_builtin2 (d->mask, d->name, ftype, d->code);
30700 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
30701 IX86_BUILTIN__BDESC_CET_FIRST,
30702 ARRAY_SIZE (bdesc_cet) - 1);
30704 for (i = 0, d = bdesc_cet_rdssp;
30705 i < ARRAY_SIZE (bdesc_cet_rdssp);
30706 i++, d++)
30708 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
30709 if (d->name == 0)
30710 continue;
30712 ftype = (enum ix86_builtin_func_type) d->flag;
30713 def_builtin2 (d->mask, d->name, ftype, d->code);
30715 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
30716 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
30717 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
30720 static void
30721 ix86_init_mpx_builtins ()
30723 const struct builtin_description * d;
30724 enum ix86_builtin_func_type ftype;
30725 tree decl;
30726 size_t i;
30728 for (i = 0, d = bdesc_mpx;
30729 i < ARRAY_SIZE (bdesc_mpx);
30730 i++, d++)
30732 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
30733 if (d->name == 0)
30734 continue;
30736 ftype = (enum ix86_builtin_func_type) d->flag;
30737 decl = def_builtin (d->mask, d->name, ftype, d->code);
30739 /* Without the leaf and nothrow flags on MPX builtins,
30740 abnormal edges may follow their calls when setjmp
30741 is present in the function. Since there may be many
30742 MPX builtin calls, this causes lots of useless
30743 edges and enormous PHI nodes. To avoid this we mark
30744 MPX builtins as leaf and nothrow. */
30745 if (decl)
30747 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30748 NULL_TREE);
30749 TREE_NOTHROW (decl) = 1;
30751 else
30753 ix86_builtins_isa[(int)d->code].leaf_p = true;
30754 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30757 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
30758 IX86_BUILTIN__BDESC_MPX_FIRST,
30759 ARRAY_SIZE (bdesc_mpx) - 1);
30761 for (i = 0, d = bdesc_mpx_const;
30762 i < ARRAY_SIZE (bdesc_mpx_const);
30763 i++, d++)
30765 BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
30766 if (d->name == 0)
30767 continue;
30769 ftype = (enum ix86_builtin_func_type) d->flag;
30770 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
30772 if (decl)
30774 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30775 NULL_TREE);
30776 TREE_NOTHROW (decl) = 1;
30778 else
30780 ix86_builtins_isa[(int)d->code].leaf_p = true;
30781 ix86_builtins_isa[(int)d->code].nothrow_p = true;
30784 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
30785 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
30786 ARRAY_SIZE (bdesc_mpx_const) - 1);
30788 #undef BDESC_VERIFY
30789 #undef BDESC_VERIFYS
30791 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
30792 to return a pointer to VERSION_DECL if the outcome of the expression
30793 formed by PREDICATE_CHAIN is true. This function will be called during
30794 version dispatch to decide which function version to execute. It returns
30795 the basic block at the end, to which more conditions can be added. */
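/* Sketch of the GIMPLE appended for one version (illustrative only;
   predicate_1 .. predicate_n stand for the __builtin_cpu_is or
   __builtin_cpu_supports calls from PREDICATE_CHAIN):

     cond_1 = predicate_1 (arg_1);
     ...
     and_tmp = MIN_EXPR <cond_n, and_tmp>;
     if (and_tmp > 0)
       return (void *) &version_decl;

   with control otherwise falling through to the returned block, where
   the test for the next version is added.  */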
30797 static basic_block
30798 add_condition_to_bb (tree function_decl, tree version_decl,
30799 tree predicate_chain, basic_block new_bb)
30801 gimple *return_stmt;
30802 tree convert_expr, result_var;
30803 gimple *convert_stmt;
30804 gimple *call_cond_stmt;
30805 gimple *if_else_stmt;
30807 basic_block bb1, bb2, bb3;
30808 edge e12, e23;
30810 tree cond_var, and_expr_var = NULL_TREE;
30811 gimple_seq gseq;
30813 tree predicate_decl, predicate_arg;
30815 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
30817 gcc_assert (new_bb != NULL);
30818 gseq = bb_seq (new_bb);
30821 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
30822 build_fold_addr_expr (version_decl));
30823 result_var = create_tmp_var (ptr_type_node);
30824 convert_stmt = gimple_build_assign (result_var, convert_expr);
30825 return_stmt = gimple_build_return (result_var);
30827 if (predicate_chain == NULL_TREE)
30829 gimple_seq_add_stmt (&gseq, convert_stmt);
30830 gimple_seq_add_stmt (&gseq, return_stmt);
30831 set_bb_seq (new_bb, gseq);
30832 gimple_set_bb (convert_stmt, new_bb);
30833 gimple_set_bb (return_stmt, new_bb);
30834 pop_cfun ();
30835 return new_bb;
30838 while (predicate_chain != NULL)
30840 cond_var = create_tmp_var (integer_type_node);
30841 predicate_decl = TREE_PURPOSE (predicate_chain);
30842 predicate_arg = TREE_VALUE (predicate_chain);
30843 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
30844 gimple_call_set_lhs (call_cond_stmt, cond_var);
30846 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
30847 gimple_set_bb (call_cond_stmt, new_bb);
30848 gimple_seq_add_stmt (&gseq, call_cond_stmt);
30850 predicate_chain = TREE_CHAIN (predicate_chain);
30852 if (and_expr_var == NULL)
30853 and_expr_var = cond_var;
30854 else
30856 gimple *assign_stmt;
30857 /* Use MIN_EXPR to check whether any integer is zero:
30858 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
30859 assign_stmt = gimple_build_assign (and_expr_var,
30860 build2 (MIN_EXPR, integer_type_node,
30861 cond_var, and_expr_var));
30863 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
30864 gimple_set_bb (assign_stmt, new_bb);
30865 gimple_seq_add_stmt (&gseq, assign_stmt);
30869 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
30870 integer_zero_node,
30871 NULL_TREE, NULL_TREE);
30872 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
30873 gimple_set_bb (if_else_stmt, new_bb);
30874 gimple_seq_add_stmt (&gseq, if_else_stmt);
30876 gimple_seq_add_stmt (&gseq, convert_stmt);
30877 gimple_seq_add_stmt (&gseq, return_stmt);
30878 set_bb_seq (new_bb, gseq);
30880 bb1 = new_bb;
30881 e12 = split_block (bb1, if_else_stmt);
30882 bb2 = e12->dest;
30883 e12->flags &= ~EDGE_FALLTHRU;
30884 e12->flags |= EDGE_TRUE_VALUE;
30886 e23 = split_block (bb2, return_stmt);
30888 gimple_set_bb (convert_stmt, bb2);
30889 gimple_set_bb (return_stmt, bb2);
30891 bb3 = e23->dest;
30892 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
30894 remove_edge (e23);
30895 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
30897 pop_cfun ();
30899 return bb3;
30902 /* This parses the attribute arguments to target in DECL and determines
30903 the right builtin to use to match the platform specification.
30904 It returns the priority value for this version decl. If PREDICATE_LIST
30905 is not NULL, it stores the list of cpu features that need to be checked
30906 before dispatching this function. */
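/* For example (illustrative): a version declared with
   __attribute__ ((target ("arch=haswell"))) yields priority P_PROC_AVX2
   and a predicate chain calling __builtin_cpu_is ("haswell"), while
   target ("avx2") yields P_AVX2 and __builtin_cpu_supports ("avx2").  */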
30908 static unsigned int
30909 get_builtin_code_for_version (tree decl, tree *predicate_list)
30911 tree attrs;
30912 struct cl_target_option cur_target;
30913 tree target_node;
30914 struct cl_target_option *new_target;
30915 const char *arg_str = NULL;
30916 const char *attrs_str = NULL;
30917 char *tok_str = NULL;
30918 char *token;
30920 /* Priority of i386 features, greater value is higher priority. This is
30921 used to decide the order in which function dispatch must happen. For
30922 instance, a version specialized for SSE4.2 should be checked for dispatch
30923 before a version for SSE3, as SSE4.2 implies SSE3. */
30924 enum feature_priority
30926 P_ZERO = 0,
30927 P_MMX,
30928 P_SSE,
30929 P_SSE2,
30930 P_SSE3,
30931 P_SSSE3,
30932 P_PROC_SSSE3,
30933 P_SSE4_A,
30934 P_PROC_SSE4_A,
30935 P_SSE4_1,
30936 P_SSE4_2,
30937 P_PROC_SSE4_2,
30938 P_POPCNT,
30939 P_AES,
30940 P_PCLMUL,
30941 P_AVX,
30942 P_PROC_AVX,
30943 P_BMI,
30944 P_PROC_BMI,
30945 P_FMA4,
30946 P_XOP,
30947 P_PROC_XOP,
30948 P_FMA,
30949 P_PROC_FMA,
30950 P_BMI2,
30951 P_AVX2,
30952 P_PROC_AVX2,
30953 P_AVX512F,
30954 P_PROC_AVX512F
30957 enum feature_priority priority = P_ZERO;
30959 /* These are the target attribute strings for which a dispatcher is
30960 available, from fold_builtin_cpu. */
30962 static struct _feature_list
30964 const char *const name;
30965 const enum feature_priority priority;
30967 const feature_list[] =
30969 {"mmx", P_MMX},
30970 {"sse", P_SSE},
30971 {"sse2", P_SSE2},
30972 {"sse3", P_SSE3},
30973 {"sse4a", P_SSE4_A},
30974 {"ssse3", P_SSSE3},
30975 {"sse4.1", P_SSE4_1},
30976 {"sse4.2", P_SSE4_2},
30977 {"popcnt", P_POPCNT},
30978 {"aes", P_AES},
30979 {"pclmul", P_PCLMUL},
30980 {"avx", P_AVX},
30981 {"bmi", P_BMI},
30982 {"fma4", P_FMA4},
30983 {"xop", P_XOP},
30984 {"fma", P_FMA},
30985 {"bmi2", P_BMI2},
30986 {"avx2", P_AVX2},
30987 {"avx512f", P_AVX512F}
30991 static unsigned int NUM_FEATURES
30992 = sizeof (feature_list) / sizeof (struct _feature_list);
30994 unsigned int i;
30996 tree predicate_chain = NULL_TREE;
30997 tree predicate_decl, predicate_arg;
30999 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31000 gcc_assert (attrs != NULL);
31002 attrs = TREE_VALUE (TREE_VALUE (attrs));
31004 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31005 attrs_str = TREE_STRING_POINTER (attrs);
31007 /* Return priority zero for default function. */
31008 if (strcmp (attrs_str, "default") == 0)
31009 return 0;
31011 /* Handle arch= if specified. For priority, set it to be 1 more than
31012 the best instruction set the processor can handle. For instance, if
31013 there is a version for atom and a version for ssse3 (the highest ISA
31014 priority for atom), the atom version must be checked for dispatch
31015 before the ssse3 version. */
31016 if (strstr (attrs_str, "arch=") != NULL)
31018 cl_target_option_save (&cur_target, &global_options);
31019 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31020 &global_options_set);
31022 gcc_assert (target_node);
31023 new_target = TREE_TARGET_OPTION (target_node);
31024 gcc_assert (new_target);
31026 if (new_target->arch_specified && new_target->arch > 0)
31028 switch (new_target->arch)
31030 case PROCESSOR_CORE2:
31031 arg_str = "core2";
31032 priority = P_PROC_SSSE3;
31033 break;
31034 case PROCESSOR_NEHALEM:
31035 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31037 arg_str = "westmere";
31038 priority = P_AES;
31040 else
31042 /* We translate "arch=corei7" and "arch=nehalem" to
31043 "corei7" so that it will be mapped to M_INTEL_COREI7
31044 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31045 arg_str = "corei7";
31046 priority = P_PROC_SSE4_2;
31048 break;
31049 case PROCESSOR_SANDYBRIDGE:
31050 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31051 arg_str = "ivybridge";
31052 else
31053 arg_str = "sandybridge";
31054 priority = P_PROC_AVX;
31055 break;
31056 case PROCESSOR_HASWELL:
31057 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
31058 arg_str = "skylake-avx512";
31059 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
31060 arg_str = "skylake";
31061 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31062 arg_str = "broadwell";
31063 else
31064 arg_str = "haswell";
31065 priority = P_PROC_AVX2;
31066 break;
31067 case PROCESSOR_BONNELL:
31068 arg_str = "bonnell";
31069 priority = P_PROC_SSSE3;
31070 break;
31071 case PROCESSOR_KNL:
31072 arg_str = "knl";
31073 priority = P_PROC_AVX512F;
31074 break;
31075 case PROCESSOR_KNM:
31076 arg_str = "knm";
31077 priority = P_PROC_AVX512F;
31078 break;
31079 case PROCESSOR_SILVERMONT:
31080 arg_str = "silvermont";
31081 priority = P_PROC_SSE4_2;
31082 break;
31083 case PROCESSOR_AMDFAM10:
31084 arg_str = "amdfam10h";
31085 priority = P_PROC_SSE4_A;
31086 break;
31087 case PROCESSOR_BTVER1:
31088 arg_str = "btver1";
31089 priority = P_PROC_SSE4_A;
31090 break;
31091 case PROCESSOR_BTVER2:
31092 arg_str = "btver2";
31093 priority = P_PROC_BMI;
31094 break;
31095 case PROCESSOR_BDVER1:
31096 arg_str = "bdver1";
31097 priority = P_PROC_XOP;
31098 break;
31099 case PROCESSOR_BDVER2:
31100 arg_str = "bdver2";
31101 priority = P_PROC_FMA;
31102 break;
31103 case PROCESSOR_BDVER3:
31104 arg_str = "bdver3";
31105 priority = P_PROC_FMA;
31106 break;
31107 case PROCESSOR_BDVER4:
31108 arg_str = "bdver4";
31109 priority = P_PROC_AVX2;
31110 break;
31111 case PROCESSOR_ZNVER1:
31112 arg_str = "znver1";
31113 priority = P_PROC_AVX2;
31114 break;
31118 cl_target_option_restore (&global_options, &cur_target);
31120 if (predicate_list && arg_str == NULL)
31122 error_at (DECL_SOURCE_LOCATION (decl),
31123 "No dispatcher found for the versioning attributes");
31124 return 0;
31127 if (predicate_list)
31129 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31130 /* For a C string literal the length includes the trailing NULL. */
31131 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31132 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31133 predicate_chain);
31137 /* Process feature name. */
31138 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31139 strcpy (tok_str, attrs_str);
31140 token = strtok (tok_str, ",");
31141 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31143 while (token != NULL)
31145 /* Do not process "arch=" */
31146 if (strncmp (token, "arch=", 5) == 0)
31148 token = strtok (NULL, ",");
31149 continue;
31151 for (i = 0; i < NUM_FEATURES; ++i)
31153 if (strcmp (token, feature_list[i].name) == 0)
31155 if (predicate_list)
31157 predicate_arg = build_string_literal (
31158 strlen (feature_list[i].name) + 1,
31159 feature_list[i].name);
31160 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31161 predicate_chain);
31163 /* Find the maximum priority feature. */
31164 if (feature_list[i].priority > priority)
31165 priority = feature_list[i].priority;
31167 break;
31170 if (predicate_list && i == NUM_FEATURES)
31172 error_at (DECL_SOURCE_LOCATION (decl),
31173 "No dispatcher found for %s", token);
31174 return 0;
31176 token = strtok (NULL, ",");
31178 free (tok_str);
31180 if (predicate_list && predicate_chain == NULL_TREE)
31182 error_at (DECL_SOURCE_LOCATION (decl),
31183 "No dispatcher found for the versioning attributes : %s",
31184 attrs_str);
31185 return 0;
31187 else if (predicate_list)
31189 predicate_chain = nreverse (predicate_chain);
31190 *predicate_list = predicate_chain;
31193 return priority;
31196 /* This compares the priority of target features in function DECL1
31197 and DECL2. It returns a positive value if DECL1 has higher priority,
31198 a negative value if DECL2 has higher priority, and 0 if they are the
31199 same. */
31201 static int
31202 ix86_compare_version_priority (tree decl1, tree decl2)
31204 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31205 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31207 return (int)priority1 - (int)priority2;
31210 /* V1 and V2 point to function versions with different priorities
31211 based on the target ISA. This function compares their priorities. */
31213 static int
31214 feature_compare (const void *v1, const void *v2)
31216 typedef struct _function_version_info
31218 tree version_decl;
31219 tree predicate_chain;
31220 unsigned int dispatch_priority;
31221 } function_version_info;
31223 const function_version_info c1 = *(const function_version_info *)v1;
31224 const function_version_info c2 = *(const function_version_info *)v2;
31225 return (c2.dispatch_priority - c1.dispatch_priority);
31228 /* This function generates the dispatch function for
31229 multi-versioned functions. DISPATCH_DECL is the function which will
31230 contain the dispatch logic. FNDECLS are the function choices for
31231 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31232 in DISPATCH_DECL in which the dispatch code is generated. */
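/* Illustrative shape of the resolver body built here, assuming two extra
   versions of a function foo (the names are hypothetical):

     __builtin_cpu_init ();
     if (predicates for foo.arch_haswell hold)  return foo.arch_haswell;
     if (predicates for foo.avx2 hold)          return foo.avx2;
     return foo;

   with the default version always dispatched last.  */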
31234 static int
31235 dispatch_function_versions (tree dispatch_decl,
31236 void *fndecls_p,
31237 basic_block *empty_bb)
31239 tree default_decl;
31240 gimple *ifunc_cpu_init_stmt;
31241 gimple_seq gseq;
31242 int ix;
31243 tree ele;
31244 vec<tree> *fndecls;
31245 unsigned int num_versions = 0;
31246 unsigned int actual_versions = 0;
31247 unsigned int i;
31249 struct _function_version_info
31251 tree version_decl;
31252 tree predicate_chain;
31253 unsigned int dispatch_priority;
31254 }*function_version_info;
31256 gcc_assert (dispatch_decl != NULL
31257 && fndecls_p != NULL
31258 && empty_bb != NULL);
31261 /* fndecls_p is actually a vector. */
31261 fndecls = static_cast<vec<tree> *> (fndecls_p);
31263 /* At least one more version other than the default. */
31264 num_versions = fndecls->length ();
31265 gcc_assert (num_versions >= 2);
31267 function_version_info = (struct _function_version_info *)
31268 XNEWVEC (struct _function_version_info, (num_versions - 1));
31270 /* The first version in the vector is the default decl. */
31271 default_decl = (*fndecls)[0];
31273 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31275 gseq = bb_seq (*empty_bb);
31276 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31277 constructors, so explicitly call __builtin_cpu_init here. */
31278 ifunc_cpu_init_stmt = gimple_build_call_vec (
31279 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31280 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31281 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31282 set_bb_seq (*empty_bb, gseq);
31284 pop_cfun ();
31287 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31289 tree version_decl = ele;
31290 tree predicate_chain = NULL_TREE;
31291 unsigned int priority;
31292 /* Get attribute string, parse it and find the right predicate decl.
31293 The predicate function could be a lengthy combination of many
31294 features, like arch-type and various isa-variants. */
31295 priority = get_builtin_code_for_version (version_decl,
31296 &predicate_chain);
31298 if (predicate_chain == NULL_TREE)
31299 continue;
31301 function_version_info [actual_versions].version_decl = version_decl;
31302 function_version_info [actual_versions].predicate_chain
31303 = predicate_chain;
31304 function_version_info [actual_versions].dispatch_priority = priority;
31305 actual_versions++;
31308 /* Sort the versions according to descending order of dispatch priority. The
31309 priority is based on the ISA. This is not a perfect solution. There
31310 could still be ambiguity. If more than one function version is suitable
31311 to execute, which one should be dispatched? In future, allow the user
31312 to specify a dispatch priority next to the version. */
31313 qsort (function_version_info, actual_versions,
31314 sizeof (struct _function_version_info), feature_compare);
31316 for (i = 0; i < actual_versions; ++i)
31317 *empty_bb = add_condition_to_bb (dispatch_decl,
31318 function_version_info[i].version_decl,
31319 function_version_info[i].predicate_chain,
31320 *empty_bb);
31322 /* Dispatch the default version at the end. */
31323 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31324 NULL, *empty_bb);
31326 free (function_version_info);
31327 return 0;
31330 /* This function changes the assembler name for functions that are
31331 versions. If DECL is a function version and has a "target"
31332 attribute, it appends the attribute string to its assembler name. */
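/* For example (illustrative), a version of foo declared with
   __attribute__ ((target ("avx2"))) gets the assembler name "foo.avx2";
   the default version keeps its original name.  */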
31334 static tree
31335 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31337 tree version_attr;
31338 const char *orig_name, *version_string;
31339 char *attr_str, *assembler_name;
31341 if (DECL_DECLARED_INLINE_P (decl)
31342 && lookup_attribute ("gnu_inline",
31343 DECL_ATTRIBUTES (decl)))
31344 error_at (DECL_SOURCE_LOCATION (decl),
31345 "Function versions cannot be marked as gnu_inline,"
31346 " bodies have to be generated");
31348 if (DECL_VIRTUAL_P (decl)
31349 || DECL_VINDEX (decl))
31350 sorry ("Virtual function multiversioning not supported");
31352 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31354 /* target attribute string cannot be NULL. */
31355 gcc_assert (version_attr != NULL_TREE);
31357 orig_name = IDENTIFIER_POINTER (id);
31358 version_string
31359 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31361 if (strcmp (version_string, "default") == 0)
31362 return id;
31364 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31365 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31367 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31369 /* Allow assembler name to be modified if already set. */
31370 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31371 SET_DECL_RTL (decl, NULL);
31373 tree ret = get_identifier (assembler_name);
31374 XDELETEVEC (attr_str);
31375 XDELETEVEC (assembler_name);
31376 return ret;
31380 static tree
31381 ix86_mangle_decl_assembler_name (tree decl, tree id)
31383 /* For function version, add the target suffix to the assembler name. */
31384 if (TREE_CODE (decl) == FUNCTION_DECL
31385 && DECL_FUNCTION_VERSIONED (decl))
31386 id = ix86_mangle_function_version_assembler_name (decl, id);
31387 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31388 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31389 #endif
31391 return id;
31394 /* Make a dispatcher declaration for the multi-versioned function DECL.
31395 Calls to DECL function will be replaced with calls to the dispatcher
31396 by the front-end. Returns the decl of the dispatcher function. */
31398 static tree
31399 ix86_get_function_versions_dispatcher (void *decl)
31401 tree fn = (tree) decl;
31402 struct cgraph_node *node = NULL;
31403 struct cgraph_node *default_node = NULL;
31404 struct cgraph_function_version_info *node_v = NULL;
31405 struct cgraph_function_version_info *first_v = NULL;
31407 tree dispatch_decl = NULL;
31409 struct cgraph_function_version_info *default_version_info = NULL;
31411 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31413 node = cgraph_node::get (fn);
31414 gcc_assert (node != NULL);
31416 node_v = node->function_version ();
31417 gcc_assert (node_v != NULL);
31419 if (node_v->dispatcher_resolver != NULL)
31420 return node_v->dispatcher_resolver;
31422 /* Find the default version and make it the first node. */
31423 first_v = node_v;
31424 /* Go to the beginning of the chain. */
31425 while (first_v->prev != NULL)
31426 first_v = first_v->prev;
31427 default_version_info = first_v;
31428 while (default_version_info != NULL)
31430 if (is_function_default_version
31431 (default_version_info->this_node->decl))
31432 break;
31433 default_version_info = default_version_info->next;
31436 /* If there is no default node, just return NULL. */
31437 if (default_version_info == NULL)
31438 return NULL;
31440 /* Make default info the first node. */
31441 if (first_v != default_version_info)
31443 default_version_info->prev->next = default_version_info->next;
31444 if (default_version_info->next)
31445 default_version_info->next->prev = default_version_info->prev;
31446 first_v->prev = default_version_info;
31447 default_version_info->next = first_v;
31448 default_version_info->prev = NULL;
31451 default_node = default_version_info->this_node;
31453 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31454 if (targetm.has_ifunc_p ())
31456 struct cgraph_function_version_info *it_v = NULL;
31457 struct cgraph_node *dispatcher_node = NULL;
31458 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31460 /* Right now, the dispatching is done via ifunc. */
31461 dispatch_decl = make_dispatcher_decl (default_node->decl);
31463 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31464 gcc_assert (dispatcher_node != NULL);
31465 dispatcher_node->dispatcher_function = 1;
31466 dispatcher_version_info
31467 = dispatcher_node->insert_new_function_version ();
31468 dispatcher_version_info->next = default_version_info;
31469 dispatcher_node->definition = 1;
31471 /* Set the dispatcher for all the versions. */
31472 it_v = default_version_info;
31473 while (it_v != NULL)
31475 it_v->dispatcher_resolver = dispatch_decl;
31476 it_v = it_v->next;
31479 else
31480 #endif
31482 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31483 "multiversioning needs ifunc which is not supported "
31484 "on this target");
31487 return dispatch_decl;
31490 /* Make the resolver function decl to dispatch the versions of
31491 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
31492 ifunc alias that will point to the created resolver. Create an
31493 empty basic block in the resolver and store the pointer in
31494 EMPTY_BB. Return the decl of the resolver function. */
31496 static tree
31497 make_resolver_func (const tree default_decl,
31498 const tree ifunc_alias_decl,
31499 basic_block *empty_bb)
31501 char *resolver_name;
31502 tree decl, type, decl_name, t;
31504 /* IFUNCs have to be globally visible. So, if the default_decl is
31505 not, then the name of the IFUNC should be made unique. */
31506 if (TREE_PUBLIC (default_decl) == 0)
31508 char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
31509 symtab->change_decl_assembler_name (ifunc_alias_decl,
31510 get_identifier (ifunc_name));
31511 XDELETEVEC (ifunc_name);
31514 resolver_name = make_unique_name (default_decl, "resolver", false);
31516 /* The resolver function should return a (void *). */
31517 type = build_function_type_list (ptr_type_node, NULL_TREE);
31519 decl = build_fn_decl (resolver_name, type);
31520 decl_name = get_identifier (resolver_name);
31521 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31523 DECL_NAME (decl) = decl_name;
31524 TREE_USED (decl) = 1;
31525 DECL_ARTIFICIAL (decl) = 1;
31526 DECL_IGNORED_P (decl) = 1;
31527 TREE_PUBLIC (decl) = 0;
31528 DECL_UNINLINABLE (decl) = 1;
31530 /* Resolver is not external, body is generated. */
31531 DECL_EXTERNAL (decl) = 0;
31532 DECL_EXTERNAL (ifunc_alias_decl) = 0;
31534 DECL_CONTEXT (decl) = NULL_TREE;
31535 DECL_INITIAL (decl) = make_node (BLOCK);
31536 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31538 if (DECL_COMDAT_GROUP (default_decl)
31539 || TREE_PUBLIC (default_decl))
31541 /* In this case, each translation unit with a call to this
31542 versioned function will put out a resolver. Ensure it
31543 is comdat to keep just one copy. */
31544 DECL_COMDAT (decl) = 1;
31545 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
31547 /* Build result decl and add to function_decl. */
31548 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31549 DECL_ARTIFICIAL (t) = 1;
31550 DECL_IGNORED_P (t) = 1;
31551 DECL_RESULT (decl) = t;
31553 gimplify_function_tree (decl);
31554 push_cfun (DECL_STRUCT_FUNCTION (decl));
31555 *empty_bb = init_lowered_empty_function (decl, false,
31556 profile_count::uninitialized ());
31558 cgraph_node::add_new_function (decl, true);
31559 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31561 pop_cfun ();
31563 gcc_assert (ifunc_alias_decl != NULL);
31564 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
31565 DECL_ATTRIBUTES (ifunc_alias_decl)
31566 = make_attribute ("ifunc", resolver_name,
31567 DECL_ATTRIBUTES (ifunc_alias_decl));
31569 /* Create the alias for dispatch to resolver here. */
31570 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
31571 XDELETEVEC (resolver_name);
31572 return decl;
31575 /* Generate the dispatching code body to dispatch multi-versioned function
31576 DECL. The target hook is called to process the "target" attributes and
31577 provide the code to dispatch the right function at run-time. NODE points
31578 to the dispatcher decl whose body will be created. */
31580 static tree
31581 ix86_generate_version_dispatcher_body (void *node_p)
31583 tree resolver_decl;
31584 basic_block empty_bb;
31585 tree default_ver_decl;
31586 struct cgraph_node *versn;
31587 struct cgraph_node *node;
31589 struct cgraph_function_version_info *node_version_info = NULL;
31590 struct cgraph_function_version_info *versn_info = NULL;
31592 node = (cgraph_node *)node_p;
31594 node_version_info = node->function_version ();
31595 gcc_assert (node->dispatcher_function
31596 && node_version_info != NULL);
31598 if (node_version_info->dispatcher_resolver)
31599 return node_version_info->dispatcher_resolver;
31601 /* The first version in the chain corresponds to the default version. */
31602 default_ver_decl = node_version_info->next->this_node->decl;
31604 /* node is going to be an alias, so remove the finalized bit. */
31605 node->definition = false;
31607 resolver_decl = make_resolver_func (default_ver_decl,
31608 node->decl, &empty_bb);
31610 node_version_info->dispatcher_resolver = resolver_decl;
31612 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
31614 auto_vec<tree, 2> fn_ver_vec;
31616 for (versn_info = node_version_info->next; versn_info;
31617 versn_info = versn_info->next)
31619 versn = versn_info->this_node;
31620 /* Check for virtual functions here again, as by this time it should
31621 have been determined if this function needs a vtable index or
31622 not. This happens for methods in derived classes that override
31623 virtual methods in base classes but are not explicitly marked as
31624 virtual. */
31625 if (DECL_VINDEX (versn->decl))
31626 sorry ("Virtual function multiversioning not supported");
31628 fn_ver_vec.safe_push (versn->decl);
31631 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
31632 cgraph_edge::rebuild_edges ();
31633 pop_cfun ();
31634 return resolver_decl;
31636 /* This builds the processor_model struct type defined in
31637 libgcc/config/i386/cpuinfo.c */
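/* Sketch of the record this function lays out (field names and types as
   built below):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */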
31639 static tree
31640 build_processor_model_struct (void)
31642 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
31643 "__cpu_features"};
31644 tree field = NULL_TREE, field_chain = NULL_TREE;
31645 int i;
31646 tree type = make_node (RECORD_TYPE);
31648 /* The first 3 fields are unsigned int. */
31649 for (i = 0; i < 3; ++i)
31651 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31652 get_identifier (field_name[i]), unsigned_type_node);
31653 if (field_chain != NULL_TREE)
31654 DECL_CHAIN (field) = field_chain;
31655 field_chain = field;
31658 /* The last field is an array of unsigned integers of size one. */
31659 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
31660 get_identifier (field_name[3]),
31661 build_array_type (unsigned_type_node,
31662 build_index_type (size_one_node)));
31663 if (field_chain != NULL_TREE)
31664 DECL_CHAIN (field) = field_chain;
31665 field_chain = field;
31667 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
31668 return type;
31671 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
31673 static tree
31674 make_var_decl (tree type, const char *name)
31676 tree new_decl;
31678 new_decl = build_decl (UNKNOWN_LOCATION,
31679 VAR_DECL,
31680 get_identifier(name),
31681 type);
31683 DECL_EXTERNAL (new_decl) = 1;
31684 TREE_STATIC (new_decl) = 1;
31685 TREE_PUBLIC (new_decl) = 1;
31686 DECL_INITIAL (new_decl) = 0;
31687 DECL_ARTIFICIAL (new_decl) = 0;
31688 DECL_PRESERVE_P (new_decl) = 1;
31690 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
31691 assemble_variable (new_decl, 0, 0, 0);
31693 return new_decl;
31696 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
31697 into a check against an integer defined in libgcc/config/i386/cpuinfo.c. */
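/* Roughly (illustrative), the folds produced below are:

     __builtin_cpu_is ("amd")        ->  __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_supports ("avx2") ->  __cpu_model.__cpu_features[0]
                                          & (1 << F_AVX2)

   with the M_* and F_* values taken from the enums that follow.  */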
31699 static tree
31700 fold_builtin_cpu (tree fndecl, tree *args)
31702 unsigned int i;
31703 enum ix86_builtins fn_code = (enum ix86_builtins)
31704 DECL_FUNCTION_CODE (fndecl);
31705 tree param_string_cst = NULL;
31707 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
31708 enum processor_features
31710 F_CMOV = 0,
31711 F_MMX,
31712 F_POPCNT,
31713 F_SSE,
31714 F_SSE2,
31715 F_SSE3,
31716 F_SSSE3,
31717 F_SSE4_1,
31718 F_SSE4_2,
31719 F_AVX,
31720 F_AVX2,
31721 F_SSE4_A,
31722 F_FMA4,
31723 F_XOP,
31724 F_FMA,
31725 F_AVX512F,
31726 F_BMI,
31727 F_BMI2,
31728 F_AES,
31729 F_PCLMUL,
31730 F_AVX512VL,
31731 F_AVX512BW,
31732 F_AVX512DQ,
31733 F_AVX512CD,
31734 F_AVX512ER,
31735 F_AVX512PF,
31736 F_AVX512VBMI,
31737 F_AVX512IFMA,
31738 F_AVX5124VNNIW,
31739 F_AVX5124FMAPS,
31740 F_AVX512VPOPCNTDQ,
31741 F_MAX
31744 /* These are the values for vendor types and CPU types and subtypes
31745 in cpuinfo.c. CPU types and subtypes must have the corresponding
31746 start value subtracted from them. */
31747 enum processor_model
31749 M_INTEL = 1,
31750 M_AMD,
31751 M_CPU_TYPE_START,
31752 M_INTEL_BONNELL,
31753 M_INTEL_CORE2,
31754 M_INTEL_COREI7,
31755 M_AMDFAM10H,
31756 M_AMDFAM15H,
31757 M_INTEL_SILVERMONT,
31758 M_INTEL_KNL,
31759 M_AMD_BTVER1,
31760 M_AMD_BTVER2,
31761 M_AMDFAM17H,
31762 M_INTEL_KNM,
31763 M_CPU_SUBTYPE_START,
31764 M_INTEL_COREI7_NEHALEM,
31765 M_INTEL_COREI7_WESTMERE,
31766 M_INTEL_COREI7_SANDYBRIDGE,
31767 M_AMDFAM10H_BARCELONA,
31768 M_AMDFAM10H_SHANGHAI,
31769 M_AMDFAM10H_ISTANBUL,
31770 M_AMDFAM15H_BDVER1,
31771 M_AMDFAM15H_BDVER2,
31772 M_AMDFAM15H_BDVER3,
31773 M_AMDFAM15H_BDVER4,
31774 M_AMDFAM17H_ZNVER1,
31775 M_INTEL_COREI7_IVYBRIDGE,
31776 M_INTEL_COREI7_HASWELL,
31777 M_INTEL_COREI7_BROADWELL,
31778 M_INTEL_COREI7_SKYLAKE,
31779 M_INTEL_COREI7_SKYLAKE_AVX512
31782 static struct _arch_names_table
31784 const char *const name;
31785 const enum processor_model model;
31787 const arch_names_table[] =
31789 {"amd", M_AMD},
31790 {"intel", M_INTEL},
31791 {"atom", M_INTEL_BONNELL},
31792 {"slm", M_INTEL_SILVERMONT},
31793 {"core2", M_INTEL_CORE2},
31794 {"corei7", M_INTEL_COREI7},
31795 {"nehalem", M_INTEL_COREI7_NEHALEM},
31796 {"westmere", M_INTEL_COREI7_WESTMERE},
31797 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
31798 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
31799 {"haswell", M_INTEL_COREI7_HASWELL},
31800 {"broadwell", M_INTEL_COREI7_BROADWELL},
31801 {"skylake", M_INTEL_COREI7_SKYLAKE},
31802 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
31803 {"bonnell", M_INTEL_BONNELL},
31804 {"silvermont", M_INTEL_SILVERMONT},
31805 {"knl", M_INTEL_KNL},
31806 {"knm", M_INTEL_KNM},
31807 {"amdfam10h", M_AMDFAM10H},
31808 {"barcelona", M_AMDFAM10H_BARCELONA},
31809 {"shanghai", M_AMDFAM10H_SHANGHAI},
31810 {"istanbul", M_AMDFAM10H_ISTANBUL},
31811 {"btver1", M_AMD_BTVER1},
31812 {"amdfam15h", M_AMDFAM15H},
31813 {"bdver1", M_AMDFAM15H_BDVER1},
31814 {"bdver2", M_AMDFAM15H_BDVER2},
31815 {"bdver3", M_AMDFAM15H_BDVER3},
31816 {"bdver4", M_AMDFAM15H_BDVER4},
31817 {"btver2", M_AMD_BTVER2},
31818 {"amdfam17h", M_AMDFAM17H},
31819 {"znver1", M_AMDFAM17H_ZNVER1},
31822 static struct _isa_names_table
31824 const char *const name;
31825 const enum processor_features feature;
31827 const isa_names_table[] =
31829 {"cmov", F_CMOV},
31830 {"mmx", F_MMX},
31831 {"popcnt", F_POPCNT},
31832 {"sse", F_SSE},
31833 {"sse2", F_SSE2},
31834 {"sse3", F_SSE3},
31835 {"ssse3", F_SSSE3},
31836 {"sse4a", F_SSE4_A},
31837 {"sse4.1", F_SSE4_1},
31838 {"sse4.2", F_SSE4_2},
31839 {"avx", F_AVX},
31840 {"fma4", F_FMA4},
31841 {"xop", F_XOP},
31842 {"fma", F_FMA},
31843 {"avx2", F_AVX2},
31844 {"avx512f", F_AVX512F},
31845 {"bmi", F_BMI},
31846 {"bmi2", F_BMI2},
31847 {"aes", F_AES},
31848 {"pclmul", F_PCLMUL},
31849 {"avx512vl",F_AVX512VL},
31850 {"avx512bw",F_AVX512BW},
31851 {"avx512dq",F_AVX512DQ},
31852 {"avx512cd",F_AVX512CD},
31853 {"avx512er",F_AVX512ER},
31854 {"avx512pf",F_AVX512PF},
31855 {"avx512vbmi",F_AVX512VBMI},
31856 {"avx512ifma",F_AVX512IFMA},
31857 {"avx5124vnniw",F_AVX5124VNNIW},
31858 {"avx5124fmaps",F_AVX5124FMAPS},
31859 {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
31862 tree __processor_model_type = build_processor_model_struct ();
31863 tree __cpu_model_var = make_var_decl (__processor_model_type,
31864 "__cpu_model");
31867 varpool_node::add (__cpu_model_var);
31869 gcc_assert ((args != NULL) && (*args != NULL));
31871 param_string_cst = *args;
31872 while (param_string_cst
31873 && TREE_CODE (param_string_cst) != STRING_CST)
31875 /* *args must be an expr that can contain other EXPRS leading to a
31876 STRING_CST. */
31877 if (!EXPR_P (param_string_cst))
31879 error ("Parameter to builtin must be a string constant or literal");
31880 return integer_zero_node;
31882 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
31885 gcc_assert (param_string_cst);
31887 if (fn_code == IX86_BUILTIN_CPU_IS)
31889 tree ref;
31890 tree field;
31891 tree final;
31893 unsigned int field_val = 0;
31894 unsigned int NUM_ARCH_NAMES
31895 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
31897 for (i = 0; i < NUM_ARCH_NAMES; i++)
31898 if (strcmp (arch_names_table[i].name,
31899 TREE_STRING_POINTER (param_string_cst)) == 0)
31900 break;
31902 if (i == NUM_ARCH_NAMES)
31904 error ("Parameter to builtin not valid: %s",
31905 TREE_STRING_POINTER (param_string_cst));
31906 return integer_zero_node;
31909 field = TYPE_FIELDS (__processor_model_type);
31910 field_val = arch_names_table[i].model;
31912 /* CPU types are stored in the next field. */
31913 if (field_val > M_CPU_TYPE_START
31914 && field_val < M_CPU_SUBTYPE_START)
31916 field = DECL_CHAIN (field);
31917 field_val -= M_CPU_TYPE_START;
31920 /* CPU subtypes are stored in the next field. */
31921 if (field_val > M_CPU_SUBTYPE_START)
31923 field = DECL_CHAIN ( DECL_CHAIN (field));
31924 field_val -= M_CPU_SUBTYPE_START;
31927 /* Get the appropriate field in __cpu_model. */
31928 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
31929 field, NULL_TREE);
31931 /* Check the value. */
31932 final = build2 (EQ_EXPR, unsigned_type_node, ref,
31933 build_int_cstu (unsigned_type_node, field_val));
31934 return build1 (CONVERT_EXPR, integer_type_node, final);
31936 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
31938 tree ref;
31939 tree array_elt;
31940 tree field;
31941 tree final;
31943 unsigned int field_val = 0;
31944 unsigned int NUM_ISA_NAMES
31945 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
31947 for (i = 0; i < NUM_ISA_NAMES; i++)
31948 if (strcmp (isa_names_table[i].name,
31949 TREE_STRING_POINTER (param_string_cst)) == 0)
31950 break;
31952 if (i == NUM_ISA_NAMES)
31954 error ("Parameter to builtin not valid: %s",
31955 TREE_STRING_POINTER (param_string_cst));
31956 return integer_zero_node;
31959 field = TYPE_FIELDS (__processor_model_type);
31960 /* Get the last field, which is __cpu_features. */
31961 while (DECL_CHAIN (field))
31962 field = DECL_CHAIN (field);
31964 /* Get the appropriate field: __cpu_model.__cpu_features */
31965 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
31966 field, NULL_TREE);
31968 /* Access the 0th element of __cpu_features array. */
31969 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
31970 integer_zero_node, NULL_TREE, NULL_TREE);
31972 field_val = (1 << isa_names_table[i].feature);
31973 /* Return __cpu_model.__cpu_features[0] & field_val */
31974 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
31975 build_int_cstu (unsigned_type_node, field_val));
31976 return build1 (CONVERT_EXPR, integer_type_node, final);
31978 gcc_unreachable ();
31981 static tree
31982 ix86_fold_builtin (tree fndecl, int n_args,
31983 tree *args, bool ignore ATTRIBUTE_UNUSED)
31985 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
31987 enum ix86_builtins fn_code = (enum ix86_builtins)
31988 DECL_FUNCTION_CODE (fndecl);
31989 switch (fn_code)
31991 case IX86_BUILTIN_CPU_IS:
31992 case IX86_BUILTIN_CPU_SUPPORTS:
31993 gcc_assert (n_args == 1);
31994 return fold_builtin_cpu (fndecl, args);
31996 case IX86_BUILTIN_NANQ:
31997 case IX86_BUILTIN_NANSQ:
31999 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32000 const char *str = c_getstr (*args);
32001 int quiet = fn_code == IX86_BUILTIN_NANQ;
32002 REAL_VALUE_TYPE real;
32004 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
32005 return build_real (type, real);
32006 return NULL_TREE;
32009 case IX86_BUILTIN_INFQ:
32010 case IX86_BUILTIN_HUGE_VALQ:
32012 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32013 REAL_VALUE_TYPE inf;
32014 real_inf (&inf);
32015 return build_real (type, inf);
32018 case IX86_BUILTIN_TZCNT16:
32019 case IX86_BUILTIN_CTZS:
32020 case IX86_BUILTIN_TZCNT32:
32021 case IX86_BUILTIN_TZCNT64:
32022 gcc_assert (n_args == 1);
32023 if (TREE_CODE (args[0]) == INTEGER_CST)
32025 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32026 tree arg = args[0];
32027 if (fn_code == IX86_BUILTIN_TZCNT16
32028 || fn_code == IX86_BUILTIN_CTZS)
32029 arg = fold_convert (short_unsigned_type_node, arg);
32030 if (integer_zerop (arg))
32031 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32032 else
32033 return fold_const_call (CFN_CTZ, type, arg);
32035 break;
32037 case IX86_BUILTIN_LZCNT16:
32038 case IX86_BUILTIN_CLZS:
32039 case IX86_BUILTIN_LZCNT32:
32040 case IX86_BUILTIN_LZCNT64:
32041 gcc_assert (n_args == 1);
32042 if (TREE_CODE (args[0]) == INTEGER_CST)
32044 tree type = TREE_TYPE (TREE_TYPE (fndecl));
32045 tree arg = args[0];
32046 if (fn_code == IX86_BUILTIN_LZCNT16
32047 || fn_code == IX86_BUILTIN_CLZS)
32048 arg = fold_convert (short_unsigned_type_node, arg);
32049 if (integer_zerop (arg))
32050 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
32051 else
32052 return fold_const_call (CFN_CLZ, type, arg);
32054 break;
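/* Worked examples for the two cases above (illustrative):
   __builtin_ia32_tzcnt_u32 (0) folds to 32 (the operand precision),
   while __builtin_ia32_tzcnt_u32 (8) folds to 3 via CFN_CTZ.  */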
32056 case IX86_BUILTIN_BEXTR32:
32057 case IX86_BUILTIN_BEXTR64:
32058 case IX86_BUILTIN_BEXTRI32:
32059 case IX86_BUILTIN_BEXTRI64:
32060 gcc_assert (n_args == 2);
32061 if (tree_fits_uhwi_p (args[1]))
32063 unsigned HOST_WIDE_INT res = 0;
32064 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
32065 unsigned int start = tree_to_uhwi (args[1]);
32066 unsigned int len = (start & 0xff00) >> 8;
32067 start &= 0xff;
32068 if (start >= prec || len == 0)
32069 res = 0;
32070 else if (!tree_fits_uhwi_p (args[0]))
32071 break;
32072 else
32073 res = tree_to_uhwi (args[0]) >> start;
32074 if (len > prec)
32075 len = prec;
32076 if (len < HOST_BITS_PER_WIDE_INT)
32077 res &= (HOST_WIDE_INT_1U << len) - 1;
32078 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32080 break;
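/* Worked example (illustrative): with args[1] == (8 << 8) | 4 the field
   is 8 bits starting at bit 4, so args[0] == 0x12345678 folds to
   (0x12345678 >> 4) & 0xff == 0x67.  */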
32082 case IX86_BUILTIN_BZHI32:
32083 case IX86_BUILTIN_BZHI64:
32084 gcc_assert (n_args == 2);
32085 if (tree_fits_uhwi_p (args[1]))
32087 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
32088 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
32089 return args[0];
32090 if (!tree_fits_uhwi_p (args[0]))
32091 break;
32092 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
32093 res &= ~(HOST_WIDE_INT_M1U << idx);
32094 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32096 break;
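/* Worked example (illustrative): BZHI zeroes the bits from the index
   upward, so args[0] == 0x12345678 with args[1] == 8 folds to 0x78.  */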
32098 case IX86_BUILTIN_PDEP32:
32099 case IX86_BUILTIN_PDEP64:
32100 gcc_assert (n_args == 2);
32101 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32103 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32104 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32105 unsigned HOST_WIDE_INT res = 0;
32106 unsigned HOST_WIDE_INT m, k = 1;
32107 for (m = 1; m; m <<= 1)
32108 if ((mask & m) != 0)
32110 if ((src & k) != 0)
32111 res |= m;
32112 k <<= 1;
32114 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32116 break;
32118 case IX86_BUILTIN_PEXT32:
32119 case IX86_BUILTIN_PEXT64:
32120 gcc_assert (n_args == 2);
32121 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
32123 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
32124 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
32125 unsigned HOST_WIDE_INT res = 0;
32126 unsigned HOST_WIDE_INT m, k = 1;
32127 for (m = 1; m; m <<= 1)
32128 if ((mask & m) != 0)
32130 if ((src & m) != 0)
32131 res |= k;
32132 k <<= 1;
32134 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
32136 break;
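/* Worked examples (illustrative) of the bit loops above: PDEP deposits
   source bits into the set positions of the mask,
     pdep (0b101, 0b11010) == 0b10010,
   and PEXT packs the masked source bits back down,
     pext (0b10010, 0b11010) == 0b101.  */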
32138 default:
32139 break;
32143 #ifdef SUBTARGET_FOLD_BUILTIN
32144 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32145 #endif
32147 return NULL_TREE;
32150 /* Fold a MD builtin (use ix86_fold_builtin for folding into
32151 constant) in GIMPLE. */
32153 bool
32154 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
32156 gimple *stmt = gsi_stmt (*gsi);
32157 tree fndecl = gimple_call_fndecl (stmt);
32158 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
32159 int n_args = gimple_call_num_args (stmt);
32160 enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
32161 tree decl = NULL_TREE;
32162 tree arg0, arg1;
32164 switch (fn_code)
32166 case IX86_BUILTIN_TZCNT32:
32167 decl = builtin_decl_implicit (BUILT_IN_CTZ);
32168 goto fold_tzcnt_lzcnt;
32170 case IX86_BUILTIN_TZCNT64:
32171 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
32172 goto fold_tzcnt_lzcnt;
32174 case IX86_BUILTIN_LZCNT32:
32175 decl = builtin_decl_implicit (BUILT_IN_CLZ);
32176 goto fold_tzcnt_lzcnt;
32178 case IX86_BUILTIN_LZCNT64:
32179 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
32180 goto fold_tzcnt_lzcnt;
32182 fold_tzcnt_lzcnt:
32183 gcc_assert (n_args == 1);
32184 arg0 = gimple_call_arg (stmt, 0);
32185 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
32187 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
32188 /* If arg0 is provably non-zero, optimize into generic
32189 __builtin_c[tl]z{,ll} function the middle-end handles
32190 better. */
32191 if (!expr_not_equal_to (arg0, wi::zero (prec)))
32192 return false;
32194 location_t loc = gimple_location (stmt);
32195 gimple *g = gimple_build_call (decl, 1, arg0);
32196 gimple_set_location (g, loc);
32197 tree lhs = make_ssa_name (integer_type_node);
32198 gimple_call_set_lhs (g, lhs);
32199 gsi_insert_before (gsi, g, GSI_SAME_STMT);
32200 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
32201 gimple_set_location (g, loc);
32202 gsi_replace (gsi, g, false);
32203 return true;
32205 break;
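/* Illustrative effect of the rewrite above: when the argument is known
   to be nonzero, a call such as

     res = __builtin_ia32_tzcnt_u32 (x);

   is replaced in GIMPLE by

     tmp = __builtin_ctz (x);
     res = (cast to the original result type) tmp;

   so the middle end can apply its generic CTZ/CLZ optimizations.  */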
32207 case IX86_BUILTIN_BZHI32:
32208 case IX86_BUILTIN_BZHI64:
32209 gcc_assert (n_args == 2);
32210 arg1 = gimple_call_arg (stmt, 1);
32211 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
32213 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
32214 arg0 = gimple_call_arg (stmt, 0);
32215 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
32216 break;
32217 location_t loc = gimple_location (stmt);
32218 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32219 gimple_set_location (g, loc);
32220 gsi_replace (gsi, g, false);
32221 return true;
32223 break;
32225 case IX86_BUILTIN_PDEP32:
32226 case IX86_BUILTIN_PDEP64:
32227 case IX86_BUILTIN_PEXT32:
32228 case IX86_BUILTIN_PEXT64:
32229 gcc_assert (n_args == 2);
32230 arg1 = gimple_call_arg (stmt, 1);
32231 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
32233 location_t loc = gimple_location (stmt);
32234 arg0 = gimple_call_arg (stmt, 0);
32235 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
32236 gimple_set_location (g, loc);
32237 gsi_replace (gsi, g, false);
32238 return true;
32240 break;
32242 default:
32243 break;
32246 return false;
32249 /* Make builtins to detect cpu type and features supported. NAME is
32250 the builtin name, CODE is the builtin code, and FTYPE is the function
32251 type of the builtin. */
32253 static void
32254 make_cpu_type_builtin (const char* name, int code,
32255 enum ix86_builtin_func_type ftype, bool is_const)
32257 tree decl;
32258 tree type;
32260 type = ix86_get_builtin_func_type (ftype);
32261 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32262 NULL, NULL_TREE);
32263 gcc_assert (decl != NULL_TREE);
32264 ix86_builtins[(int) code] = decl;
32265 TREE_READONLY (decl) = is_const;
32268 /* Make builtins to get CPU type and features supported. The created
32269 builtins are:
32271 __builtin_cpu_init (), to detect cpu type and features,
32272 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32273 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32276 static void
32277 ix86_init_platform_type_builtins (void)
32279 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32280 INT_FTYPE_VOID, false);
32281 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32282 INT_FTYPE_PCCHAR, true);
32283 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32284 INT_FTYPE_PCCHAR, true);
32287 /* Internal method for ix86_init_builtins. */
32289 static void
32290 ix86_init_builtins_va_builtins_abi (void)
32292 tree ms_va_ref, sysv_va_ref;
32293 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32294 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32295 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32296 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32298 if (!TARGET_64BIT)
32299 return;
32300 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32301 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32302 ms_va_ref = build_reference_type (ms_va_list_type_node);
32303 sysv_va_ref =
32304 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32306 fnvoid_va_end_ms =
32307 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32308 fnvoid_va_start_ms =
32309 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32310 fnvoid_va_end_sysv =
32311 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32312 fnvoid_va_start_sysv =
32313 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32314 NULL_TREE);
32315 fnvoid_va_copy_ms =
32316 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32317 NULL_TREE);
32318 fnvoid_va_copy_sysv =
32319 build_function_type_list (void_type_node, sysv_va_ref,
32320 sysv_va_ref, NULL_TREE);
32322 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32323 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32324 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32325 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32326 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32327 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32328 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32329 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32330 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32331 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32332 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32333 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32336 static void
32337 ix86_init_builtin_types (void)
32339 tree float80_type_node, const_string_type_node;
32341 /* The __float80 type. */
32342 float80_type_node = long_double_type_node;
32343 if (TYPE_MODE (float80_type_node) != XFmode)
32345 if (float64x_type_node != NULL_TREE
32346 && TYPE_MODE (float64x_type_node) == XFmode)
32347 float80_type_node = float64x_type_node;
32348 else
32350 /* The __float80 type. */
32351 float80_type_node = make_node (REAL_TYPE);
32353 TYPE_PRECISION (float80_type_node) = 80;
32354 layout_type (float80_type_node);
32357 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32359 /* The __float128 type. The node has already been created as
32360 _Float128, so we only need to register the __float128 name for
32361 it. */
32362 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32364 const_string_type_node
32365 = build_pointer_type (build_qualified_type
32366 (char_type_node, TYPE_QUAL_CONST));
32368 /* This macro is built by i386-builtin-types.awk. */
32369 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32372 static void
32373 ix86_init_builtins (void)
32375 tree ftype, decl;
32377 ix86_init_builtin_types ();
32379 /* Builtins to get CPU type and features. */
32380 ix86_init_platform_type_builtins ();
32382 /* TFmode support builtins. */
32383 def_builtin_const (0, "__builtin_infq",
32384 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32385 def_builtin_const (0, "__builtin_huge_valq",
32386 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32388 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
32389 decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
32390 BUILT_IN_MD, "nanq", NULL_TREE);
32391 TREE_READONLY (decl) = 1;
32392 ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
32394 decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
32395 BUILT_IN_MD, "nansq", NULL_TREE);
32396 TREE_READONLY (decl) = 1;
32397 ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
32399 /* We will expand them to normal calls if SSE isn't available since
32400 they are used by libgcc. */
32401 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32402 decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
32403 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32404 TREE_READONLY (decl) = 1;
32405 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
32407 ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32408 decl = add_builtin_function ("__builtin_copysignq", ftype,
32409 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
32410 "__copysigntf3", NULL_TREE);
32411 TREE_READONLY (decl) = 1;
32412 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
32414 ix86_init_tm_builtins ();
32415 ix86_init_mmx_sse_builtins ();
32416 ix86_init_mpx_builtins ();
32418 if (TARGET_LP64)
32419 ix86_init_builtins_va_builtins_abi ();
32421 #ifdef SUBTARGET_INIT_BUILTINS
32422 SUBTARGET_INIT_BUILTINS;
32423 #endif
32426 /* Return the ix86 builtin for CODE. */
32428 static tree
32429 ix86_builtin_decl (unsigned code, bool)
32431 if (code >= IX86_BUILTIN_MAX)
32432 return error_mark_node;
32434 return ix86_builtins[code];
32437 /* Errors in the source file can cause expand_expr to return const0_rtx
32438 where we expect a vector. To avoid crashing, use one of the vector
32439 clear instructions. */
32440 static rtx
32441 safe_vector_operand (rtx x, machine_mode mode)
32443 if (x == const0_rtx)
32444 x = CONST0_RTX (mode);
32445 return x;
32448 /* Fix up modeless constants to fit the required mode. */
32449 static rtx
32450 fixup_modeless_constant (rtx x, machine_mode mode)
32452 if (GET_MODE (x) == VOIDmode)
32453 x = convert_to_mode (mode, x, 1);
32454 return x;
32457 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32459 static rtx
32460 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32462 rtx pat;
32463 tree arg0 = CALL_EXPR_ARG (exp, 0);
32464 tree arg1 = CALL_EXPR_ARG (exp, 1);
32465 rtx op0 = expand_normal (arg0);
32466 rtx op1 = expand_normal (arg1);
32467 machine_mode tmode = insn_data[icode].operand[0].mode;
32468 machine_mode mode0 = insn_data[icode].operand[1].mode;
32469 machine_mode mode1 = insn_data[icode].operand[2].mode;
32471 if (VECTOR_MODE_P (mode0))
32472 op0 = safe_vector_operand (op0, mode0);
32473 if (VECTOR_MODE_P (mode1))
32474 op1 = safe_vector_operand (op1, mode1);
32476 if (optimize || !target
32477 || GET_MODE (target) != tmode
32478 || !insn_data[icode].operand[0].predicate (target, tmode))
32479 target = gen_reg_rtx (tmode);
32481 if (GET_MODE (op1) == SImode && mode1 == TImode)
32483 rtx x = gen_reg_rtx (V4SImode);
32484 emit_insn (gen_sse2_loadd (x, op1));
32485 op1 = gen_lowpart (TImode, x);
32488 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32489 op0 = copy_to_mode_reg (mode0, op0);
32490 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32491 op1 = copy_to_mode_reg (mode1, op1);
32493 pat = GEN_FCN (icode) (target, op0, op1);
32494 if (! pat)
32495 return 0;
32497 emit_insn (pat);
32499 return target;
32502 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32504 static rtx
32505 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32506 enum ix86_builtin_func_type m_type,
32507 enum rtx_code sub_code)
32509 rtx pat;
32510 int i;
32511 int nargs;
32512 bool comparison_p = false;
32513 bool tf_p = false;
32514 bool last_arg_constant = false;
32515 int num_memory = 0;
32516 struct {
32517 rtx op;
32518 machine_mode mode;
32519 } args[4];
32521 machine_mode tmode = insn_data[icode].operand[0].mode;
32523 switch (m_type)
32525 case MULTI_ARG_4_DF2_DI_I:
32526 case MULTI_ARG_4_DF2_DI_I1:
32527 case MULTI_ARG_4_SF2_SI_I:
32528 case MULTI_ARG_4_SF2_SI_I1:
32529 nargs = 4;
32530 last_arg_constant = true;
32531 break;
32533 case MULTI_ARG_3_SF:
32534 case MULTI_ARG_3_DF:
32535 case MULTI_ARG_3_SF2:
32536 case MULTI_ARG_3_DF2:
32537 case MULTI_ARG_3_DI:
32538 case MULTI_ARG_3_SI:
32539 case MULTI_ARG_3_SI_DI:
32540 case MULTI_ARG_3_HI:
32541 case MULTI_ARG_3_HI_SI:
32542 case MULTI_ARG_3_QI:
32543 case MULTI_ARG_3_DI2:
32544 case MULTI_ARG_3_SI2:
32545 case MULTI_ARG_3_HI2:
32546 case MULTI_ARG_3_QI2:
32547 nargs = 3;
32548 break;
32550 case MULTI_ARG_2_SF:
32551 case MULTI_ARG_2_DF:
32552 case MULTI_ARG_2_DI:
32553 case MULTI_ARG_2_SI:
32554 case MULTI_ARG_2_HI:
32555 case MULTI_ARG_2_QI:
32556 nargs = 2;
32557 break;
32559 case MULTI_ARG_2_DI_IMM:
32560 case MULTI_ARG_2_SI_IMM:
32561 case MULTI_ARG_2_HI_IMM:
32562 case MULTI_ARG_2_QI_IMM:
32563 nargs = 2;
32564 last_arg_constant = true;
32565 break;
32567 case MULTI_ARG_1_SF:
32568 case MULTI_ARG_1_DF:
32569 case MULTI_ARG_1_SF2:
32570 case MULTI_ARG_1_DF2:
32571 case MULTI_ARG_1_DI:
32572 case MULTI_ARG_1_SI:
32573 case MULTI_ARG_1_HI:
32574 case MULTI_ARG_1_QI:
32575 case MULTI_ARG_1_SI_DI:
32576 case MULTI_ARG_1_HI_DI:
32577 case MULTI_ARG_1_HI_SI:
32578 case MULTI_ARG_1_QI_DI:
32579 case MULTI_ARG_1_QI_SI:
32580 case MULTI_ARG_1_QI_HI:
32581 nargs = 1;
32582 break;
32584 case MULTI_ARG_2_DI_CMP:
32585 case MULTI_ARG_2_SI_CMP:
32586 case MULTI_ARG_2_HI_CMP:
32587 case MULTI_ARG_2_QI_CMP:
32588 nargs = 2;
32589 comparison_p = true;
32590 break;
32592 case MULTI_ARG_2_SF_TF:
32593 case MULTI_ARG_2_DF_TF:
32594 case MULTI_ARG_2_DI_TF:
32595 case MULTI_ARG_2_SI_TF:
32596 case MULTI_ARG_2_HI_TF:
32597 case MULTI_ARG_2_QI_TF:
32598 nargs = 2;
32599 tf_p = true;
32600 break;
32602 default:
32603 gcc_unreachable ();
32606 if (optimize || !target
32607 || GET_MODE (target) != tmode
32608 || !insn_data[icode].operand[0].predicate (target, tmode))
32609 target = gen_reg_rtx (tmode);
32610 else if (memory_operand (target, tmode))
32611 num_memory++;
32613 gcc_assert (nargs <= 4);
32615 for (i = 0; i < nargs; i++)
32617 tree arg = CALL_EXPR_ARG (exp, i);
32618 rtx op = expand_normal (arg);
32619 int adjust = (comparison_p) ? 1 : 0;
32620 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32622 if (last_arg_constant && i == nargs - 1)
32624 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32626 enum insn_code new_icode = icode;
32627 switch (icode)
32629 case CODE_FOR_xop_vpermil2v2df3:
32630 case CODE_FOR_xop_vpermil2v4sf3:
32631 case CODE_FOR_xop_vpermil2v4df3:
32632 case CODE_FOR_xop_vpermil2v8sf3:
32633 error ("the last argument must be a 2-bit immediate");
32634 return gen_reg_rtx (tmode);
32635 case CODE_FOR_xop_rotlv2di3:
32636 new_icode = CODE_FOR_rotlv2di3;
32637 goto xop_rotl;
32638 case CODE_FOR_xop_rotlv4si3:
32639 new_icode = CODE_FOR_rotlv4si3;
32640 goto xop_rotl;
32641 case CODE_FOR_xop_rotlv8hi3:
32642 new_icode = CODE_FOR_rotlv8hi3;
32643 goto xop_rotl;
32644 case CODE_FOR_xop_rotlv16qi3:
32645 new_icode = CODE_FOR_rotlv16qi3;
32646 xop_rotl:
32647 if (CONST_INT_P (op))
32649 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
32650 op = GEN_INT (INTVAL (op) & mask);
32651 gcc_checking_assert
32652 (insn_data[icode].operand[i + 1].predicate (op, mode));
32654 else
32656 gcc_checking_assert
32657 (nargs == 2
32658 && insn_data[new_icode].operand[0].mode == tmode
32659 && insn_data[new_icode].operand[1].mode == tmode
32660 && insn_data[new_icode].operand[2].mode == mode
32661 && insn_data[new_icode].operand[0].predicate
32662 == insn_data[icode].operand[0].predicate
32663 && insn_data[new_icode].operand[1].predicate
32664 == insn_data[icode].operand[1].predicate);
32665 icode = new_icode;
32666 goto non_constant;
32668 break;
32669 default:
32670 gcc_unreachable ();
32674 else
32676 non_constant:
32677 if (VECTOR_MODE_P (mode))
32678 op = safe_vector_operand (op, mode);
32680 /* If we aren't optimizing, only allow one memory operand to be
32681 generated. */
32682 if (memory_operand (op, mode))
32683 num_memory++;
32685 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32687 if (optimize
32688 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32689 || num_memory > 1)
32690 op = force_reg (mode, op);
32693 args[i].op = op;
32694 args[i].mode = mode;
32697 switch (nargs)
32699 case 1:
32700 pat = GEN_FCN (icode) (target, args[0].op);
32701 break;
32703 case 2:
32704 if (tf_p)
32705 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32706 GEN_INT ((int)sub_code));
32707 else if (! comparison_p)
32708 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32709 else
32711 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32712 args[0].op,
32713 args[1].op);
32715 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32717 break;
32719 case 3:
32720 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32721 break;
32723 case 4:
32724 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
32725 break;
32727 default:
32728 gcc_unreachable ();
32731 if (! pat)
32732 return 0;
32734 emit_insn (pat);
32735 return target;
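/* Editor's summary: for the *_CMP variants above the expander builds the
   comparison RTX from sub_code and passes it as an extra insn operand,
   whereas the *_TF variants pass sub_code as an integer immediate; in both
   cases sub_code is supplied by the caller. */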
32738 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
32739 insns with vec_merge. */
32741 static rtx
32742 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
32743 rtx target)
32745 rtx pat;
32746 tree arg0 = CALL_EXPR_ARG (exp, 0);
32747 rtx op1, op0 = expand_normal (arg0);
32748 machine_mode tmode = insn_data[icode].operand[0].mode;
32749 machine_mode mode0 = insn_data[icode].operand[1].mode;
32751 if (optimize || !target
32752 || GET_MODE (target) != tmode
32753 || !insn_data[icode].operand[0].predicate (target, tmode))
32754 target = gen_reg_rtx (tmode);
32756 if (VECTOR_MODE_P (mode0))
32757 op0 = safe_vector_operand (op0, mode0);
32759 if ((optimize && !register_operand (op0, mode0))
32760 || !insn_data[icode].operand[1].predicate (op0, mode0))
32761 op0 = copy_to_mode_reg (mode0, op0);
32763 op1 = op0;
32764 if (!insn_data[icode].operand[2].predicate (op1, mode0))
32765 op1 = copy_to_mode_reg (mode0, op1);
32767 pat = GEN_FCN (icode) (target, op0, op1);
32768 if (! pat)
32769 return 0;
32770 emit_insn (pat);
32771 return target;
32774 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
32776 static rtx
32777 ix86_expand_sse_compare (const struct builtin_description *d,
32778 tree exp, rtx target, bool swap)
32780 rtx pat;
32781 tree arg0 = CALL_EXPR_ARG (exp, 0);
32782 tree arg1 = CALL_EXPR_ARG (exp, 1);
32783 rtx op0 = expand_normal (arg0);
32784 rtx op1 = expand_normal (arg1);
32785 rtx op2;
32786 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32787 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32788 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32789 enum rtx_code comparison = d->comparison;
32791 if (VECTOR_MODE_P (mode0))
32792 op0 = safe_vector_operand (op0, mode0);
32793 if (VECTOR_MODE_P (mode1))
32794 op1 = safe_vector_operand (op1, mode1);
32796 /* Swap operands if we have a comparison that isn't available in
32797 hardware. */
32798 if (swap)
32799 std::swap (op0, op1);
32801 if (optimize || !target
32802 || GET_MODE (target) != tmode
32803 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32804 target = gen_reg_rtx (tmode);
32806 if ((optimize && !register_operand (op0, mode0))
32807 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
32808 op0 = copy_to_mode_reg (mode0, op0);
32809 if ((optimize && !register_operand (op1, mode1))
32810 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
32811 op1 = copy_to_mode_reg (mode1, op1);
32813 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
32814 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32815 if (! pat)
32816 return 0;
32817 emit_insn (pat);
32818 return target;
32821 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
32823 static rtx
32824 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
32825 rtx target)
32827 rtx pat;
32828 tree arg0 = CALL_EXPR_ARG (exp, 0);
32829 tree arg1 = CALL_EXPR_ARG (exp, 1);
32830 rtx op0 = expand_normal (arg0);
32831 rtx op1 = expand_normal (arg1);
32832 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32833 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32834 enum rtx_code comparison = d->comparison;
32836 if (VECTOR_MODE_P (mode0))
32837 op0 = safe_vector_operand (op0, mode0);
32838 if (VECTOR_MODE_P (mode1))
32839 op1 = safe_vector_operand (op1, mode1);
32841 /* Swap operands if we have a comparison that isn't available in
32842 hardware. */
32843 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
32844 std::swap (op0, op1);
32846 target = gen_reg_rtx (SImode);
32847 emit_move_insn (target, const0_rtx);
32848 target = gen_rtx_SUBREG (QImode, target, 0);
32850 if ((optimize && !register_operand (op0, mode0))
32851 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32852 op0 = copy_to_mode_reg (mode0, op0);
32853 if ((optimize && !register_operand (op1, mode1))
32854 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32855 op1 = copy_to_mode_reg (mode1, op1);
32857 pat = GEN_FCN (d->icode) (op0, op1);
32858 if (! pat)
32859 return 0;
32860 emit_insn (pat);
32861 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32862 gen_rtx_fmt_ee (comparison, QImode,
32863 SET_DEST (pat),
32864 const0_rtx)));
32866 return SUBREG_REG (target);
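/* Note on the idiom above (also used by the ptest, pcmp[ei]str and
   comi_round expanders below): the result is built in an SImode pseudo
   that is first cleared, after which only its low QImode part is written
   through STRICT_LOW_PART with the outcome of the flags comparison, so
   the SImode value returned via SUBREG_REG is already zero-extended. */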
32869 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
32871 static rtx
32872 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
32873 rtx target)
32875 rtx pat;
32876 tree arg0 = CALL_EXPR_ARG (exp, 0);
32877 rtx op1, op0 = expand_normal (arg0);
32878 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32879 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32881 if (optimize || target == 0
32882 || GET_MODE (target) != tmode
32883 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32884 target = gen_reg_rtx (tmode);
32886 if (VECTOR_MODE_P (mode0))
32887 op0 = safe_vector_operand (op0, mode0);
32889 if ((optimize && !register_operand (op0, mode0))
32890 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32891 op0 = copy_to_mode_reg (mode0, op0);
32893 op1 = GEN_INT (d->comparison);
32895 pat = GEN_FCN (d->icode) (target, op0, op1);
32896 if (! pat)
32897 return 0;
32898 emit_insn (pat);
32899 return target;
32902 static rtx
32903 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
32904 tree exp, rtx target)
32906 rtx pat;
32907 tree arg0 = CALL_EXPR_ARG (exp, 0);
32908 tree arg1 = CALL_EXPR_ARG (exp, 1);
32909 rtx op0 = expand_normal (arg0);
32910 rtx op1 = expand_normal (arg1);
32911 rtx op2;
32912 machine_mode tmode = insn_data[d->icode].operand[0].mode;
32913 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
32914 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
32916 if (optimize || target == 0
32917 || GET_MODE (target) != tmode
32918 || !insn_data[d->icode].operand[0].predicate (target, tmode))
32919 target = gen_reg_rtx (tmode);
32921 op0 = safe_vector_operand (op0, mode0);
32922 op1 = safe_vector_operand (op1, mode1);
32924 if ((optimize && !register_operand (op0, mode0))
32925 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32926 op0 = copy_to_mode_reg (mode0, op0);
32927 if ((optimize && !register_operand (op1, mode1))
32928 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32929 op1 = copy_to_mode_reg (mode1, op1);
32931 op2 = GEN_INT (d->comparison);
32933 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
32934 if (! pat)
32935 return 0;
32936 emit_insn (pat);
32937 return target;
32940 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
32942 static rtx
32943 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
32944 rtx target)
32946 rtx pat;
32947 tree arg0 = CALL_EXPR_ARG (exp, 0);
32948 tree arg1 = CALL_EXPR_ARG (exp, 1);
32949 rtx op0 = expand_normal (arg0);
32950 rtx op1 = expand_normal (arg1);
32951 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
32952 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
32953 enum rtx_code comparison = d->comparison;
32955 if (VECTOR_MODE_P (mode0))
32956 op0 = safe_vector_operand (op0, mode0);
32957 if (VECTOR_MODE_P (mode1))
32958 op1 = safe_vector_operand (op1, mode1);
32960 target = gen_reg_rtx (SImode);
32961 emit_move_insn (target, const0_rtx);
32962 target = gen_rtx_SUBREG (QImode, target, 0);
32964 if ((optimize && !register_operand (op0, mode0))
32965 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
32966 op0 = copy_to_mode_reg (mode0, op0);
32967 if ((optimize && !register_operand (op1, mode1))
32968 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
32969 op1 = copy_to_mode_reg (mode1, op1);
32971 pat = GEN_FCN (d->icode) (op0, op1);
32972 if (! pat)
32973 return 0;
32974 emit_insn (pat);
32975 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
32976 gen_rtx_fmt_ee (comparison, QImode,
32977 SET_DEST (pat),
32978 const0_rtx)));
32980 return SUBREG_REG (target);
32983 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
32985 static rtx
32986 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
32987 tree exp, rtx target)
32989 rtx pat;
32990 tree arg0 = CALL_EXPR_ARG (exp, 0);
32991 tree arg1 = CALL_EXPR_ARG (exp, 1);
32992 tree arg2 = CALL_EXPR_ARG (exp, 2);
32993 tree arg3 = CALL_EXPR_ARG (exp, 3);
32994 tree arg4 = CALL_EXPR_ARG (exp, 4);
32995 rtx scratch0, scratch1;
32996 rtx op0 = expand_normal (arg0);
32997 rtx op1 = expand_normal (arg1);
32998 rtx op2 = expand_normal (arg2);
32999 rtx op3 = expand_normal (arg3);
33000 rtx op4 = expand_normal (arg4);
33001 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33003 tmode0 = insn_data[d->icode].operand[0].mode;
33004 tmode1 = insn_data[d->icode].operand[1].mode;
33005 modev2 = insn_data[d->icode].operand[2].mode;
33006 modei3 = insn_data[d->icode].operand[3].mode;
33007 modev4 = insn_data[d->icode].operand[4].mode;
33008 modei5 = insn_data[d->icode].operand[5].mode;
33009 modeimm = insn_data[d->icode].operand[6].mode;
33011 if (VECTOR_MODE_P (modev2))
33012 op0 = safe_vector_operand (op0, modev2);
33013 if (VECTOR_MODE_P (modev4))
33014 op2 = safe_vector_operand (op2, modev4);
33016 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33017 op0 = copy_to_mode_reg (modev2, op0);
33018 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33019 op1 = copy_to_mode_reg (modei3, op1);
33020 if ((optimize && !register_operand (op2, modev4))
33021 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33022 op2 = copy_to_mode_reg (modev4, op2);
33023 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33024 op3 = copy_to_mode_reg (modei5, op3);
33026 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33028 error ("the fifth argument must be an 8-bit immediate");
33029 return const0_rtx;
33032 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33034 if (optimize || !target
33035 || GET_MODE (target) != tmode0
33036 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33037 target = gen_reg_rtx (tmode0);
33039 scratch1 = gen_reg_rtx (tmode1);
33041 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33043 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33045 if (optimize || !target
33046 || GET_MODE (target) != tmode1
33047 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33048 target = gen_reg_rtx (tmode1);
33050 scratch0 = gen_reg_rtx (tmode0);
33052 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33054 else
33056 gcc_assert (d->flag);
33058 scratch0 = gen_reg_rtx (tmode0);
33059 scratch1 = gen_reg_rtx (tmode1);
33061 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33064 if (! pat)
33065 return 0;
33067 emit_insn (pat);
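/* For the flag-returning pcmpestr builtins d->flag is nonzero and is used
   (cast to machine_mode) as the mode of FLAGS_REG; the builtin's value is
   then the EQ-against-zero test of that register, built with the same
   SImode/QImode subreg idiom as in ix86_expand_sse_comi above. */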
33069 if (d->flag)
33071 target = gen_reg_rtx (SImode);
33072 emit_move_insn (target, const0_rtx);
33073 target = gen_rtx_SUBREG (QImode, target, 0);
33075 emit_insn
33076 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33077 gen_rtx_fmt_ee (EQ, QImode,
33078 gen_rtx_REG ((machine_mode) d->flag,
33079 FLAGS_REG),
33080 const0_rtx)));
33081 return SUBREG_REG (target);
33083 else
33084 return target;
33088 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33090 static rtx
33091 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33092 tree exp, rtx target)
33094 rtx pat;
33095 tree arg0 = CALL_EXPR_ARG (exp, 0);
33096 tree arg1 = CALL_EXPR_ARG (exp, 1);
33097 tree arg2 = CALL_EXPR_ARG (exp, 2);
33098 rtx scratch0, scratch1;
33099 rtx op0 = expand_normal (arg0);
33100 rtx op1 = expand_normal (arg1);
33101 rtx op2 = expand_normal (arg2);
33102 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33104 tmode0 = insn_data[d->icode].operand[0].mode;
33105 tmode1 = insn_data[d->icode].operand[1].mode;
33106 modev2 = insn_data[d->icode].operand[2].mode;
33107 modev3 = insn_data[d->icode].operand[3].mode;
33108 modeimm = insn_data[d->icode].operand[4].mode;
33110 if (VECTOR_MODE_P (modev2))
33111 op0 = safe_vector_operand (op0, modev2);
33112 if (VECTOR_MODE_P (modev3))
33113 op1 = safe_vector_operand (op1, modev3);
33115 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33116 op0 = copy_to_mode_reg (modev2, op0);
33117 if ((optimize && !register_operand (op1, modev3))
33118 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33119 op1 = copy_to_mode_reg (modev3, op1);
33121 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33123 error ("the third argument must be an 8-bit immediate");
33124 return const0_rtx;
33127 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33129 if (optimize || !target
33130 || GET_MODE (target) != tmode0
33131 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33132 target = gen_reg_rtx (tmode0);
33134 scratch1 = gen_reg_rtx (tmode1);
33136 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33138 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33140 if (optimize || !target
33141 || GET_MODE (target) != tmode1
33142 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33143 target = gen_reg_rtx (tmode1);
33145 scratch0 = gen_reg_rtx (tmode0);
33147 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33149 else
33151 gcc_assert (d->flag);
33153 scratch0 = gen_reg_rtx (tmode0);
33154 scratch1 = gen_reg_rtx (tmode1);
33156 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33159 if (! pat)
33160 return 0;
33162 emit_insn (pat);
33164 if (d->flag)
33166 target = gen_reg_rtx (SImode);
33167 emit_move_insn (target, const0_rtx);
33168 target = gen_rtx_SUBREG (QImode, target, 0);
33170 emit_insn
33171 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33172 gen_rtx_fmt_ee (EQ, QImode,
33173 gen_rtx_REG ((machine_mode) d->flag,
33174 FLAGS_REG),
33175 const0_rtx)));
33176 return SUBREG_REG (target);
33178 else
33179 return target;
33182 /* Subroutine of ix86_expand_builtin to take care of insns with
33183 a variable number of operands. */
33185 static rtx
33186 ix86_expand_args_builtin (const struct builtin_description *d,
33187 tree exp, rtx target)
33189 rtx pat, real_target;
33190 unsigned int i, nargs;
33191 unsigned int nargs_constant = 0;
33192 unsigned int mask_pos = 0;
33193 int num_memory = 0;
33194 struct
33196 rtx op;
33197 machine_mode mode;
33198 } args[6];
33199 bool second_arg_count = false;
33200 enum insn_code icode = d->icode;
33201 const struct insn_data_d *insn_p = &insn_data[icode];
33202 machine_mode tmode = insn_p->operand[0].mode;
33203 machine_mode rmode = VOIDmode;
33204 bool swap = false;
33205 enum rtx_code comparison = d->comparison;
33207 switch ((enum ix86_builtin_func_type) d->flag)
33209 case V2DF_FTYPE_V2DF_ROUND:
33210 case V4DF_FTYPE_V4DF_ROUND:
33211 case V8DF_FTYPE_V8DF_ROUND:
33212 case V4SF_FTYPE_V4SF_ROUND:
33213 case V8SF_FTYPE_V8SF_ROUND:
33214 case V16SF_FTYPE_V16SF_ROUND:
33215 case V4SI_FTYPE_V4SF_ROUND:
33216 case V8SI_FTYPE_V8SF_ROUND:
33217 case V16SI_FTYPE_V16SF_ROUND:
33218 return ix86_expand_sse_round (d, exp, target);
33219 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33220 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33221 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33222 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33223 case INT_FTYPE_V8SF_V8SF_PTEST:
33224 case INT_FTYPE_V4DI_V4DI_PTEST:
33225 case INT_FTYPE_V4DF_V4DF_PTEST:
33226 case INT_FTYPE_V4SF_V4SF_PTEST:
33227 case INT_FTYPE_V2DI_V2DI_PTEST:
33228 case INT_FTYPE_V2DF_V2DF_PTEST:
33229 return ix86_expand_sse_ptest (d, exp, target);
33230 case FLOAT128_FTYPE_FLOAT128:
33231 case FLOAT_FTYPE_FLOAT:
33232 case INT_FTYPE_INT:
33233 case UINT_FTYPE_UINT:
33234 case UINT16_FTYPE_UINT16:
33235 case UINT64_FTYPE_INT:
33236 case UINT64_FTYPE_UINT64:
33237 case INT64_FTYPE_INT64:
33238 case INT64_FTYPE_V4SF:
33239 case INT64_FTYPE_V2DF:
33240 case INT_FTYPE_V16QI:
33241 case INT_FTYPE_V8QI:
33242 case INT_FTYPE_V8SF:
33243 case INT_FTYPE_V4DF:
33244 case INT_FTYPE_V4SF:
33245 case INT_FTYPE_V2DF:
33246 case INT_FTYPE_V32QI:
33247 case V16QI_FTYPE_V16QI:
33248 case V8SI_FTYPE_V8SF:
33249 case V8SI_FTYPE_V4SI:
33250 case V8HI_FTYPE_V8HI:
33251 case V8HI_FTYPE_V16QI:
33252 case V8QI_FTYPE_V8QI:
33253 case V8SF_FTYPE_V8SF:
33254 case V8SF_FTYPE_V8SI:
33255 case V8SF_FTYPE_V4SF:
33256 case V8SF_FTYPE_V8HI:
33257 case V4SI_FTYPE_V4SI:
33258 case V4SI_FTYPE_V16QI:
33259 case V4SI_FTYPE_V4SF:
33260 case V4SI_FTYPE_V8SI:
33261 case V4SI_FTYPE_V8HI:
33262 case V4SI_FTYPE_V4DF:
33263 case V4SI_FTYPE_V2DF:
33264 case V4HI_FTYPE_V4HI:
33265 case V4DF_FTYPE_V4DF:
33266 case V4DF_FTYPE_V4SI:
33267 case V4DF_FTYPE_V4SF:
33268 case V4DF_FTYPE_V2DF:
33269 case V4SF_FTYPE_V4SF:
33270 case V4SF_FTYPE_V4SI:
33271 case V4SF_FTYPE_V8SF:
33272 case V4SF_FTYPE_V4DF:
33273 case V4SF_FTYPE_V8HI:
33274 case V4SF_FTYPE_V2DF:
33275 case V2DI_FTYPE_V2DI:
33276 case V2DI_FTYPE_V16QI:
33277 case V2DI_FTYPE_V8HI:
33278 case V2DI_FTYPE_V4SI:
33279 case V2DF_FTYPE_V2DF:
33280 case V2DF_FTYPE_V4SI:
33281 case V2DF_FTYPE_V4DF:
33282 case V2DF_FTYPE_V4SF:
33283 case V2DF_FTYPE_V2SI:
33284 case V2SI_FTYPE_V2SI:
33285 case V2SI_FTYPE_V4SF:
33286 case V2SI_FTYPE_V2SF:
33287 case V2SI_FTYPE_V2DF:
33288 case V2SF_FTYPE_V2SF:
33289 case V2SF_FTYPE_V2SI:
33290 case V32QI_FTYPE_V32QI:
33291 case V32QI_FTYPE_V16QI:
33292 case V16HI_FTYPE_V16HI:
33293 case V16HI_FTYPE_V8HI:
33294 case V8SI_FTYPE_V8SI:
33295 case V16HI_FTYPE_V16QI:
33296 case V8SI_FTYPE_V16QI:
33297 case V4DI_FTYPE_V16QI:
33298 case V8SI_FTYPE_V8HI:
33299 case V4DI_FTYPE_V8HI:
33300 case V4DI_FTYPE_V4SI:
33301 case V4DI_FTYPE_V2DI:
33302 case UQI_FTYPE_UQI:
33303 case UHI_FTYPE_UHI:
33304 case USI_FTYPE_USI:
33305 case USI_FTYPE_UQI:
33306 case USI_FTYPE_UHI:
33307 case UDI_FTYPE_UDI:
33308 case UHI_FTYPE_V16QI:
33309 case USI_FTYPE_V32QI:
33310 case UDI_FTYPE_V64QI:
33311 case V16QI_FTYPE_UHI:
33312 case V32QI_FTYPE_USI:
33313 case V64QI_FTYPE_UDI:
33314 case V8HI_FTYPE_UQI:
33315 case V16HI_FTYPE_UHI:
33316 case V32HI_FTYPE_USI:
33317 case V4SI_FTYPE_UQI:
33318 case V8SI_FTYPE_UQI:
33319 case V4SI_FTYPE_UHI:
33320 case V8SI_FTYPE_UHI:
33321 case UQI_FTYPE_V8HI:
33322 case UHI_FTYPE_V16HI:
33323 case USI_FTYPE_V32HI:
33324 case UQI_FTYPE_V4SI:
33325 case UQI_FTYPE_V8SI:
33326 case UHI_FTYPE_V16SI:
33327 case UQI_FTYPE_V2DI:
33328 case UQI_FTYPE_V4DI:
33329 case UQI_FTYPE_V8DI:
33330 case V16SI_FTYPE_UHI:
33331 case V2DI_FTYPE_UQI:
33332 case V4DI_FTYPE_UQI:
33333 case V16SI_FTYPE_INT:
33334 case V16SF_FTYPE_V8SF:
33335 case V16SI_FTYPE_V8SI:
33336 case V16SF_FTYPE_V4SF:
33337 case V16SI_FTYPE_V4SI:
33338 case V16SI_FTYPE_V16SF:
33339 case V16SI_FTYPE_V16SI:
33340 case V16SF_FTYPE_V16SF:
33341 case V8DI_FTYPE_UQI:
33342 case V8DI_FTYPE_V8DI:
33343 case V8DF_FTYPE_V4DF:
33344 case V8DF_FTYPE_V2DF:
33345 case V8DF_FTYPE_V8DF:
33346 nargs = 1;
33347 break;
33348 case V4SF_FTYPE_V4SF_VEC_MERGE:
33349 case V2DF_FTYPE_V2DF_VEC_MERGE:
33350 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33351 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33352 case V16QI_FTYPE_V16QI_V16QI:
33353 case V16QI_FTYPE_V8HI_V8HI:
33354 case V16SF_FTYPE_V16SF_V16SF:
33355 case V8QI_FTYPE_V8QI_V8QI:
33356 case V8QI_FTYPE_V4HI_V4HI:
33357 case V8HI_FTYPE_V8HI_V8HI:
33358 case V8HI_FTYPE_V16QI_V16QI:
33359 case V8HI_FTYPE_V4SI_V4SI:
33360 case V8SF_FTYPE_V8SF_V8SF:
33361 case V8SF_FTYPE_V8SF_V8SI:
33362 case V8DF_FTYPE_V8DF_V8DF:
33363 case V4SI_FTYPE_V4SI_V4SI:
33364 case V4SI_FTYPE_V8HI_V8HI:
33365 case V4SI_FTYPE_V2DF_V2DF:
33366 case V4HI_FTYPE_V4HI_V4HI:
33367 case V4HI_FTYPE_V8QI_V8QI:
33368 case V4HI_FTYPE_V2SI_V2SI:
33369 case V4DF_FTYPE_V4DF_V4DF:
33370 case V4DF_FTYPE_V4DF_V4DI:
33371 case V4SF_FTYPE_V4SF_V4SF:
33372 case V4SF_FTYPE_V4SF_V4SI:
33373 case V4SF_FTYPE_V4SF_V2SI:
33374 case V4SF_FTYPE_V4SF_V2DF:
33375 case V4SF_FTYPE_V4SF_UINT:
33376 case V4SF_FTYPE_V4SF_DI:
33377 case V4SF_FTYPE_V4SF_SI:
33378 case V2DI_FTYPE_V2DI_V2DI:
33379 case V2DI_FTYPE_V16QI_V16QI:
33380 case V2DI_FTYPE_V4SI_V4SI:
33381 case V2DI_FTYPE_V2DI_V16QI:
33382 case V2SI_FTYPE_V2SI_V2SI:
33383 case V2SI_FTYPE_V4HI_V4HI:
33384 case V2SI_FTYPE_V2SF_V2SF:
33385 case V2DF_FTYPE_V2DF_V2DF:
33386 case V2DF_FTYPE_V2DF_V4SF:
33387 case V2DF_FTYPE_V2DF_V2DI:
33388 case V2DF_FTYPE_V2DF_DI:
33389 case V2DF_FTYPE_V2DF_SI:
33390 case V2DF_FTYPE_V2DF_UINT:
33391 case V2SF_FTYPE_V2SF_V2SF:
33392 case V1DI_FTYPE_V1DI_V1DI:
33393 case V1DI_FTYPE_V8QI_V8QI:
33394 case V1DI_FTYPE_V2SI_V2SI:
33395 case V32QI_FTYPE_V16HI_V16HI:
33396 case V16HI_FTYPE_V8SI_V8SI:
33397 case V32QI_FTYPE_V32QI_V32QI:
33398 case V16HI_FTYPE_V32QI_V32QI:
33399 case V16HI_FTYPE_V16HI_V16HI:
33400 case V8SI_FTYPE_V4DF_V4DF:
33401 case V8SI_FTYPE_V8SI_V8SI:
33402 case V8SI_FTYPE_V16HI_V16HI:
33403 case V4DI_FTYPE_V4DI_V4DI:
33404 case V4DI_FTYPE_V8SI_V8SI:
33405 case V8DI_FTYPE_V64QI_V64QI:
33406 if (comparison == UNKNOWN)
33407 return ix86_expand_binop_builtin (icode, exp, target);
33408 nargs = 2;
33409 break;
33410 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33411 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33412 gcc_assert (comparison != UNKNOWN);
33413 nargs = 2;
33414 swap = true;
33415 break;
33416 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33417 case V16HI_FTYPE_V16HI_SI_COUNT:
33418 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33419 case V8SI_FTYPE_V8SI_SI_COUNT:
33420 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33421 case V4DI_FTYPE_V4DI_INT_COUNT:
33422 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33423 case V8HI_FTYPE_V8HI_SI_COUNT:
33424 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33425 case V4SI_FTYPE_V4SI_SI_COUNT:
33426 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33427 case V4HI_FTYPE_V4HI_SI_COUNT:
33428 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33429 case V2DI_FTYPE_V2DI_SI_COUNT:
33430 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33431 case V2SI_FTYPE_V2SI_SI_COUNT:
33432 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33433 case V1DI_FTYPE_V1DI_SI_COUNT:
33434 nargs = 2;
33435 second_arg_count = true;
33436 break;
33437 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
33438 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
33439 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
33440 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
33441 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
33442 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
33443 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
33444 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
33445 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
33446 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
33447 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
33448 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
33449 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
33450 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
33451 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
33452 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
33453 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
33454 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
33455 nargs = 4;
33456 second_arg_count = true;
33457 break;
33458 case UINT64_FTYPE_UINT64_UINT64:
33459 case UINT_FTYPE_UINT_UINT:
33460 case UINT_FTYPE_UINT_USHORT:
33461 case UINT_FTYPE_UINT_UCHAR:
33462 case UINT16_FTYPE_UINT16_INT:
33463 case UINT8_FTYPE_UINT8_INT:
33464 case UQI_FTYPE_UQI_UQI:
33465 case UHI_FTYPE_UHI_UHI:
33466 case USI_FTYPE_USI_USI:
33467 case UDI_FTYPE_UDI_UDI:
33468 case V16SI_FTYPE_V8DF_V8DF:
33469 nargs = 2;
33470 break;
33471 case V2DI_FTYPE_V2DI_INT_CONVERT:
33472 nargs = 2;
33473 rmode = V1TImode;
33474 nargs_constant = 1;
33475 break;
33476 case V4DI_FTYPE_V4DI_INT_CONVERT:
33477 nargs = 2;
33478 rmode = V2TImode;
33479 nargs_constant = 1;
33480 break;
33481 case V8DI_FTYPE_V8DI_INT_CONVERT:
33482 nargs = 2;
33483 rmode = V4TImode;
33484 nargs_constant = 1;
33485 break;
33486 case V8HI_FTYPE_V8HI_INT:
33487 case V8HI_FTYPE_V8SF_INT:
33488 case V16HI_FTYPE_V16SF_INT:
33489 case V8HI_FTYPE_V4SF_INT:
33490 case V8SF_FTYPE_V8SF_INT:
33491 case V4SF_FTYPE_V16SF_INT:
33492 case V16SF_FTYPE_V16SF_INT:
33493 case V4SI_FTYPE_V4SI_INT:
33494 case V4SI_FTYPE_V8SI_INT:
33495 case V4HI_FTYPE_V4HI_INT:
33496 case V4DF_FTYPE_V4DF_INT:
33497 case V4DF_FTYPE_V8DF_INT:
33498 case V4SF_FTYPE_V4SF_INT:
33499 case V4SF_FTYPE_V8SF_INT:
33500 case V2DI_FTYPE_V2DI_INT:
33501 case V2DF_FTYPE_V2DF_INT:
33502 case V2DF_FTYPE_V4DF_INT:
33503 case V16HI_FTYPE_V16HI_INT:
33504 case V8SI_FTYPE_V8SI_INT:
33505 case V16SI_FTYPE_V16SI_INT:
33506 case V4SI_FTYPE_V16SI_INT:
33507 case V4DI_FTYPE_V4DI_INT:
33508 case V2DI_FTYPE_V4DI_INT:
33509 case V4DI_FTYPE_V8DI_INT:
33510 case QI_FTYPE_V4SF_INT:
33511 case QI_FTYPE_V2DF_INT:
33512 case UQI_FTYPE_UQI_UQI_CONST:
33513 case UHI_FTYPE_UHI_UQI:
33514 case USI_FTYPE_USI_UQI:
33515 case UDI_FTYPE_UDI_UQI:
33516 nargs = 2;
33517 nargs_constant = 1;
33518 break;
33519 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33520 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33521 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33522 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33523 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33524 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33525 case UHI_FTYPE_V16SI_V16SI_UHI:
33526 case UQI_FTYPE_V8DI_V8DI_UQI:
33527 case V16HI_FTYPE_V16SI_V16HI_UHI:
33528 case V16QI_FTYPE_V16SI_V16QI_UHI:
33529 case V16QI_FTYPE_V8DI_V16QI_UQI:
33530 case V16SF_FTYPE_V16SF_V16SF_UHI:
33531 case V16SF_FTYPE_V4SF_V16SF_UHI:
33532 case V16SI_FTYPE_SI_V16SI_UHI:
33533 case V16SI_FTYPE_V16HI_V16SI_UHI:
33534 case V16SI_FTYPE_V16QI_V16SI_UHI:
33535 case V8SF_FTYPE_V4SF_V8SF_UQI:
33536 case V4DF_FTYPE_V2DF_V4DF_UQI:
33537 case V8SI_FTYPE_V4SI_V8SI_UQI:
33538 case V8SI_FTYPE_SI_V8SI_UQI:
33539 case V4SI_FTYPE_V4SI_V4SI_UQI:
33540 case V4SI_FTYPE_SI_V4SI_UQI:
33541 case V4DI_FTYPE_V2DI_V4DI_UQI:
33542 case V4DI_FTYPE_DI_V4DI_UQI:
33543 case V2DI_FTYPE_V2DI_V2DI_UQI:
33544 case V2DI_FTYPE_DI_V2DI_UQI:
33545 case V64QI_FTYPE_V64QI_V64QI_UDI:
33546 case V64QI_FTYPE_V16QI_V64QI_UDI:
33547 case V64QI_FTYPE_QI_V64QI_UDI:
33548 case V32QI_FTYPE_V32QI_V32QI_USI:
33549 case V32QI_FTYPE_V16QI_V32QI_USI:
33550 case V32QI_FTYPE_QI_V32QI_USI:
33551 case V16QI_FTYPE_V16QI_V16QI_UHI:
33552 case V16QI_FTYPE_QI_V16QI_UHI:
33553 case V32HI_FTYPE_V8HI_V32HI_USI:
33554 case V32HI_FTYPE_HI_V32HI_USI:
33555 case V16HI_FTYPE_V8HI_V16HI_UHI:
33556 case V16HI_FTYPE_HI_V16HI_UHI:
33557 case V8HI_FTYPE_V8HI_V8HI_UQI:
33558 case V8HI_FTYPE_HI_V8HI_UQI:
33559 case V8SF_FTYPE_V8HI_V8SF_UQI:
33560 case V4SF_FTYPE_V8HI_V4SF_UQI:
33561 case V8SI_FTYPE_V8SF_V8SI_UQI:
33562 case V4SI_FTYPE_V4SF_V4SI_UQI:
33563 case V4DI_FTYPE_V4SF_V4DI_UQI:
33564 case V2DI_FTYPE_V4SF_V2DI_UQI:
33565 case V4SF_FTYPE_V4DI_V4SF_UQI:
33566 case V4SF_FTYPE_V2DI_V4SF_UQI:
33567 case V4DF_FTYPE_V4DI_V4DF_UQI:
33568 case V2DF_FTYPE_V2DI_V2DF_UQI:
33569 case V16QI_FTYPE_V8HI_V16QI_UQI:
33570 case V16QI_FTYPE_V16HI_V16QI_UHI:
33571 case V16QI_FTYPE_V4SI_V16QI_UQI:
33572 case V16QI_FTYPE_V8SI_V16QI_UQI:
33573 case V8HI_FTYPE_V4SI_V8HI_UQI:
33574 case V8HI_FTYPE_V8SI_V8HI_UQI:
33575 case V16QI_FTYPE_V2DI_V16QI_UQI:
33576 case V16QI_FTYPE_V4DI_V16QI_UQI:
33577 case V8HI_FTYPE_V2DI_V8HI_UQI:
33578 case V8HI_FTYPE_V4DI_V8HI_UQI:
33579 case V4SI_FTYPE_V2DI_V4SI_UQI:
33580 case V4SI_FTYPE_V4DI_V4SI_UQI:
33581 case V32QI_FTYPE_V32HI_V32QI_USI:
33582 case UHI_FTYPE_V16QI_V16QI_UHI:
33583 case USI_FTYPE_V32QI_V32QI_USI:
33584 case UDI_FTYPE_V64QI_V64QI_UDI:
33585 case UQI_FTYPE_V8HI_V8HI_UQI:
33586 case UHI_FTYPE_V16HI_V16HI_UHI:
33587 case USI_FTYPE_V32HI_V32HI_USI:
33588 case UQI_FTYPE_V4SI_V4SI_UQI:
33589 case UQI_FTYPE_V8SI_V8SI_UQI:
33590 case UQI_FTYPE_V2DI_V2DI_UQI:
33591 case UQI_FTYPE_V4DI_V4DI_UQI:
33592 case V4SF_FTYPE_V2DF_V4SF_UQI:
33593 case V4SF_FTYPE_V4DF_V4SF_UQI:
33594 case V16SI_FTYPE_V16SI_V16SI_UHI:
33595 case V16SI_FTYPE_V4SI_V16SI_UHI:
33596 case V2DI_FTYPE_V4SI_V2DI_UQI:
33597 case V2DI_FTYPE_V8HI_V2DI_UQI:
33598 case V2DI_FTYPE_V16QI_V2DI_UQI:
33599 case V4DI_FTYPE_V4DI_V4DI_UQI:
33600 case V4DI_FTYPE_V4SI_V4DI_UQI:
33601 case V4DI_FTYPE_V8HI_V4DI_UQI:
33602 case V4DI_FTYPE_V16QI_V4DI_UQI:
33603 case V4DI_FTYPE_V4DF_V4DI_UQI:
33604 case V2DI_FTYPE_V2DF_V2DI_UQI:
33605 case V4SI_FTYPE_V4DF_V4SI_UQI:
33606 case V4SI_FTYPE_V2DF_V4SI_UQI:
33607 case V4SI_FTYPE_V8HI_V4SI_UQI:
33608 case V4SI_FTYPE_V16QI_V4SI_UQI:
33609 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33610 case V8DF_FTYPE_V2DF_V8DF_UQI:
33611 case V8DF_FTYPE_V4DF_V8DF_UQI:
33612 case V8DF_FTYPE_V8DF_V8DF_UQI:
33613 case V8SF_FTYPE_V8SF_V8SF_UQI:
33614 case V8SF_FTYPE_V8SI_V8SF_UQI:
33615 case V4DF_FTYPE_V4DF_V4DF_UQI:
33616 case V4SF_FTYPE_V4SF_V4SF_UQI:
33617 case V2DF_FTYPE_V2DF_V2DF_UQI:
33618 case V2DF_FTYPE_V4SF_V2DF_UQI:
33619 case V2DF_FTYPE_V4SI_V2DF_UQI:
33620 case V4SF_FTYPE_V4SI_V4SF_UQI:
33621 case V4DF_FTYPE_V4SF_V4DF_UQI:
33622 case V4DF_FTYPE_V4SI_V4DF_UQI:
33623 case V8SI_FTYPE_V8SI_V8SI_UQI:
33624 case V8SI_FTYPE_V8HI_V8SI_UQI:
33625 case V8SI_FTYPE_V16QI_V8SI_UQI:
33626 case V8DF_FTYPE_V8SI_V8DF_UQI:
33627 case V8DI_FTYPE_DI_V8DI_UQI:
33628 case V16SF_FTYPE_V8SF_V16SF_UHI:
33629 case V16SI_FTYPE_V8SI_V16SI_UHI:
33630 case V16HI_FTYPE_V16HI_V16HI_UHI:
33631 case V8HI_FTYPE_V16QI_V8HI_UQI:
33632 case V16HI_FTYPE_V16QI_V16HI_UHI:
33633 case V32HI_FTYPE_V32HI_V32HI_USI:
33634 case V32HI_FTYPE_V32QI_V32HI_USI:
33635 case V8DI_FTYPE_V16QI_V8DI_UQI:
33636 case V8DI_FTYPE_V2DI_V8DI_UQI:
33637 case V8DI_FTYPE_V4DI_V8DI_UQI:
33638 case V8DI_FTYPE_V8DI_V8DI_UQI:
33639 case V8DI_FTYPE_V8HI_V8DI_UQI:
33640 case V8DI_FTYPE_V8SI_V8DI_UQI:
33641 case V8HI_FTYPE_V8DI_V8HI_UQI:
33642 case V8SI_FTYPE_V8DI_V8SI_UQI:
33643 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33644 nargs = 3;
33645 break;
33646 case V32QI_FTYPE_V32QI_V32QI_INT:
33647 case V16HI_FTYPE_V16HI_V16HI_INT:
33648 case V16QI_FTYPE_V16QI_V16QI_INT:
33649 case V4DI_FTYPE_V4DI_V4DI_INT:
33650 case V8HI_FTYPE_V8HI_V8HI_INT:
33651 case V8SI_FTYPE_V8SI_V8SI_INT:
33652 case V8SI_FTYPE_V8SI_V4SI_INT:
33653 case V8SF_FTYPE_V8SF_V8SF_INT:
33654 case V8SF_FTYPE_V8SF_V4SF_INT:
33655 case V4SI_FTYPE_V4SI_V4SI_INT:
33656 case V4DF_FTYPE_V4DF_V4DF_INT:
33657 case V16SF_FTYPE_V16SF_V16SF_INT:
33658 case V16SF_FTYPE_V16SF_V4SF_INT:
33659 case V16SI_FTYPE_V16SI_V4SI_INT:
33660 case V4DF_FTYPE_V4DF_V2DF_INT:
33661 case V4SF_FTYPE_V4SF_V4SF_INT:
33662 case V2DI_FTYPE_V2DI_V2DI_INT:
33663 case V4DI_FTYPE_V4DI_V2DI_INT:
33664 case V2DF_FTYPE_V2DF_V2DF_INT:
33665 case UQI_FTYPE_V8DI_V8UDI_INT:
33666 case UQI_FTYPE_V8DF_V8DF_INT:
33667 case UQI_FTYPE_V2DF_V2DF_INT:
33668 case UQI_FTYPE_V4SF_V4SF_INT:
33669 case UHI_FTYPE_V16SI_V16SI_INT:
33670 case UHI_FTYPE_V16SF_V16SF_INT:
33671 case V64QI_FTYPE_V64QI_V64QI_INT:
33672 nargs = 3;
33673 nargs_constant = 1;
33674 break;
33675 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33676 nargs = 3;
33677 rmode = V4DImode;
33678 nargs_constant = 1;
33679 break;
33680 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33681 nargs = 3;
33682 rmode = V2DImode;
33683 nargs_constant = 1;
33684 break;
33685 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33686 nargs = 3;
33687 rmode = DImode;
33688 nargs_constant = 1;
33689 break;
33690 case V2DI_FTYPE_V2DI_UINT_UINT:
33691 nargs = 3;
33692 nargs_constant = 2;
33693 break;
33694 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
33695 nargs = 3;
33696 rmode = V8DImode;
33697 nargs_constant = 1;
33698 break;
33699 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
33700 nargs = 5;
33701 rmode = V8DImode;
33702 mask_pos = 2;
33703 nargs_constant = 1;
33704 break;
33705 case QI_FTYPE_V8DF_INT_UQI:
33706 case QI_FTYPE_V4DF_INT_UQI:
33707 case QI_FTYPE_V2DF_INT_UQI:
33708 case HI_FTYPE_V16SF_INT_UHI:
33709 case QI_FTYPE_V8SF_INT_UQI:
33710 case QI_FTYPE_V4SF_INT_UQI:
33711 nargs = 3;
33712 mask_pos = 1;
33713 nargs_constant = 1;
33714 break;
33715 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
33716 nargs = 5;
33717 rmode = V4DImode;
33718 mask_pos = 2;
33719 nargs_constant = 1;
33720 break;
33721 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
33722 nargs = 5;
33723 rmode = V2DImode;
33724 mask_pos = 2;
33725 nargs_constant = 1;
33726 break;
33727 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
33728 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
33729 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
33730 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
33731 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
33732 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
33733 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
33734 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
33735 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
33736 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
33737 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
33738 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
33739 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
33740 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
33741 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
33742 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
33743 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
33744 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
33745 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
33746 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
33747 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
33748 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
33749 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
33750 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
33751 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
33752 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
33753 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
33754 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
33755 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
33756 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
33757 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
33758 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
33759 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
33760 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
33761 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
33762 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
33763 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
33764 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
33765 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
33766 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
33767 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
33768 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
33769 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
33770 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
33771 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
33772 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
33773 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
33774 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
33775 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
33776 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
33777 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
33778 nargs = 4;
33779 break;
33780 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33781 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33782 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33783 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33784 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33785 nargs = 4;
33786 nargs_constant = 1;
33787 break;
33788 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
33789 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
33790 case QI_FTYPE_V4DF_V4DF_INT_UQI:
33791 case QI_FTYPE_V8SF_V8SF_INT_UQI:
33792 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
33793 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
33794 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
33795 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
33796 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
33797 case USI_FTYPE_V32QI_V32QI_INT_USI:
33798 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
33799 case USI_FTYPE_V32HI_V32HI_INT_USI:
33800 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
33801 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
33802 nargs = 4;
33803 mask_pos = 1;
33804 nargs_constant = 1;
33805 break;
33806 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33807 nargs = 4;
33808 nargs_constant = 2;
33809 break;
33810 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33811 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33812 nargs = 4;
33813 break;
33814 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
33815 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
33816 mask_pos = 1;
33817 nargs = 4;
33818 nargs_constant = 1;
33819 break;
33820 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
33821 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
33822 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
33823 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
33824 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
33825 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
33826 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
33827 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
33828 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
33829 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
33830 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
33831 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
33832 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
33833 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
33834 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
33835 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
33836 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
33837 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
33838 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
33839 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
33840 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
33841 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
33842 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
33843 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
33844 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
33845 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
33846 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
33847 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
33848 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
33849 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
33850 nargs = 4;
33851 mask_pos = 2;
33852 nargs_constant = 1;
33853 break;
33854 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
33855 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
33856 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
33857 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
33858 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
33859 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
33860 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
33861 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
33862 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
33863 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
33864 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
33865 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
33866 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
33867 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
33868 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
33869 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
33870 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
33871 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
33872 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
33873 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
33874 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
33875 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
33876 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
33877 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
33878 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
33879 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
33880 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
33881 nargs = 5;
33882 mask_pos = 2;
33883 nargs_constant = 1;
33884 break;
33885 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
33886 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
33887 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
33888 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
33889 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
33890 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
33891 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
33892 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
33893 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
33894 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
33895 nargs = 5;
33896 mask_pos = 1;
33897 nargs_constant = 1;
33898 break;
33899 case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
33900 case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
33901 case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
33902 nargs = 5;
33903 mask_pos = 1;
33904 nargs_constant = 2;
33905 break;
33907 default:
33908 gcc_unreachable ();
33911 gcc_assert (nargs <= ARRAY_SIZE (args));
33913 if (comparison != UNKNOWN)
33915 gcc_assert (nargs == 2);
33916 return ix86_expand_sse_compare (d, exp, target, swap);
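/* When rmode differs from the insn's result mode tmode (the *_CONVERT
   cases above), let the pattern write a fresh tmode register and hand the
   caller its rmode lowpart; otherwise the pattern writes target directly. */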
33919 if (rmode == VOIDmode || rmode == tmode)
33921 if (optimize
33922 || target == 0
33923 || GET_MODE (target) != tmode
33924 || !insn_p->operand[0].predicate (target, tmode))
33925 target = gen_reg_rtx (tmode);
33926 else if (memory_operand (target, tmode))
33927 num_memory++;
33928 real_target = target;
33930 else
33932 real_target = gen_reg_rtx (tmode);
33933 target = lowpart_subreg (rmode, real_target, tmode);
33936 for (i = 0; i < nargs; i++)
33938 tree arg = CALL_EXPR_ARG (exp, i);
33939 rtx op = expand_normal (arg);
33940 machine_mode mode = insn_p->operand[i + 1].mode;
33941 bool match = insn_p->operand[i + 1].predicate (op, mode);
33943 if (second_arg_count && i == 1)
33945 /* SIMD shift insns take either an 8-bit immediate or a
33946 register as the count, but the builtin functions take an
33947 int. If the count doesn't match, put it in a register.
33948 The instructions use a 64-bit count; if op is only
33949 32 bits wide, zero-extend it, since negative shift counts
33950 are undefined behavior and zero-extension is more
33951 efficient. */
33952 if (!match)
33954 if (SCALAR_INT_MODE_P (GET_MODE (op)))
33955 op = convert_modes (mode, GET_MODE (op), op, 1);
33956 else
33957 op = lowpart_subreg (mode, op, GET_MODE (op));
33958 if (!insn_p->operand[i + 1].predicate (op, mode))
33959 op = copy_to_reg (op);
33962 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
33963 || (!mask_pos && (nargs - i) <= nargs_constant))
33965 if (!match)
33966 switch (icode)
33968 case CODE_FOR_avx_vinsertf128v4di:
33969 case CODE_FOR_avx_vextractf128v4di:
33970 error ("the last argument must be a 1-bit immediate");
33971 return const0_rtx;
33973 case CODE_FOR_avx512f_cmpv8di3_mask:
33974 case CODE_FOR_avx512f_cmpv16si3_mask:
33975 case CODE_FOR_avx512f_ucmpv8di3_mask:
33976 case CODE_FOR_avx512f_ucmpv16si3_mask:
33977 case CODE_FOR_avx512vl_cmpv4di3_mask:
33978 case CODE_FOR_avx512vl_cmpv8si3_mask:
33979 case CODE_FOR_avx512vl_ucmpv4di3_mask:
33980 case CODE_FOR_avx512vl_ucmpv8si3_mask:
33981 case CODE_FOR_avx512vl_cmpv2di3_mask:
33982 case CODE_FOR_avx512vl_cmpv4si3_mask:
33983 case CODE_FOR_avx512vl_ucmpv2di3_mask:
33984 case CODE_FOR_avx512vl_ucmpv4si3_mask:
33985 error ("the last argument must be a 3-bit immediate");
33986 return const0_rtx;
33988 case CODE_FOR_sse4_1_roundsd:
33989 case CODE_FOR_sse4_1_roundss:
33991 case CODE_FOR_sse4_1_roundpd:
33992 case CODE_FOR_sse4_1_roundps:
33993 case CODE_FOR_avx_roundpd256:
33994 case CODE_FOR_avx_roundps256:
33996 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
33997 case CODE_FOR_sse4_1_roundps_sfix:
33998 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
33999 case CODE_FOR_avx_roundps_sfix256:
34001 case CODE_FOR_sse4_1_blendps:
34002 case CODE_FOR_avx_blendpd256:
34003 case CODE_FOR_avx_vpermilv4df:
34004 case CODE_FOR_avx_vpermilv4df_mask:
34005 case CODE_FOR_avx512f_getmantv8df_mask:
34006 case CODE_FOR_avx512f_getmantv16sf_mask:
34007 case CODE_FOR_avx512vl_getmantv8sf_mask:
34008 case CODE_FOR_avx512vl_getmantv4df_mask:
34009 case CODE_FOR_avx512vl_getmantv4sf_mask:
34010 case CODE_FOR_avx512vl_getmantv2df_mask:
34011 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34012 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34013 case CODE_FOR_avx512dq_rangepv4df_mask:
34014 case CODE_FOR_avx512dq_rangepv8sf_mask:
34015 case CODE_FOR_avx512dq_rangepv2df_mask:
34016 case CODE_FOR_avx512dq_rangepv4sf_mask:
34017 case CODE_FOR_avx_shufpd256_mask:
34018 error ("the last argument must be a 4-bit immediate");
34019 return const0_rtx;
34021 case CODE_FOR_sha1rnds4:
34022 case CODE_FOR_sse4_1_blendpd:
34023 case CODE_FOR_avx_vpermilv2df:
34024 case CODE_FOR_avx_vpermilv2df_mask:
34025 case CODE_FOR_xop_vpermil2v2df3:
34026 case CODE_FOR_xop_vpermil2v4sf3:
34027 case CODE_FOR_xop_vpermil2v4df3:
34028 case CODE_FOR_xop_vpermil2v8sf3:
34029 case CODE_FOR_avx512f_vinsertf32x4_mask:
34030 case CODE_FOR_avx512f_vinserti32x4_mask:
34031 case CODE_FOR_avx512f_vextractf32x4_mask:
34032 case CODE_FOR_avx512f_vextracti32x4_mask:
34033 case CODE_FOR_sse2_shufpd:
34034 case CODE_FOR_sse2_shufpd_mask:
34035 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34036 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34037 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34038 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34039 error ("the last argument must be a 2-bit immediate");
34040 return const0_rtx;
34042 case CODE_FOR_avx_vextractf128v4df:
34043 case CODE_FOR_avx_vextractf128v8sf:
34044 case CODE_FOR_avx_vextractf128v8si:
34045 case CODE_FOR_avx_vinsertf128v4df:
34046 case CODE_FOR_avx_vinsertf128v8sf:
34047 case CODE_FOR_avx_vinsertf128v8si:
34048 case CODE_FOR_avx512f_vinsertf64x4_mask:
34049 case CODE_FOR_avx512f_vinserti64x4_mask:
34050 case CODE_FOR_avx512f_vextractf64x4_mask:
34051 case CODE_FOR_avx512f_vextracti64x4_mask:
34052 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34053 case CODE_FOR_avx512dq_vinserti32x8_mask:
34054 case CODE_FOR_avx512vl_vinsertv4df:
34055 case CODE_FOR_avx512vl_vinsertv4di:
34056 case CODE_FOR_avx512vl_vinsertv8sf:
34057 case CODE_FOR_avx512vl_vinsertv8si:
34058 error ("the last argument must be a 1-bit immediate");
34059 return const0_rtx;
34061 case CODE_FOR_avx_vmcmpv2df3:
34062 case CODE_FOR_avx_vmcmpv4sf3:
34063 case CODE_FOR_avx_cmpv2df3:
34064 case CODE_FOR_avx_cmpv4sf3:
34065 case CODE_FOR_avx_cmpv4df3:
34066 case CODE_FOR_avx_cmpv8sf3:
34067 case CODE_FOR_avx512f_cmpv8df3_mask:
34068 case CODE_FOR_avx512f_cmpv16sf3_mask:
34069 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34070 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34071 error ("the last argument must be a 5-bit immediate");
34072 return const0_rtx;
34074 default:
34075 switch (nargs_constant)
34077 case 2:
34078 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34079 || (!mask_pos && (nargs - i) == nargs_constant))
34081 error ("the next to last argument must be an 8-bit immediate");
34082 break;
34084 /* FALLTHRU */
34085 case 1:
34086 error ("the last argument must be an 8-bit immediate");
34087 break;
34088 default:
34089 gcc_unreachable ();
34091 return const0_rtx;
34094 else
34096 if (VECTOR_MODE_P (mode))
34097 op = safe_vector_operand (op, mode);
34099 /* If we aren't optimizing, only allow one memory operand to
34100 be generated. */
34101 if (memory_operand (op, mode))
34102 num_memory++;
34104 op = fixup_modeless_constant (op, mode);
34106 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34108 if (optimize || !match || num_memory > 1)
34109 op = copy_to_mode_reg (mode, op);
34111 else
34113 op = copy_to_reg (op);
34114 op = lowpart_subreg (mode, op, GET_MODE (op));
34118 args[i].op = op;
34119 args[i].mode = mode;
34122 switch (nargs)
34124 case 1:
34125 pat = GEN_FCN (icode) (real_target, args[0].op);
34126 break;
34127 case 2:
34128 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34129 break;
34130 case 3:
34131 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34132 args[2].op);
34133 break;
34134 case 4:
34135 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34136 args[2].op, args[3].op);
34137 break;
34138 case 5:
34139 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34140 args[2].op, args[3].op, args[4].op);
34141 break;
34142 case 6:
34143 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34144 args[2].op, args[3].op, args[4].op,
34145 args[5].op);
34146 break;
34147 default:
34148 gcc_unreachable ();
34151 if (! pat)
34152 return 0;
34154 emit_insn (pat);
34155 return target;
34158 /* Transform a pattern of the following layout:
34159 (set A
34160 (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
34162 into:
34163 (set A B)  */
34165 static rtx
34166 ix86_erase_embedded_rounding (rtx pat)
34168 if (GET_CODE (pat) == INSN)
34169 pat = PATTERN (pat);
34171 gcc_assert (GET_CODE (pat) == SET);
34172 rtx src = SET_SRC (pat);
34173 gcc_assert (XVECLEN (src, 0) == 2);
34174 rtx p0 = XVECEXP (src, 0, 0);
34175 gcc_assert (GET_CODE (src) == UNSPEC
34176 && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
34177 rtx res = gen_rtx_SET (SET_DEST (pat), p0);
34178 return res;
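/* Used below when the rounding argument turns out to be NO_ROUND: the
   UNSPEC_EMBEDDED_ROUNDING wrapper is then redundant and only the plain
   SET of B into A is kept. */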
34181 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34182 with rounding. */
34183 static rtx
34184 ix86_expand_sse_comi_round (const struct builtin_description *d,
34185 tree exp, rtx target)
34187 rtx pat, set_dst;
34188 tree arg0 = CALL_EXPR_ARG (exp, 0);
34189 tree arg1 = CALL_EXPR_ARG (exp, 1);
34190 tree arg2 = CALL_EXPR_ARG (exp, 2);
34191 tree arg3 = CALL_EXPR_ARG (exp, 3);
34192 rtx op0 = expand_normal (arg0);
34193 rtx op1 = expand_normal (arg1);
34194 rtx op2 = expand_normal (arg2);
34195 rtx op3 = expand_normal (arg3);
34196 enum insn_code icode = d->icode;
34197 const struct insn_data_d *insn_p = &insn_data[icode];
34198 machine_mode mode0 = insn_p->operand[0].mode;
34199 machine_mode mode1 = insn_p->operand[1].mode;
34200 enum rtx_code comparison = UNEQ;
34201 bool need_ucomi = false;
34203 /* See avxintrin.h for values. */
34204 enum rtx_code comi_comparisons[32] =
34206 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34207 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34208 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34210 bool need_ucomi_values[32] =
34212 true, false, false, true, true, false, false, true,
34213 true, false, false, true, true, false, false, true,
34214 false, true, true, false, false, true, true, false,
34215 false, true, true, false, false, true, true, false
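/* The third builtin argument (a _CMP_* predicate, see avxintrin.h) indexes
   both tables: comi_comparisons gives the RTL comparison applied to the
   flags result, while need_ucomi_values switches to the non-signalling
   ucomi form of the instruction for that predicate (editor's reading of
   the tables; the encodings themselves live in avxintrin.h). */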
34218 if (!CONST_INT_P (op2))
34220 error ("the third argument must be a comparison constant");
34221 return const0_rtx;
34223 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34225 error ("incorrect comparison mode");
34226 return const0_rtx;
34229 if (!insn_p->operand[2].predicate (op3, SImode))
34231 error ("incorrect rounding operand");
34232 return const0_rtx;
34235 comparison = comi_comparisons[INTVAL (op2)];
34236 need_ucomi = need_ucomi_values[INTVAL (op2)];
34238 if (VECTOR_MODE_P (mode0))
34239 op0 = safe_vector_operand (op0, mode0);
34240 if (VECTOR_MODE_P (mode1))
34241 op1 = safe_vector_operand (op1, mode1);
34243 target = gen_reg_rtx (SImode);
34244 emit_move_insn (target, const0_rtx);
34245 target = gen_rtx_SUBREG (QImode, target, 0);
34247 if ((optimize && !register_operand (op0, mode0))
34248 || !insn_p->operand[0].predicate (op0, mode0))
34249 op0 = copy_to_mode_reg (mode0, op0);
34250 if ((optimize && !register_operand (op1, mode1))
34251 || !insn_p->operand[1].predicate (op1, mode1))
34252 op1 = copy_to_mode_reg (mode1, op1);
34254 if (need_ucomi)
34255 icode = icode == CODE_FOR_sse_comi_round
34256 ? CODE_FOR_sse_ucomi_round
34257 : CODE_FOR_sse2_ucomi_round;
34259 pat = GEN_FCN (icode) (op0, op1, op3);
34260 if (! pat)
34261 return 0;
34263 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34264 if (INTVAL (op3) == NO_ROUND)
34266 pat = ix86_erase_embedded_rounding (pat);
34267 if (! pat)
34268 return 0;
34270 set_dst = SET_DEST (pat);
34272 else
34274 gcc_assert (GET_CODE (pat) == SET);
34275 set_dst = SET_DEST (pat);
34278 emit_insn (pat);
34279 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34280 gen_rtx_fmt_ee (comparison, QImode,
34281 set_dst,
34282 const0_rtx)));
34284 return SUBREG_REG (target);
34287 static rtx
34288 ix86_expand_round_builtin (const struct builtin_description *d,
34289 tree exp, rtx target)
34291 rtx pat;
34292 unsigned int i, nargs;
34293 struct
34295 rtx op;
34296 machine_mode mode;
34297 } args[6];
34298 enum insn_code icode = d->icode;
34299 const struct insn_data_d *insn_p = &insn_data[icode];
34300 machine_mode tmode = insn_p->operand[0].mode;
34301 unsigned int nargs_constant = 0;
34302 unsigned int redundant_embed_rnd = 0;
34304 switch ((enum ix86_builtin_func_type) d->flag)
34306 case UINT64_FTYPE_V2DF_INT:
34307 case UINT64_FTYPE_V4SF_INT:
34308 case UINT_FTYPE_V2DF_INT:
34309 case UINT_FTYPE_V4SF_INT:
34310 case INT64_FTYPE_V2DF_INT:
34311 case INT64_FTYPE_V4SF_INT:
34312 case INT_FTYPE_V2DF_INT:
34313 case INT_FTYPE_V4SF_INT:
34314 nargs = 2;
34315 break;
34316 case V4SF_FTYPE_V4SF_UINT_INT:
34317 case V4SF_FTYPE_V4SF_UINT64_INT:
34318 case V2DF_FTYPE_V2DF_UINT64_INT:
34319 case V4SF_FTYPE_V4SF_INT_INT:
34320 case V4SF_FTYPE_V4SF_INT64_INT:
34321 case V2DF_FTYPE_V2DF_INT64_INT:
34322 case V4SF_FTYPE_V4SF_V4SF_INT:
34323 case V2DF_FTYPE_V2DF_V2DF_INT:
34324 case V4SF_FTYPE_V4SF_V2DF_INT:
34325 case V2DF_FTYPE_V2DF_V4SF_INT:
34326 nargs = 3;
34327 break;
34328 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34329 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34330 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34331 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
34332 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
34333 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
34334 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34335 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
34336 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34337 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34338 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34339 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34340 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34341 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34342 nargs = 4;
34343 break;
34344 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34345 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34346 nargs_constant = 2;
34347 nargs = 4;
34348 break;
34349 case INT_FTYPE_V4SF_V4SF_INT_INT:
34350 case INT_FTYPE_V2DF_V2DF_INT_INT:
34351 return ix86_expand_sse_comi_round (d, exp, target);
34352 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
34353 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
34354 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
34355 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34356 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34357 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34358 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34359 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34360 nargs = 5;
34361 break;
34362 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34363 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34364 nargs_constant = 4;
34365 nargs = 5;
34366 break;
34367 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
34368 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
34369 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
34370 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
34371 nargs_constant = 3;
34372 nargs = 5;
34373 break;
34374 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
34375 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
34376 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34377 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34378 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
34379 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
34380 nargs = 6;
34381 nargs_constant = 4;
34382 break;
34383 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34384 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34385 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34386 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34387 nargs = 6;
34388 nargs_constant = 3;
34389 break;
34390 default:
34391 gcc_unreachable ();
34393 gcc_assert (nargs <= ARRAY_SIZE (args));
34395 if (optimize
34396 || target == 0
34397 || GET_MODE (target) != tmode
34398 || !insn_p->operand[0].predicate (target, tmode))
34399 target = gen_reg_rtx (tmode);
34401 for (i = 0; i < nargs; i++)
34403 tree arg = CALL_EXPR_ARG (exp, i);
34404 rtx op = expand_normal (arg);
34405 machine_mode mode = insn_p->operand[i + 1].mode;
34406 bool match = insn_p->operand[i + 1].predicate (op, mode);
34408 if (i == nargs - nargs_constant)
34410 if (!match)
34412 switch (icode)
34414 case CODE_FOR_avx512f_getmantv8df_mask_round:
34415 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34416 case CODE_FOR_avx512f_vgetmantv2df_round:
34417 case CODE_FOR_avx512f_vgetmantv2df_mask_round:
34418 case CODE_FOR_avx512f_vgetmantv4sf_round:
34419 case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
34420 error ("the immediate argument must be a 4-bit immediate");
34421 return const0_rtx;
34422 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34423 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34424 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34425 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34426 error ("the immediate argument must be a 5-bit immediate");
34427 return const0_rtx;
34428 default:
34429 error ("the immediate argument must be an 8-bit immediate");
34430 return const0_rtx;
34434 else if (i == nargs - 1)
34436 if (!insn_p->operand[nargs].predicate (op, SImode))
34438 error ("incorrect rounding operand");
34439 return const0_rtx;
34442 /* If there is no rounding, use the normal version of the pattern. */
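/* For instance, an intrinsic call such as
     _mm_add_round_sd (a, b, _MM_FROUND_CUR_DIRECTION)
   (assuming the usual mapping of _MM_FROUND_CUR_DIRECTION to NO_ROUND)
   lands here, and the embedded-rounding form is erased further below in
   favour of the ordinary pattern.  */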
34443 if (INTVAL (op) == NO_ROUND)
34444 redundant_embed_rnd = 1;
34446 else
34448 if (VECTOR_MODE_P (mode))
34449 op = safe_vector_operand (op, mode);
34451 op = fixup_modeless_constant (op, mode);
34453 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34455 if (optimize || !match)
34456 op = copy_to_mode_reg (mode, op);
34458 else
34460 op = copy_to_reg (op);
34461 op = lowpart_subreg (mode, op, GET_MODE (op));
34465 args[i].op = op;
34466 args[i].mode = mode;
34469 switch (nargs)
34471 case 1:
34472 pat = GEN_FCN (icode) (target, args[0].op);
34473 break;
34474 case 2:
34475 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34476 break;
34477 case 3:
34478 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34479 args[2].op);
34480 break;
34481 case 4:
34482 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34483 args[2].op, args[3].op);
34484 break;
34485 case 5:
34486 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34487 args[2].op, args[3].op, args[4].op);
34488 break;
34489 case 6:
34490 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34491 args[2].op, args[3].op, args[4].op,
34492 args[5].op);
34493 break;
34494 default:
34495 gcc_unreachable ();
34498 if (!pat)
34499 return 0;
34501 if (redundant_embed_rnd)
34502 pat = ix86_erase_embedded_rounding (pat);
34504 emit_insn (pat);
34505 return target;
34508 /* Subroutine of ix86_expand_builtin to take care of special insns
34509 with variable number of operands. */
34511 static rtx
34512 ix86_expand_special_args_builtin (const struct builtin_description *d,
34513 tree exp, rtx target)
34515 tree arg;
34516 rtx pat, op;
34517 unsigned int i, nargs, arg_adjust, memory;
34518 bool aligned_mem = false;
34519 struct
34521 rtx op;
34522 machine_mode mode;
34523 } args[3];
34524 enum insn_code icode = d->icode;
34525 bool last_arg_constant = false;
34526 const struct insn_data_d *insn_p = &insn_data[icode];
34527 machine_mode tmode = insn_p->operand[0].mode;
34528 enum { load, store } klass;
34530 switch ((enum ix86_builtin_func_type) d->flag)
34532 case VOID_FTYPE_VOID:
34533 emit_insn (GEN_FCN (icode) (target));
34534 return 0;
34535 case VOID_FTYPE_UINT64:
34536 case VOID_FTYPE_UNSIGNED:
34537 nargs = 0;
34538 klass = store;
34539 memory = 0;
34540 break;
34542 case INT_FTYPE_VOID:
34543 case USHORT_FTYPE_VOID:
34544 case UINT64_FTYPE_VOID:
34545 case UNSIGNED_FTYPE_VOID:
34546 nargs = 0;
34547 klass = load;
34548 memory = 0;
34549 break;
34550 case UINT64_FTYPE_PUNSIGNED:
34551 case V2DI_FTYPE_PV2DI:
34552 case V4DI_FTYPE_PV4DI:
34553 case V32QI_FTYPE_PCCHAR:
34554 case V16QI_FTYPE_PCCHAR:
34555 case V8SF_FTYPE_PCV4SF:
34556 case V8SF_FTYPE_PCFLOAT:
34557 case V4SF_FTYPE_PCFLOAT:
34558 case V4DF_FTYPE_PCV2DF:
34559 case V4DF_FTYPE_PCDOUBLE:
34560 case V2DF_FTYPE_PCDOUBLE:
34561 case VOID_FTYPE_PVOID:
34562 case V8DI_FTYPE_PV8DI:
34563 nargs = 1;
34564 klass = load;
34565 memory = 0;
34566 switch (icode)
34568 case CODE_FOR_sse4_1_movntdqa:
34569 case CODE_FOR_avx2_movntdqa:
34570 case CODE_FOR_avx512f_movntdqa:
34571 aligned_mem = true;
34572 break;
34573 default:
34574 break;
34576 break;
34577 case VOID_FTYPE_PV2SF_V4SF:
34578 case VOID_FTYPE_PV8DI_V8DI:
34579 case VOID_FTYPE_PV4DI_V4DI:
34580 case VOID_FTYPE_PV2DI_V2DI:
34581 case VOID_FTYPE_PCHAR_V32QI:
34582 case VOID_FTYPE_PCHAR_V16QI:
34583 case VOID_FTYPE_PFLOAT_V16SF:
34584 case VOID_FTYPE_PFLOAT_V8SF:
34585 case VOID_FTYPE_PFLOAT_V4SF:
34586 case VOID_FTYPE_PDOUBLE_V8DF:
34587 case VOID_FTYPE_PDOUBLE_V4DF:
34588 case VOID_FTYPE_PDOUBLE_V2DF:
34589 case VOID_FTYPE_PLONGLONG_LONGLONG:
34590 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34591 case VOID_FTYPE_PINT_INT:
34592 nargs = 1;
34593 klass = store;
34594 /* Reserve memory operand for target. */
34595 memory = ARRAY_SIZE (args);
34596 switch (icode)
34598 /* These builtins and instructions require the memory
34599 to be properly aligned. */
34600 case CODE_FOR_avx_movntv4di:
34601 case CODE_FOR_sse2_movntv2di:
34602 case CODE_FOR_avx_movntv8sf:
34603 case CODE_FOR_sse_movntv4sf:
34604 case CODE_FOR_sse4a_vmmovntv4sf:
34605 case CODE_FOR_avx_movntv4df:
34606 case CODE_FOR_sse2_movntv2df:
34607 case CODE_FOR_sse4a_vmmovntv2df:
34608 case CODE_FOR_sse2_movntidi:
34609 case CODE_FOR_sse_movntq:
34610 case CODE_FOR_sse2_movntisi:
34611 case CODE_FOR_avx512f_movntv16sf:
34612 case CODE_FOR_avx512f_movntv8df:
34613 case CODE_FOR_avx512f_movntv8di:
34614 aligned_mem = true;
34615 break;
34616 default:
34617 break;
34619 break;
34620 case V4SF_FTYPE_V4SF_PCV2SF:
34621 case V2DF_FTYPE_V2DF_PCDOUBLE:
34622 nargs = 2;
34623 klass = load;
34624 memory = 1;
34625 break;
34626 case V8SF_FTYPE_PCV8SF_V8SI:
34627 case V4DF_FTYPE_PCV4DF_V4DI:
34628 case V4SF_FTYPE_PCV4SF_V4SI:
34629 case V2DF_FTYPE_PCV2DF_V2DI:
34630 case V8SI_FTYPE_PCV8SI_V8SI:
34631 case V4DI_FTYPE_PCV4DI_V4DI:
34632 case V4SI_FTYPE_PCV4SI_V4SI:
34633 case V2DI_FTYPE_PCV2DI_V2DI:
34634 case VOID_FTYPE_INT_INT64:
34635 nargs = 2;
34636 klass = load;
34637 memory = 0;
34638 break;
34639 case VOID_FTYPE_PV8DF_V8DF_UQI:
34640 case VOID_FTYPE_PV4DF_V4DF_UQI:
34641 case VOID_FTYPE_PV2DF_V2DF_UQI:
34642 case VOID_FTYPE_PV16SF_V16SF_UHI:
34643 case VOID_FTYPE_PV8SF_V8SF_UQI:
34644 case VOID_FTYPE_PV4SF_V4SF_UQI:
34645 case VOID_FTYPE_PV8DI_V8DI_UQI:
34646 case VOID_FTYPE_PV4DI_V4DI_UQI:
34647 case VOID_FTYPE_PV2DI_V2DI_UQI:
34648 case VOID_FTYPE_PV16SI_V16SI_UHI:
34649 case VOID_FTYPE_PV8SI_V8SI_UQI:
34650 case VOID_FTYPE_PV4SI_V4SI_UQI:
34651 switch (icode)
34653 /* These builtins and instructions require the memory
34654 to be properly aligned. */
34655 case CODE_FOR_avx512f_storev16sf_mask:
34656 case CODE_FOR_avx512f_storev16si_mask:
34657 case CODE_FOR_avx512f_storev8df_mask:
34658 case CODE_FOR_avx512f_storev8di_mask:
34659 case CODE_FOR_avx512vl_storev8sf_mask:
34660 case CODE_FOR_avx512vl_storev8si_mask:
34661 case CODE_FOR_avx512vl_storev4df_mask:
34662 case CODE_FOR_avx512vl_storev4di_mask:
34663 case CODE_FOR_avx512vl_storev4sf_mask:
34664 case CODE_FOR_avx512vl_storev4si_mask:
34665 case CODE_FOR_avx512vl_storev2df_mask:
34666 case CODE_FOR_avx512vl_storev2di_mask:
34667 aligned_mem = true;
34668 break;
34669 default:
34670 break;
34672 /* FALLTHRU */
34673 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34674 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34675 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34676 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34677 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34678 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34679 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34680 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34681 case VOID_FTYPE_PV8SI_V8DI_UQI:
34682 case VOID_FTYPE_PV8HI_V8DI_UQI:
34683 case VOID_FTYPE_PV16HI_V16SI_UHI:
34684 case VOID_FTYPE_PV16QI_V8DI_UQI:
34685 case VOID_FTYPE_PV16QI_V16SI_UHI:
34686 case VOID_FTYPE_PV4SI_V4DI_UQI:
34687 case VOID_FTYPE_PV4SI_V2DI_UQI:
34688 case VOID_FTYPE_PV8HI_V4DI_UQI:
34689 case VOID_FTYPE_PV8HI_V2DI_UQI:
34690 case VOID_FTYPE_PV8HI_V8SI_UQI:
34691 case VOID_FTYPE_PV8HI_V4SI_UQI:
34692 case VOID_FTYPE_PV16QI_V4DI_UQI:
34693 case VOID_FTYPE_PV16QI_V2DI_UQI:
34694 case VOID_FTYPE_PV16QI_V8SI_UQI:
34695 case VOID_FTYPE_PV16QI_V4SI_UQI:
34696 case VOID_FTYPE_PCHAR_V64QI_UDI:
34697 case VOID_FTYPE_PCHAR_V32QI_USI:
34698 case VOID_FTYPE_PCHAR_V16QI_UHI:
34699 case VOID_FTYPE_PSHORT_V32HI_USI:
34700 case VOID_FTYPE_PSHORT_V16HI_UHI:
34701 case VOID_FTYPE_PSHORT_V8HI_UQI:
34702 case VOID_FTYPE_PINT_V16SI_UHI:
34703 case VOID_FTYPE_PINT_V8SI_UQI:
34704 case VOID_FTYPE_PINT_V4SI_UQI:
34705 case VOID_FTYPE_PINT64_V8DI_UQI:
34706 case VOID_FTYPE_PINT64_V4DI_UQI:
34707 case VOID_FTYPE_PINT64_V2DI_UQI:
34708 case VOID_FTYPE_PDOUBLE_V8DF_UQI:
34709 case VOID_FTYPE_PDOUBLE_V4DF_UQI:
34710 case VOID_FTYPE_PDOUBLE_V2DF_UQI:
34711 case VOID_FTYPE_PFLOAT_V16SF_UHI:
34712 case VOID_FTYPE_PFLOAT_V8SF_UQI:
34713 case VOID_FTYPE_PFLOAT_V4SF_UQI:
34714 case VOID_FTYPE_PV32QI_V32HI_USI:
34715 case VOID_FTYPE_PV16QI_V16HI_UHI:
34716 case VOID_FTYPE_PV8QI_V8HI_UQI:
34717 nargs = 2;
34718 klass = store;
34719 /* Reserve memory operand for target. */
34720 memory = ARRAY_SIZE (args);
34721 break;
34722 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
34723 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
34724 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
34725 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
34726 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
34727 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
34728 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
34729 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
34730 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
34731 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
34732 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
34733 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
34734 switch (icode)
34736 /* These builtins and instructions require the memory
34737 to be properly aligned. */
34738 case CODE_FOR_avx512f_loadv16sf_mask:
34739 case CODE_FOR_avx512f_loadv16si_mask:
34740 case CODE_FOR_avx512f_loadv8df_mask:
34741 case CODE_FOR_avx512f_loadv8di_mask:
34742 case CODE_FOR_avx512vl_loadv8sf_mask:
34743 case CODE_FOR_avx512vl_loadv8si_mask:
34744 case CODE_FOR_avx512vl_loadv4df_mask:
34745 case CODE_FOR_avx512vl_loadv4di_mask:
34746 case CODE_FOR_avx512vl_loadv4sf_mask:
34747 case CODE_FOR_avx512vl_loadv4si_mask:
34748 case CODE_FOR_avx512vl_loadv2df_mask:
34749 case CODE_FOR_avx512vl_loadv2di_mask:
34750 case CODE_FOR_avx512bw_loadv64qi_mask:
34751 case CODE_FOR_avx512vl_loadv32qi_mask:
34752 case CODE_FOR_avx512vl_loadv16qi_mask:
34753 case CODE_FOR_avx512bw_loadv32hi_mask:
34754 case CODE_FOR_avx512vl_loadv16hi_mask:
34755 case CODE_FOR_avx512vl_loadv8hi_mask:
34756 aligned_mem = true;
34757 break;
34758 default:
34759 break;
34761 case V64QI_FTYPE_PCCHAR_V64QI_UDI:
34762 case V32QI_FTYPE_PCCHAR_V32QI_USI:
34763 case V16QI_FTYPE_PCCHAR_V16QI_UHI:
34764 case V32HI_FTYPE_PCSHORT_V32HI_USI:
34765 case V16HI_FTYPE_PCSHORT_V16HI_UHI:
34766 case V8HI_FTYPE_PCSHORT_V8HI_UQI:
34767 case V16SI_FTYPE_PCINT_V16SI_UHI:
34768 case V8SI_FTYPE_PCINT_V8SI_UQI:
34769 case V4SI_FTYPE_PCINT_V4SI_UQI:
34770 case V8DI_FTYPE_PCINT64_V8DI_UQI:
34771 case V4DI_FTYPE_PCINT64_V4DI_UQI:
34772 case V2DI_FTYPE_PCINT64_V2DI_UQI:
34773 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
34774 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
34775 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
34776 case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
34777 case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
34778 case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
34779 nargs = 3;
34780 klass = load;
34781 memory = 0;
34782 break;
34783 case VOID_FTYPE_UINT_UINT_UINT:
34784 case VOID_FTYPE_UINT64_UINT_UINT:
34785 case UCHAR_FTYPE_UINT_UINT_UINT:
34786 case UCHAR_FTYPE_UINT64_UINT_UINT:
34787 nargs = 3;
34788 klass = load;
34789 memory = ARRAY_SIZE (args);
34790 last_arg_constant = true;
34791 break;
34792 default:
34793 gcc_unreachable ();
34796 gcc_assert (nargs <= ARRAY_SIZE (args));
34798 if (klass == store)
34800 arg = CALL_EXPR_ARG (exp, 0);
34801 op = expand_normal (arg);
34802 gcc_assert (target == 0);
34803 if (memory)
34805 op = ix86_zero_extend_to_Pmode (op);
34806 target = gen_rtx_MEM (tmode, op);
34807 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34808 on it. Try to improve it using get_pointer_alignment,
34809 and if the special builtin is one that requires strict
34810 mode alignment, also from its GET_MODE_ALIGNMENT.
34811 Failure to do so could lead to ix86_legitimate_combined_insn
34812 rejecting all changes to such insns. */
34813 unsigned int align = get_pointer_alignment (arg);
34814 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34815 align = GET_MODE_ALIGNMENT (tmode);
34816 if (MEM_ALIGN (target) < align)
34817 set_mem_align (target, align);
34819 else
34820 target = force_reg (tmode, op);
34821 arg_adjust = 1;
34823 else
34825 arg_adjust = 0;
34826 if (optimize
34827 || target == 0
34828 || !register_operand (target, tmode)
34829 || GET_MODE (target) != tmode)
34830 target = gen_reg_rtx (tmode);
34833 for (i = 0; i < nargs; i++)
34835 machine_mode mode = insn_p->operand[i + 1].mode;
34836 bool match;
34838 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34839 op = expand_normal (arg);
34840 match = insn_p->operand[i + 1].predicate (op, mode);
34842 if (last_arg_constant && (i + 1) == nargs)
34844 if (!match)
34846 if (icode == CODE_FOR_lwp_lwpvalsi3
34847 || icode == CODE_FOR_lwp_lwpinssi3
34848 || icode == CODE_FOR_lwp_lwpvaldi3
34849 || icode == CODE_FOR_lwp_lwpinsdi3)
34850 error ("the last argument must be a 32-bit immediate");
34851 else
34852 error ("the last argument must be an 8-bit immediate");
34853 return const0_rtx;
34856 else
34858 if (i == memory)
34860 /* This must be the memory operand. */
34861 op = ix86_zero_extend_to_Pmode (op);
34862 op = gen_rtx_MEM (mode, op);
34863 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34864 on it. Try to improve it using get_pointer_alignment,
34865 and if the special builtin is one that requires strict
34866 mode alignment, also from its GET_MODE_ALIGNMENT.
34867 Failure to do so could lead to ix86_legitimate_combined_insn
34868 rejecting all changes to such insns. */
34869 unsigned int align = get_pointer_alignment (arg);
34870 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34871 align = GET_MODE_ALIGNMENT (mode);
34872 if (MEM_ALIGN (op) < align)
34873 set_mem_align (op, align);
34875 else
34877 /* This must be a register operand. */
34878 if (VECTOR_MODE_P (mode))
34879 op = safe_vector_operand (op, mode);
34881 op = fixup_modeless_constant (op, mode);
34883 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34884 op = copy_to_mode_reg (mode, op);
34885 else
34887 op = copy_to_reg (op);
34888 op = lowpart_subreg (mode, op, GET_MODE (op));
34893 args[i].op = op;
34894 args[i].mode = mode;
34897 switch (nargs)
34899 case 0:
34900 pat = GEN_FCN (icode) (target);
34901 break;
34902 case 1:
34903 pat = GEN_FCN (icode) (target, args[0].op);
34904 break;
34905 case 2:
34906 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34907 break;
34908 case 3:
34909 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34910 break;
34911 default:
34912 gcc_unreachable ();
34915 if (! pat)
34916 return 0;
34917 emit_insn (pat);
34918 return klass == store ? 0 : target;
34921 /* Return the integer constant in ARG. Constrain it to be in the range
34922 of the subparts of VEC_TYPE; issue an error if it is not. */
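/* For example, with a V4SF vector type a selector of 0..3 is returned
   unchanged, while 4 or a non-constant selector hits the error below and
   yields 0.  */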
34924 static int
34925 get_element_number (tree vec_type, tree arg)
34927 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34929 if (!tree_fits_uhwi_p (arg)
34930 || (elt = tree_to_uhwi (arg), elt > max))
34932 error ("selector must be an integer constant in the range 0..%wi", max);
34933 return 0;
34936 return elt;
34939 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34940 ix86_expand_vector_init. We DO have language-level syntax for this, in
34941 the form of (type){ init-list }. Except that since we can't place emms
34942 instructions from inside the compiler, we can't allow the use of MMX
34943 registers unless the user explicitly asks for it. So we do *not* define
34944 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34945 we have builtins invoked by mmintrin.h that give us license to emit
34946 these sorts of instructions. */
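/* For example, _mm_set_pi16 is assumed to expand to
   __builtin_ia32_vec_init_v4hi and so funnels through this routine rather
   than through a (type){...} initializer.  */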
34948 static rtx
34949 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34951 machine_mode tmode = TYPE_MODE (type);
34952 machine_mode inner_mode = GET_MODE_INNER (tmode);
34953 int i, n_elt = GET_MODE_NUNITS (tmode);
34954 rtvec v = rtvec_alloc (n_elt);
34956 gcc_assert (VECTOR_MODE_P (tmode));
34957 gcc_assert (call_expr_nargs (exp) == n_elt);
34959 for (i = 0; i < n_elt; ++i)
34961 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34962 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34965 if (!target || !register_operand (target, tmode))
34966 target = gen_reg_rtx (tmode);
34968 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34969 return target;
34972 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34973 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34974 had a language-level syntax for referencing vector elements. */
34976 static rtx
34977 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34979 machine_mode tmode, mode0;
34980 tree arg0, arg1;
34981 int elt;
34982 rtx op0;
34984 arg0 = CALL_EXPR_ARG (exp, 0);
34985 arg1 = CALL_EXPR_ARG (exp, 1);
34987 op0 = expand_normal (arg0);
34988 elt = get_element_number (TREE_TYPE (arg0), arg1);
34990 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34991 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34992 gcc_assert (VECTOR_MODE_P (mode0));
34994 op0 = force_reg (mode0, op0);
34996 if (optimize || !target || !register_operand (target, tmode))
34997 target = gen_reg_rtx (tmode);
34999 ix86_expand_vector_extract (true, target, op0, elt);
35001 return target;
35004 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35005 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35006 a language-level syntax for referencing vector elements. */
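/* For example, _mm_insert_epi16 is assumed to map to
   __builtin_ia32_vec_set_v8hi and thus to the routine below; the input
   vector operand itself is left unmodified.  */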
35008 static rtx
35009 ix86_expand_vec_set_builtin (tree exp)
35011 machine_mode tmode, mode1;
35012 tree arg0, arg1, arg2;
35013 int elt;
35014 rtx op0, op1, target;
35016 arg0 = CALL_EXPR_ARG (exp, 0);
35017 arg1 = CALL_EXPR_ARG (exp, 1);
35018 arg2 = CALL_EXPR_ARG (exp, 2);
35020 tmode = TYPE_MODE (TREE_TYPE (arg0));
35021 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35022 gcc_assert (VECTOR_MODE_P (tmode));
35024 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35025 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35026 elt = get_element_number (TREE_TYPE (arg0), arg2);
35028 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35029 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35031 op0 = force_reg (tmode, op0);
35032 op1 = force_reg (mode1, op1);
35034 /* OP0 is the source of these builtin functions and shouldn't be
35035 modified. Create a copy, use it and return it as target. */
35036 target = gen_reg_rtx (tmode);
35037 emit_move_insn (target, op0);
35038 ix86_expand_vector_set (true, target, op1, elt);
35040 return target;
35043 /* Emit conditional move of SRC to DST with condition
35044 OP1 CODE OP2. */
35045 static void
35046 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
35048 rtx t;
35050 if (TARGET_CMOVE)
35052 t = ix86_expand_compare (code, op1, op2);
35053 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
35054 src, dst)));
35056 else
35058 rtx_code_label *nomove = gen_label_rtx ();
35059 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
35060 const0_rtx, GET_MODE (op1), 1, nomove);
35061 emit_move_insn (dst, src);
35062 emit_label (nomove);
35066 /* Choose max of DST and SRC and put it to DST. */
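/* This is an unsigned max, i.e. DST = MAX_U (DST, SRC), implemented via an
   LTU conditional move when TARGET_CMOVE and via a compare-and-branch
   sequence otherwise.  */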
35067 static void
35068 ix86_emit_move_max (rtx dst, rtx src)
35070 ix86_emit_cmove (dst, src, LTU, dst, src);
35073 /* Expand an expression EXP that calls a built-in function,
35074 with result going to TARGET if that's convenient
35075 (and in mode MODE if that's convenient).
35076 SUBTARGET may be used as the target for computing one of EXP's operands.
35077 IGNORE is nonzero if the value is to be ignored. */
35079 static rtx
35080 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35081 machine_mode mode, int ignore)
35083 size_t i;
35084 enum insn_code icode, icode2;
35085 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35086 tree arg0, arg1, arg2, arg3, arg4;
35087 rtx op0, op1, op2, op3, op4, pat, pat2, insn;
35088 machine_mode mode0, mode1, mode2, mode3, mode4;
35089 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35091 /* For CPU builtins that can be folded, fold first and expand the fold. */
35092 switch (fcode)
35094 case IX86_BUILTIN_CPU_INIT:
35096 /* Make it call __cpu_indicator_init in libgcc. */
35097 tree call_expr, fndecl, type;
35098 type = build_function_type_list (integer_type_node, NULL_TREE);
35099 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35100 call_expr = build_call_expr (fndecl, 0);
35101 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35103 case IX86_BUILTIN_CPU_IS:
35104 case IX86_BUILTIN_CPU_SUPPORTS:
35106 tree arg0 = CALL_EXPR_ARG (exp, 0);
35107 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35108 gcc_assert (fold_expr != NULL_TREE);
35109 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35113 /* Determine whether the builtin function is available under the current ISA.
35114 Originally the builtin was not created if it wasn't applicable to the
35115 current ISA based on the command line switches. With function specific
35116 options, we need to check in the context of the function making the call
35117 whether it is supported. Treat AVX512VL and MMX specially. For other flags,
35118 if isa includes more than one ISA bit, treat those as requiring any
35119 of them. For AVX512VL, require both AVX512VL and the non-AVX512VL
35120 ISAs. Likewise for MMX, require both MMX and the non-MMX ISAs.
35121 Similarly for 64BIT, but we shouldn't be building such builtins
35122 at all, -m64 is a whole TU option. */
35123 if (((ix86_builtins_isa[fcode].isa
35124 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35125 | OPTION_MASK_ISA_64BIT))
35126 && !(ix86_builtins_isa[fcode].isa
35127 & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
35128 | OPTION_MASK_ISA_64BIT)
35129 & ix86_isa_flags))
35130 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
35131 && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
35132 || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
35133 && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
35134 || (ix86_builtins_isa[fcode].isa2
35135 && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
35137 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
35138 ix86_builtins_isa[fcode].isa2, 0, 0,
35139 NULL, NULL, (enum fpmath_unit) 0,
35140 false);
35141 if (!opts)
35142 error ("%qE needs unknown isa option", fndecl);
35143 else
35145 gcc_assert (opts != NULL);
35146 error ("%qE needs isa option %s", fndecl, opts);
35147 free (opts);
35149 return expand_call (exp, target, ignore);
35152 switch (fcode)
35154 case IX86_BUILTIN_BNDMK:
35155 if (!target
35156 || GET_MODE (target) != BNDmode
35157 || !register_operand (target, BNDmode))
35158 target = gen_reg_rtx (BNDmode);
35160 arg0 = CALL_EXPR_ARG (exp, 0);
35161 arg1 = CALL_EXPR_ARG (exp, 1);
35163 op0 = expand_normal (arg0);
35164 op1 = expand_normal (arg1);
35166 if (!register_operand (op0, Pmode))
35167 op0 = ix86_zero_extend_to_Pmode (op0);
35168 if (!register_operand (op1, Pmode))
35169 op1 = ix86_zero_extend_to_Pmode (op1);
35171 /* Builtin arg1 is the size of the block, but instruction op1 should
35172 be (size - 1). */
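/* For example, __builtin_ia32_bndmk (p, n) (the internal builtin spelling
   assumed here) describes the range [p, p + n - 1], so the PLUS with
   constm1_rtx below passes n - 1 to the bndmk pattern.  */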
35173 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
35174 NULL_RTX, 1, OPTAB_DIRECT);
35176 emit_insn (BNDmode == BND64mode
35177 ? gen_bnd64_mk (target, op0, op1)
35178 : gen_bnd32_mk (target, op0, op1));
35179 return target;
35181 case IX86_BUILTIN_BNDSTX:
35182 arg0 = CALL_EXPR_ARG (exp, 0);
35183 arg1 = CALL_EXPR_ARG (exp, 1);
35184 arg2 = CALL_EXPR_ARG (exp, 2);
35186 op0 = expand_normal (arg0);
35187 op1 = expand_normal (arg1);
35188 op2 = expand_normal (arg2);
35190 if (!register_operand (op0, Pmode))
35191 op0 = ix86_zero_extend_to_Pmode (op0);
35192 if (!register_operand (op1, BNDmode))
35193 op1 = copy_to_mode_reg (BNDmode, op1);
35194 if (!register_operand (op2, Pmode))
35195 op2 = ix86_zero_extend_to_Pmode (op2);
35197 emit_insn (BNDmode == BND64mode
35198 ? gen_bnd64_stx (op2, op0, op1)
35199 : gen_bnd32_stx (op2, op0, op1));
35200 return 0;
35202 case IX86_BUILTIN_BNDLDX:
35203 if (!target
35204 || GET_MODE (target) != BNDmode
35205 || !register_operand (target, BNDmode))
35206 target = gen_reg_rtx (BNDmode);
35208 arg0 = CALL_EXPR_ARG (exp, 0);
35209 arg1 = CALL_EXPR_ARG (exp, 1);
35211 op0 = expand_normal (arg0);
35212 op1 = expand_normal (arg1);
35214 if (!register_operand (op0, Pmode))
35215 op0 = ix86_zero_extend_to_Pmode (op0);
35216 if (!register_operand (op1, Pmode))
35217 op1 = ix86_zero_extend_to_Pmode (op1);
35219 emit_insn (BNDmode == BND64mode
35220 ? gen_bnd64_ldx (target, op0, op1)
35221 : gen_bnd32_ldx (target, op0, op1));
35222 return target;
35224 case IX86_BUILTIN_BNDCL:
35225 arg0 = CALL_EXPR_ARG (exp, 0);
35226 arg1 = CALL_EXPR_ARG (exp, 1);
35228 op0 = expand_normal (arg0);
35229 op1 = expand_normal (arg1);
35231 if (!register_operand (op0, Pmode))
35232 op0 = ix86_zero_extend_to_Pmode (op0);
35233 if (!register_operand (op1, BNDmode))
35234 op1 = copy_to_mode_reg (BNDmode, op1);
35236 emit_insn (BNDmode == BND64mode
35237 ? gen_bnd64_cl (op1, op0)
35238 : gen_bnd32_cl (op1, op0));
35239 return 0;
35241 case IX86_BUILTIN_BNDCU:
35242 arg0 = CALL_EXPR_ARG (exp, 0);
35243 arg1 = CALL_EXPR_ARG (exp, 1);
35245 op0 = expand_normal (arg0);
35246 op1 = expand_normal (arg1);
35248 if (!register_operand (op0, Pmode))
35249 op0 = ix86_zero_extend_to_Pmode (op0);
35250 if (!register_operand (op1, BNDmode))
35251 op1 = copy_to_mode_reg (BNDmode, op1);
35253 emit_insn (BNDmode == BND64mode
35254 ? gen_bnd64_cu (op1, op0)
35255 : gen_bnd32_cu (op1, op0));
35256 return 0;
35258 case IX86_BUILTIN_BNDRET:
35259 arg0 = CALL_EXPR_ARG (exp, 0);
35260 target = chkp_get_rtl_bounds (arg0);
35262 /* If no bounds were specified for the returned value,
35263 then use INIT bounds. This usually happens when
35264 some built-in function is expanded. */
35265 if (!target)
35267 rtx t1 = gen_reg_rtx (Pmode);
35268 rtx t2 = gen_reg_rtx (Pmode);
35269 target = gen_reg_rtx (BNDmode);
35270 emit_move_insn (t1, const0_rtx);
35271 emit_move_insn (t2, constm1_rtx);
35272 emit_insn (BNDmode == BND64mode
35273 ? gen_bnd64_mk (target, t1, t2)
35274 : gen_bnd32_mk (target, t1, t2));
35277 gcc_assert (target && REG_P (target));
35278 return target;
35280 case IX86_BUILTIN_BNDNARROW:
35282 rtx m1, m1h1, m1h2, lb, ub, t1;
35284 /* Return value and lb. */
35285 arg0 = CALL_EXPR_ARG (exp, 0);
35286 /* Bounds. */
35287 arg1 = CALL_EXPR_ARG (exp, 1);
35288 /* Size. */
35289 arg2 = CALL_EXPR_ARG (exp, 2);
35291 lb = expand_normal (arg0);
35292 op1 = expand_normal (arg1);
35293 op2 = expand_normal (arg2);
35295 /* Size was passed but we need to use (size - 1) as for bndmk. */
35296 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
35297 NULL_RTX, 1, OPTAB_DIRECT);
35299 /* Add LB to size and invert to get UB. */
35300 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
35301 op2, 1, OPTAB_DIRECT);
35302 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
35304 if (!register_operand (lb, Pmode))
35305 lb = ix86_zero_extend_to_Pmode (lb);
35306 if (!register_operand (ub, Pmode))
35307 ub = ix86_zero_extend_to_Pmode (ub);
35309 /* We need to move bounds to memory before any computations. */
35310 if (MEM_P (op1))
35311 m1 = op1;
35312 else
35314 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
35315 emit_move_insn (m1, op1);
35318 /* Generate mem expression to be used for access to LB and UB. */
35319 m1h1 = adjust_address (m1, Pmode, 0);
35320 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
35322 t1 = gen_reg_rtx (Pmode);
35324 /* Compute LB. */
35325 emit_move_insn (t1, m1h1);
35326 ix86_emit_move_max (t1, lb);
35327 emit_move_insn (m1h1, t1);
35329 /* Compute UB. UB is stored in 1's complement form. Therefore
35330 we also use max here. */
35331 emit_move_insn (t1, m1h2);
35332 ix86_emit_move_max (t1, ub);
35333 emit_move_insn (m1h2, t1);
35335 op2 = gen_reg_rtx (BNDmode);
35336 emit_move_insn (op2, m1);
35338 return chkp_join_splitted_slot (lb, op2);
35341 case IX86_BUILTIN_BNDINT:
35343 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
35345 if (!target
35346 || GET_MODE (target) != BNDmode
35347 || !register_operand (target, BNDmode))
35348 target = gen_reg_rtx (BNDmode);
35350 arg0 = CALL_EXPR_ARG (exp, 0);
35351 arg1 = CALL_EXPR_ARG (exp, 1);
35353 op0 = expand_normal (arg0);
35354 op1 = expand_normal (arg1);
35356 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
35357 rh1 = adjust_address (res, Pmode, 0);
35358 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
35360 /* Put first bounds to temporaries. */
35361 lb1 = gen_reg_rtx (Pmode);
35362 ub1 = gen_reg_rtx (Pmode);
35363 if (MEM_P (op0))
35365 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
35366 emit_move_insn (ub1, adjust_address (op0, Pmode,
35367 GET_MODE_SIZE (Pmode)));
35369 else
35371 emit_move_insn (res, op0);
35372 emit_move_insn (lb1, rh1);
35373 emit_move_insn (ub1, rh2);
35376 /* Put second bounds to temporaries. */
35377 lb2 = gen_reg_rtx (Pmode);
35378 ub2 = gen_reg_rtx (Pmode);
35379 if (MEM_P (op1))
35381 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
35382 emit_move_insn (ub2, adjust_address (op1, Pmode,
35383 GET_MODE_SIZE (Pmode)));
35385 else
35387 emit_move_insn (res, op1);
35388 emit_move_insn (lb2, rh1);
35389 emit_move_insn (ub2, rh2);
35392 /* Compute LB. */
35393 ix86_emit_move_max (lb1, lb2);
35394 emit_move_insn (rh1, lb1);
35396 /* Compute UB. UB is stored in 1's complement form. Therefore
35397 we also use max here. */
35398 ix86_emit_move_max (ub1, ub2);
35399 emit_move_insn (rh2, ub1);
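/* Worked example: intersection keeps the larger LB and the smaller UB.
   Since UBs are kept in 1's complement, UB1 = 0x100 and UB2 = 0x80 are
   stored as ~0x100 and ~0x80; max (~0x100, ~0x80) == ~0x80, which decodes
   back to 0x80 == min (UB1, UB2), so the same unsigned-max helper serves
   both halves.  */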
35401 emit_move_insn (target, res);
35403 return target;
35406 case IX86_BUILTIN_SIZEOF:
35408 tree name;
35409 rtx symbol;
35411 if (!target
35412 || GET_MODE (target) != Pmode
35413 || !register_operand (target, Pmode))
35414 target = gen_reg_rtx (Pmode);
35416 arg0 = CALL_EXPR_ARG (exp, 0);
35417 gcc_assert (VAR_P (arg0));
35419 name = DECL_ASSEMBLER_NAME (arg0);
35420 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
35422 emit_insn (Pmode == SImode
35423 ? gen_move_size_reloc_si (target, symbol)
35424 : gen_move_size_reloc_di (target, symbol));
35426 return target;
35429 case IX86_BUILTIN_BNDLOWER:
35431 rtx mem, hmem;
35433 if (!target
35434 || GET_MODE (target) != Pmode
35435 || !register_operand (target, Pmode))
35436 target = gen_reg_rtx (Pmode);
35438 arg0 = CALL_EXPR_ARG (exp, 0);
35439 op0 = expand_normal (arg0);
35441 /* We need to move bounds to memory first. */
35442 if (MEM_P (op0))
35443 mem = op0;
35444 else
35446 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35447 emit_move_insn (mem, op0);
35450 /* Generate mem expression to access LB and load it. */
35451 hmem = adjust_address (mem, Pmode, 0);
35452 emit_move_insn (target, hmem);
35454 return target;
35457 case IX86_BUILTIN_BNDUPPER:
35459 rtx mem, hmem, res;
35461 if (!target
35462 || GET_MODE (target) != Pmode
35463 || !register_operand (target, Pmode))
35464 target = gen_reg_rtx (Pmode);
35466 arg0 = CALL_EXPR_ARG (exp, 0);
35467 op0 = expand_normal (arg0);
35469 /* We need to move bounds to memory first. */
35470 if (MEM_P (op0))
35471 mem = op0;
35472 else
35474 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
35475 emit_move_insn (mem, op0);
35478 /* Generate mem expression to access UB. */
35479 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
35481 /* We need to invert all bits of UB. */
35482 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
35484 if (res != target)
35485 emit_move_insn (target, res);
35487 return target;
35490 case IX86_BUILTIN_MASKMOVQ:
35491 case IX86_BUILTIN_MASKMOVDQU:
35492 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35493 ? CODE_FOR_mmx_maskmovq
35494 : CODE_FOR_sse2_maskmovdqu);
35495 /* Note the arg order is different from the operand order. */
35496 arg1 = CALL_EXPR_ARG (exp, 0);
35497 arg2 = CALL_EXPR_ARG (exp, 1);
35498 arg0 = CALL_EXPR_ARG (exp, 2);
35499 op0 = expand_normal (arg0);
35500 op1 = expand_normal (arg1);
35501 op2 = expand_normal (arg2);
35502 mode0 = insn_data[icode].operand[0].mode;
35503 mode1 = insn_data[icode].operand[1].mode;
35504 mode2 = insn_data[icode].operand[2].mode;
35506 op0 = ix86_zero_extend_to_Pmode (op0);
35507 op0 = gen_rtx_MEM (mode1, op0);
35509 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35510 op0 = copy_to_mode_reg (mode0, op0);
35511 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35512 op1 = copy_to_mode_reg (mode1, op1);
35513 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35514 op2 = copy_to_mode_reg (mode2, op2);
35515 pat = GEN_FCN (icode) (op0, op1, op2);
35516 if (! pat)
35517 return 0;
35518 emit_insn (pat);
35519 return 0;
35521 case IX86_BUILTIN_LDMXCSR:
35522 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35523 target = assign_386_stack_local (SImode, SLOT_TEMP);
35524 emit_move_insn (target, op0);
35525 emit_insn (gen_sse_ldmxcsr (target));
35526 return 0;
35528 case IX86_BUILTIN_STMXCSR:
35529 target = assign_386_stack_local (SImode, SLOT_TEMP);
35530 emit_insn (gen_sse_stmxcsr (target));
35531 return copy_to_mode_reg (SImode, target);
35533 case IX86_BUILTIN_CLFLUSH:
35534 arg0 = CALL_EXPR_ARG (exp, 0);
35535 op0 = expand_normal (arg0);
35536 icode = CODE_FOR_sse2_clflush;
35537 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35538 op0 = ix86_zero_extend_to_Pmode (op0);
35540 emit_insn (gen_sse2_clflush (op0));
35541 return 0;
35543 case IX86_BUILTIN_CLWB:
35544 arg0 = CALL_EXPR_ARG (exp, 0);
35545 op0 = expand_normal (arg0);
35546 icode = CODE_FOR_clwb;
35547 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35548 op0 = ix86_zero_extend_to_Pmode (op0);
35550 emit_insn (gen_clwb (op0));
35551 return 0;
35553 case IX86_BUILTIN_CLFLUSHOPT:
35554 arg0 = CALL_EXPR_ARG (exp, 0);
35555 op0 = expand_normal (arg0);
35556 icode = CODE_FOR_clflushopt;
35557 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35558 op0 = ix86_zero_extend_to_Pmode (op0);
35560 emit_insn (gen_clflushopt (op0));
35561 return 0;
35563 case IX86_BUILTIN_MONITOR:
35564 case IX86_BUILTIN_MONITORX:
35565 arg0 = CALL_EXPR_ARG (exp, 0);
35566 arg1 = CALL_EXPR_ARG (exp, 1);
35567 arg2 = CALL_EXPR_ARG (exp, 2);
35568 op0 = expand_normal (arg0);
35569 op1 = expand_normal (arg1);
35570 op2 = expand_normal (arg2);
35571 if (!REG_P (op0))
35572 op0 = ix86_zero_extend_to_Pmode (op0);
35573 if (!REG_P (op1))
35574 op1 = copy_to_mode_reg (SImode, op1);
35575 if (!REG_P (op2))
35576 op2 = copy_to_mode_reg (SImode, op2);
35578 emit_insn (fcode == IX86_BUILTIN_MONITOR
35579 ? ix86_gen_monitor (op0, op1, op2)
35580 : ix86_gen_monitorx (op0, op1, op2));
35581 return 0;
35583 case IX86_BUILTIN_MWAIT:
35584 arg0 = CALL_EXPR_ARG (exp, 0);
35585 arg1 = CALL_EXPR_ARG (exp, 1);
35586 op0 = expand_normal (arg0);
35587 op1 = expand_normal (arg1);
35588 if (!REG_P (op0))
35589 op0 = copy_to_mode_reg (SImode, op0);
35590 if (!REG_P (op1))
35591 op1 = copy_to_mode_reg (SImode, op1);
35592 emit_insn (gen_sse3_mwait (op0, op1));
35593 return 0;
35595 case IX86_BUILTIN_MWAITX:
35596 arg0 = CALL_EXPR_ARG (exp, 0);
35597 arg1 = CALL_EXPR_ARG (exp, 1);
35598 arg2 = CALL_EXPR_ARG (exp, 2);
35599 op0 = expand_normal (arg0);
35600 op1 = expand_normal (arg1);
35601 op2 = expand_normal (arg2);
35602 if (!REG_P (op0))
35603 op0 = copy_to_mode_reg (SImode, op0);
35604 if (!REG_P (op1))
35605 op1 = copy_to_mode_reg (SImode, op1);
35606 if (!REG_P (op2))
35607 op2 = copy_to_mode_reg (SImode, op2);
35608 emit_insn (gen_mwaitx (op0, op1, op2));
35609 return 0;
35611 case IX86_BUILTIN_CLZERO:
35612 arg0 = CALL_EXPR_ARG (exp, 0);
35613 op0 = expand_normal (arg0);
35614 if (!REG_P (op0))
35615 op0 = ix86_zero_extend_to_Pmode (op0);
35616 emit_insn (ix86_gen_clzero (op0));
35617 return 0;
35619 case IX86_BUILTIN_VEC_INIT_V2SI:
35620 case IX86_BUILTIN_VEC_INIT_V4HI:
35621 case IX86_BUILTIN_VEC_INIT_V8QI:
35622 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35624 case IX86_BUILTIN_VEC_EXT_V2DF:
35625 case IX86_BUILTIN_VEC_EXT_V2DI:
35626 case IX86_BUILTIN_VEC_EXT_V4SF:
35627 case IX86_BUILTIN_VEC_EXT_V4SI:
35628 case IX86_BUILTIN_VEC_EXT_V8HI:
35629 case IX86_BUILTIN_VEC_EXT_V2SI:
35630 case IX86_BUILTIN_VEC_EXT_V4HI:
35631 case IX86_BUILTIN_VEC_EXT_V16QI:
35632 return ix86_expand_vec_ext_builtin (exp, target);
35634 case IX86_BUILTIN_VEC_SET_V2DI:
35635 case IX86_BUILTIN_VEC_SET_V4SF:
35636 case IX86_BUILTIN_VEC_SET_V4SI:
35637 case IX86_BUILTIN_VEC_SET_V8HI:
35638 case IX86_BUILTIN_VEC_SET_V4HI:
35639 case IX86_BUILTIN_VEC_SET_V16QI:
35640 return ix86_expand_vec_set_builtin (exp);
35642 case IX86_BUILTIN_NANQ:
35643 case IX86_BUILTIN_NANSQ:
35644 return expand_call (exp, target, ignore);
35646 case IX86_BUILTIN_RDPMC:
35647 case IX86_BUILTIN_RDTSC:
35648 case IX86_BUILTIN_RDTSCP:
35649 case IX86_BUILTIN_XGETBV:
35651 op0 = gen_reg_rtx (DImode);
35652 op1 = gen_reg_rtx (DImode);
35654 if (fcode == IX86_BUILTIN_RDPMC)
35656 arg0 = CALL_EXPR_ARG (exp, 0);
35657 op2 = expand_normal (arg0);
35658 if (!register_operand (op2, SImode))
35659 op2 = copy_to_mode_reg (SImode, op2);
35661 insn = (TARGET_64BIT
35662 ? gen_rdpmc_rex64 (op0, op1, op2)
35663 : gen_rdpmc (op0, op2));
35664 emit_insn (insn);
35666 else if (fcode == IX86_BUILTIN_XGETBV)
35668 arg0 = CALL_EXPR_ARG (exp, 0);
35669 op2 = expand_normal (arg0);
35670 if (!register_operand (op2, SImode))
35671 op2 = copy_to_mode_reg (SImode, op2);
35673 insn = (TARGET_64BIT
35674 ? gen_xgetbv_rex64 (op0, op1, op2)
35675 : gen_xgetbv (op0, op2));
35676 emit_insn (insn);
35678 else if (fcode == IX86_BUILTIN_RDTSC)
35680 insn = (TARGET_64BIT
35681 ? gen_rdtsc_rex64 (op0, op1)
35682 : gen_rdtsc (op0));
35683 emit_insn (insn);
35685 else
35687 op2 = gen_reg_rtx (SImode);
35689 insn = (TARGET_64BIT
35690 ? gen_rdtscp_rex64 (op0, op1, op2)
35691 : gen_rdtscp (op0, op2));
35692 emit_insn (insn);
35694 arg0 = CALL_EXPR_ARG (exp, 0);
35695 op4 = expand_normal (arg0);
35696 if (!address_operand (op4, VOIDmode))
35698 op4 = convert_memory_address (Pmode, op4);
35699 op4 = copy_addr_to_reg (op4);
35701 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35704 if (target == 0)
35706 /* mode is VOIDmode if __builtin_rd* has been called
35707 without an lhs. */
35708 if (mode == VOIDmode)
35709 return target;
35710 target = gen_reg_rtx (mode);
35713 if (TARGET_64BIT)
35715 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35716 op1, 1, OPTAB_DIRECT);
35717 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35718 op0, 1, OPTAB_DIRECT);
35721 emit_move_insn (target, op0);
35722 return target;
35724 case IX86_BUILTIN_FXSAVE:
35725 case IX86_BUILTIN_FXRSTOR:
35726 case IX86_BUILTIN_FXSAVE64:
35727 case IX86_BUILTIN_FXRSTOR64:
35728 case IX86_BUILTIN_FNSTENV:
35729 case IX86_BUILTIN_FLDENV:
35730 mode0 = BLKmode;
35731 switch (fcode)
35733 case IX86_BUILTIN_FXSAVE:
35734 icode = CODE_FOR_fxsave;
35735 break;
35736 case IX86_BUILTIN_FXRSTOR:
35737 icode = CODE_FOR_fxrstor;
35738 break;
35739 case IX86_BUILTIN_FXSAVE64:
35740 icode = CODE_FOR_fxsave64;
35741 break;
35742 case IX86_BUILTIN_FXRSTOR64:
35743 icode = CODE_FOR_fxrstor64;
35744 break;
35745 case IX86_BUILTIN_FNSTENV:
35746 icode = CODE_FOR_fnstenv;
35747 break;
35748 case IX86_BUILTIN_FLDENV:
35749 icode = CODE_FOR_fldenv;
35750 break;
35751 default:
35752 gcc_unreachable ();
35755 arg0 = CALL_EXPR_ARG (exp, 0);
35756 op0 = expand_normal (arg0);
35758 if (!address_operand (op0, VOIDmode))
35760 op0 = convert_memory_address (Pmode, op0);
35761 op0 = copy_addr_to_reg (op0);
35763 op0 = gen_rtx_MEM (mode0, op0);
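/* The operand is a BLKmode MEM because the save area has no scalar mode:
   for fxsave/fxrstor it is the architectural 512-byte FXSAVE area, for
   fnstenv/fldenv the smaller FPU environment block.  */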
35765 pat = GEN_FCN (icode) (op0);
35766 if (pat)
35767 emit_insn (pat);
35768 return 0;
35770 case IX86_BUILTIN_XSETBV:
35771 arg0 = CALL_EXPR_ARG (exp, 0);
35772 arg1 = CALL_EXPR_ARG (exp, 1);
35773 op0 = expand_normal (arg0);
35774 op1 = expand_normal (arg1);
35776 if (!REG_P (op0))
35777 op0 = copy_to_mode_reg (SImode, op0);
35779 if (TARGET_64BIT)
35781 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35782 NULL, 1, OPTAB_DIRECT);
35784 op2 = gen_lowpart (SImode, op2);
35785 op1 = gen_lowpart (SImode, op1);
35786 if (!REG_P (op1))
35787 op1 = copy_to_mode_reg (SImode, op1);
35788 if (!REG_P (op2))
35789 op2 = copy_to_mode_reg (SImode, op2);
35790 icode = CODE_FOR_xsetbv_rex64;
35791 pat = GEN_FCN (icode) (op0, op1, op2);
35793 else
35795 if (!REG_P (op1))
35796 op1 = copy_to_mode_reg (DImode, op1);
35797 icode = CODE_FOR_xsetbv;
35798 pat = GEN_FCN (icode) (op0, op1);
35800 if (pat)
35801 emit_insn (pat);
35802 return 0;
35804 case IX86_BUILTIN_XSAVE:
35805 case IX86_BUILTIN_XRSTOR:
35806 case IX86_BUILTIN_XSAVE64:
35807 case IX86_BUILTIN_XRSTOR64:
35808 case IX86_BUILTIN_XSAVEOPT:
35809 case IX86_BUILTIN_XSAVEOPT64:
35810 case IX86_BUILTIN_XSAVES:
35811 case IX86_BUILTIN_XRSTORS:
35812 case IX86_BUILTIN_XSAVES64:
35813 case IX86_BUILTIN_XRSTORS64:
35814 case IX86_BUILTIN_XSAVEC:
35815 case IX86_BUILTIN_XSAVEC64:
35816 arg0 = CALL_EXPR_ARG (exp, 0);
35817 arg1 = CALL_EXPR_ARG (exp, 1);
35818 op0 = expand_normal (arg0);
35819 op1 = expand_normal (arg1);
35821 if (!address_operand (op0, VOIDmode))
35823 op0 = convert_memory_address (Pmode, op0);
35824 op0 = copy_addr_to_reg (op0);
35826 op0 = gen_rtx_MEM (BLKmode, op0);
35828 op1 = force_reg (DImode, op1);
35830 if (TARGET_64BIT)
35832 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35833 NULL, 1, OPTAB_DIRECT);
35834 switch (fcode)
35836 case IX86_BUILTIN_XSAVE:
35837 icode = CODE_FOR_xsave_rex64;
35838 break;
35839 case IX86_BUILTIN_XRSTOR:
35840 icode = CODE_FOR_xrstor_rex64;
35841 break;
35842 case IX86_BUILTIN_XSAVE64:
35843 icode = CODE_FOR_xsave64;
35844 break;
35845 case IX86_BUILTIN_XRSTOR64:
35846 icode = CODE_FOR_xrstor64;
35847 break;
35848 case IX86_BUILTIN_XSAVEOPT:
35849 icode = CODE_FOR_xsaveopt_rex64;
35850 break;
35851 case IX86_BUILTIN_XSAVEOPT64:
35852 icode = CODE_FOR_xsaveopt64;
35853 break;
35854 case IX86_BUILTIN_XSAVES:
35855 icode = CODE_FOR_xsaves_rex64;
35856 break;
35857 case IX86_BUILTIN_XRSTORS:
35858 icode = CODE_FOR_xrstors_rex64;
35859 break;
35860 case IX86_BUILTIN_XSAVES64:
35861 icode = CODE_FOR_xsaves64;
35862 break;
35863 case IX86_BUILTIN_XRSTORS64:
35864 icode = CODE_FOR_xrstors64;
35865 break;
35866 case IX86_BUILTIN_XSAVEC:
35867 icode = CODE_FOR_xsavec_rex64;
35868 break;
35869 case IX86_BUILTIN_XSAVEC64:
35870 icode = CODE_FOR_xsavec64;
35871 break;
35872 default:
35873 gcc_unreachable ();
35876 op2 = gen_lowpart (SImode, op2);
35877 op1 = gen_lowpart (SImode, op1);
35878 pat = GEN_FCN (icode) (op0, op1, op2);
35880 else
35882 switch (fcode)
35884 case IX86_BUILTIN_XSAVE:
35885 icode = CODE_FOR_xsave;
35886 break;
35887 case IX86_BUILTIN_XRSTOR:
35888 icode = CODE_FOR_xrstor;
35889 break;
35890 case IX86_BUILTIN_XSAVEOPT:
35891 icode = CODE_FOR_xsaveopt;
35892 break;
35893 case IX86_BUILTIN_XSAVES:
35894 icode = CODE_FOR_xsaves;
35895 break;
35896 case IX86_BUILTIN_XRSTORS:
35897 icode = CODE_FOR_xrstors;
35898 break;
35899 case IX86_BUILTIN_XSAVEC:
35900 icode = CODE_FOR_xsavec;
35901 break;
35902 default:
35903 gcc_unreachable ();
35905 pat = GEN_FCN (icode) (op0, op1);
35908 if (pat)
35909 emit_insn (pat);
35910 return 0;
35912 case IX86_BUILTIN_LLWPCB:
35913 arg0 = CALL_EXPR_ARG (exp, 0);
35914 op0 = expand_normal (arg0);
35915 icode = CODE_FOR_lwp_llwpcb;
35916 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35917 op0 = ix86_zero_extend_to_Pmode (op0);
35918 emit_insn (gen_lwp_llwpcb (op0));
35919 return 0;
35921 case IX86_BUILTIN_SLWPCB:
35922 icode = CODE_FOR_lwp_slwpcb;
35923 if (!target
35924 || !insn_data[icode].operand[0].predicate (target, Pmode))
35925 target = gen_reg_rtx (Pmode);
35926 emit_insn (gen_lwp_slwpcb (target));
35927 return target;
35929 case IX86_BUILTIN_BEXTRI32:
35930 case IX86_BUILTIN_BEXTRI64:
35931 arg0 = CALL_EXPR_ARG (exp, 0);
35932 arg1 = CALL_EXPR_ARG (exp, 1);
35933 op0 = expand_normal (arg0);
35934 op1 = expand_normal (arg1);
35935 icode = (fcode == IX86_BUILTIN_BEXTRI32
35936 ? CODE_FOR_tbm_bextri_si
35937 : CODE_FOR_tbm_bextri_di);
35938 if (!CONST_INT_P (op1))
35940 error ("last argument must be an immediate");
35941 return const0_rtx;
35943 else
35945 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35946 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35947 op1 = GEN_INT (length);
35948 op2 = GEN_INT (lsb_index);
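/* For example (immediate layout per the TBM BEXTR encoding), a second
   argument of 0x0804 splits into length = 8 and lsb_index = 4, i.e.
   extract 8 bits starting at bit 4 of the first argument.  */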
35949 pat = GEN_FCN (icode) (target, op0, op1, op2);
35950 if (pat)
35951 emit_insn (pat);
35952 return target;
35955 case IX86_BUILTIN_RDRAND16_STEP:
35956 icode = CODE_FOR_rdrandhi_1;
35957 mode0 = HImode;
35958 goto rdrand_step;
35960 case IX86_BUILTIN_RDRAND32_STEP:
35961 icode = CODE_FOR_rdrandsi_1;
35962 mode0 = SImode;
35963 goto rdrand_step;
35965 case IX86_BUILTIN_RDRAND64_STEP:
35966 icode = CODE_FOR_rdranddi_1;
35967 mode0 = DImode;
35969 rdrand_step:
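/* Usage sketch for the rdrand*_step builtins (intrinsic-level semantics):
     unsigned int v;
     int ok = __builtin_ia32_rdrand32_step (&v);
   The hardware value is stored through the pointer and the return value is
   nonzero only when the CPU reported success (CF set); the conditional
   move emitted below materializes that flag-dependent result.  */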
35970 arg0 = CALL_EXPR_ARG (exp, 0);
35971 op1 = expand_normal (arg0);
35972 if (!address_operand (op1, VOIDmode))
35974 op1 = convert_memory_address (Pmode, op1);
35975 op1 = copy_addr_to_reg (op1);
35978 op0 = gen_reg_rtx (mode0);
35979 emit_insn (GEN_FCN (icode) (op0));
35981 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35983 op1 = gen_reg_rtx (SImode);
35984 emit_move_insn (op1, CONST1_RTX (SImode));
35986 /* Emit SImode conditional move. */
35987 if (mode0 == HImode)
35989 if (TARGET_ZERO_EXTEND_WITH_AND
35990 && optimize_function_for_speed_p (cfun))
35992 op2 = force_reg (SImode, const0_rtx);
35994 emit_insn (gen_movstricthi
35995 (gen_lowpart (HImode, op2), op0));
35997 else
35999 op2 = gen_reg_rtx (SImode);
36001 emit_insn (gen_zero_extendhisi2 (op2, op0));
36004 else if (mode0 == SImode)
36005 op2 = op0;
36006 else
36007 op2 = gen_rtx_SUBREG (SImode, op0, 0);
36009 if (target == 0
36010 || !register_operand (target, SImode))
36011 target = gen_reg_rtx (SImode);
36013 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
36014 const0_rtx);
36015 emit_insn (gen_rtx_SET (target,
36016 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
36017 return target;
36019 case IX86_BUILTIN_RDSEED16_STEP:
36020 icode = CODE_FOR_rdseedhi_1;
36021 mode0 = HImode;
36022 goto rdseed_step;
36024 case IX86_BUILTIN_RDSEED32_STEP:
36025 icode = CODE_FOR_rdseedsi_1;
36026 mode0 = SImode;
36027 goto rdseed_step;
36029 case IX86_BUILTIN_RDSEED64_STEP:
36030 icode = CODE_FOR_rdseeddi_1;
36031 mode0 = DImode;
36033 rdseed_step:
36034 arg0 = CALL_EXPR_ARG (exp, 0);
36035 op1 = expand_normal (arg0);
36036 if (!address_operand (op1, VOIDmode))
36038 op1 = convert_memory_address (Pmode, op1);
36039 op1 = copy_addr_to_reg (op1);
36042 op0 = gen_reg_rtx (mode0);
36043 emit_insn (GEN_FCN (icode) (op0));
36045 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
36047 op2 = gen_reg_rtx (QImode);
36049 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
36050 const0_rtx);
36051 emit_insn (gen_rtx_SET (op2, pat));
36053 if (target == 0
36054 || !register_operand (target, SImode))
36055 target = gen_reg_rtx (SImode);
36057 emit_insn (gen_zero_extendqisi2 (target, op2));
36058 return target;
36060 case IX86_BUILTIN_SBB32:
36061 icode = CODE_FOR_subborrowsi;
36062 icode2 = CODE_FOR_subborrowsi_0;
36063 mode0 = SImode;
36064 mode1 = DImode;
36065 mode2 = CCmode;
36066 goto handlecarry;
36068 case IX86_BUILTIN_SBB64:
36069 icode = CODE_FOR_subborrowdi;
36070 icode2 = CODE_FOR_subborrowdi_0;
36071 mode0 = DImode;
36072 mode1 = TImode;
36073 mode2 = CCmode;
36074 goto handlecarry;
36076 case IX86_BUILTIN_ADDCARRYX32:
36077 icode = CODE_FOR_addcarrysi;
36078 icode2 = CODE_FOR_addcarrysi_0;
36079 mode0 = SImode;
36080 mode1 = DImode;
36081 mode2 = CCCmode;
36082 goto handlecarry;
36084 case IX86_BUILTIN_ADDCARRYX64:
36085 icode = CODE_FOR_addcarrydi;
36086 icode2 = CODE_FOR_addcarrydi_0;
36087 mode0 = DImode;
36088 mode1 = TImode;
36089 mode2 = CCCmode;
36091 handlecarry:
36092 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
36093 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
36094 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
36095 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
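/* Intrinsic-level semantics assumed here: _addcarry_u32 (c_in, src1, src2,
   &out) computes out = src1 + src2 + c_in and returns the carry out of
   that addition; the SBB variants return the borrow instead.  */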
36097 op1 = expand_normal (arg0);
36098 if (!integer_zerop (arg0))
36099 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
36101 op2 = expand_normal (arg1);
36102 if (!register_operand (op2, mode0))
36103 op2 = copy_to_mode_reg (mode0, op2);
36105 op3 = expand_normal (arg2);
36106 if (!register_operand (op3, mode0))
36107 op3 = copy_to_mode_reg (mode0, op3);
36109 op4 = expand_normal (arg3);
36110 if (!address_operand (op4, VOIDmode))
36112 op4 = convert_memory_address (Pmode, op4);
36113 op4 = copy_addr_to_reg (op4);
36116 op0 = gen_reg_rtx (mode0);
36117 if (integer_zerop (arg0))
36119 /* If arg0 is 0, optimize right away into an add or sub
36120 instruction that sets CCCmode flags. */
36121 op1 = gen_rtx_REG (mode2, FLAGS_REG);
36122 emit_insn (GEN_FCN (icode2) (op0, op2, op3));
36124 else
36126 /* Generate CF from input operand. */
36127 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
36129 /* Generate instruction that consumes CF. */
36130 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
36131 pat = gen_rtx_LTU (mode1, op1, const0_rtx);
36132 pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
36133 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
36136 /* Return current CF value. */
36137 if (target == 0)
36138 target = gen_reg_rtx (QImode);
36140 pat = gen_rtx_LTU (QImode, op1, const0_rtx);
36141 emit_insn (gen_rtx_SET (target, pat));
36143 /* Store the result. */
36144 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
36146 return target;
36148 case IX86_BUILTIN_READ_FLAGS:
36149 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
36151 if (optimize
36152 || target == NULL_RTX
36153 || !nonimmediate_operand (target, word_mode)
36154 || GET_MODE (target) != word_mode)
36155 target = gen_reg_rtx (word_mode);
36157 emit_insn (gen_pop (target));
36158 return target;
36160 case IX86_BUILTIN_WRITE_FLAGS:
36162 arg0 = CALL_EXPR_ARG (exp, 0);
36163 op0 = expand_normal (arg0);
36164 if (!general_no_elim_operand (op0, word_mode))
36165 op0 = copy_to_mode_reg (word_mode, op0);
36167 emit_insn (gen_push (op0));
36168 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
36169 return 0;
36171 case IX86_BUILTIN_KTESTC8:
36172 icode = CODE_FOR_ktestqi;
36173 mode3 = CCCmode;
36174 goto kortest;
36176 case IX86_BUILTIN_KTESTZ8:
36177 icode = CODE_FOR_ktestqi;
36178 mode3 = CCZmode;
36179 goto kortest;
36181 case IX86_BUILTIN_KTESTC16:
36182 icode = CODE_FOR_ktesthi;
36183 mode3 = CCCmode;
36184 goto kortest;
36186 case IX86_BUILTIN_KTESTZ16:
36187 icode = CODE_FOR_ktesthi;
36188 mode3 = CCZmode;
36189 goto kortest;
36191 case IX86_BUILTIN_KTESTC32:
36192 icode = CODE_FOR_ktestsi;
36193 mode3 = CCCmode;
36194 goto kortest;
36196 case IX86_BUILTIN_KTESTZ32:
36197 icode = CODE_FOR_ktestsi;
36198 mode3 = CCZmode;
36199 goto kortest;
36201 case IX86_BUILTIN_KTESTC64:
36202 icode = CODE_FOR_ktestdi;
36203 mode3 = CCCmode;
36204 goto kortest;
36206 case IX86_BUILTIN_KTESTZ64:
36207 icode = CODE_FOR_ktestdi;
36208 mode3 = CCZmode;
36209 goto kortest;
36211 case IX86_BUILTIN_KORTESTC8:
36212 icode = CODE_FOR_kortestqi;
36213 mode3 = CCCmode;
36214 goto kortest;
36216 case IX86_BUILTIN_KORTESTZ8:
36217 icode = CODE_FOR_kortestqi;
36218 mode3 = CCZmode;
36219 goto kortest;
36221 case IX86_BUILTIN_KORTESTC16:
36222 icode = CODE_FOR_kortesthi;
36223 mode3 = CCCmode;
36224 goto kortest;
36226 case IX86_BUILTIN_KORTESTZ16:
36227 icode = CODE_FOR_kortesthi;
36228 mode3 = CCZmode;
36229 goto kortest;
36231 case IX86_BUILTIN_KORTESTC32:
36232 icode = CODE_FOR_kortestsi;
36233 mode3 = CCCmode;
36234 goto kortest;
36236 case IX86_BUILTIN_KORTESTZ32:
36237 icode = CODE_FOR_kortestsi;
36238 mode3 = CCZmode;
36239 goto kortest;
36241 case IX86_BUILTIN_KORTESTC64:
36242 icode = CODE_FOR_kortestdi;
36243 mode3 = CCCmode;
36244 goto kortest;
36246 case IX86_BUILTIN_KORTESTZ64:
36247 icode = CODE_FOR_kortestdi;
36248 mode3 = CCZmode;
36250 kortest:
36251 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
36252 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
36253 op0 = expand_normal (arg0);
36254 op1 = expand_normal (arg1);
36256 mode0 = insn_data[icode].operand[0].mode;
36257 mode1 = insn_data[icode].operand[1].mode;
36259 if (GET_MODE (op0) != VOIDmode)
36260 op0 = force_reg (GET_MODE (op0), op0);
36262 op0 = gen_lowpart (mode0, op0);
36264 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36265 op0 = copy_to_mode_reg (mode0, op0);
36267 if (GET_MODE (op1) != VOIDmode)
36268 op1 = force_reg (GET_MODE (op1), op1);
36270 op1 = gen_lowpart (mode1, op1);
36272 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36273 op1 = copy_to_mode_reg (mode1, op1);
36275 target = gen_reg_rtx (QImode);
36277 /* Emit kortest. */
36278 emit_insn (GEN_FCN (icode) (op0, op1));
36279 /* And use setcc to return result from flags. */
36280 ix86_expand_setcc (target, EQ,
36281 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
36282 return target;
36284 case IX86_BUILTIN_GATHERSIV2DF:
36285 icode = CODE_FOR_avx2_gathersiv2df;
36286 goto gather_gen;
36287 case IX86_BUILTIN_GATHERSIV4DF:
36288 icode = CODE_FOR_avx2_gathersiv4df;
36289 goto gather_gen;
36290 case IX86_BUILTIN_GATHERDIV2DF:
36291 icode = CODE_FOR_avx2_gatherdiv2df;
36292 goto gather_gen;
36293 case IX86_BUILTIN_GATHERDIV4DF:
36294 icode = CODE_FOR_avx2_gatherdiv4df;
36295 goto gather_gen;
36296 case IX86_BUILTIN_GATHERSIV4SF:
36297 icode = CODE_FOR_avx2_gathersiv4sf;
36298 goto gather_gen;
36299 case IX86_BUILTIN_GATHERSIV8SF:
36300 icode = CODE_FOR_avx2_gathersiv8sf;
36301 goto gather_gen;
36302 case IX86_BUILTIN_GATHERDIV4SF:
36303 icode = CODE_FOR_avx2_gatherdiv4sf;
36304 goto gather_gen;
36305 case IX86_BUILTIN_GATHERDIV8SF:
36306 icode = CODE_FOR_avx2_gatherdiv8sf;
36307 goto gather_gen;
36308 case IX86_BUILTIN_GATHERSIV2DI:
36309 icode = CODE_FOR_avx2_gathersiv2di;
36310 goto gather_gen;
36311 case IX86_BUILTIN_GATHERSIV4DI:
36312 icode = CODE_FOR_avx2_gathersiv4di;
36313 goto gather_gen;
36314 case IX86_BUILTIN_GATHERDIV2DI:
36315 icode = CODE_FOR_avx2_gatherdiv2di;
36316 goto gather_gen;
36317 case IX86_BUILTIN_GATHERDIV4DI:
36318 icode = CODE_FOR_avx2_gatherdiv4di;
36319 goto gather_gen;
36320 case IX86_BUILTIN_GATHERSIV4SI:
36321 icode = CODE_FOR_avx2_gathersiv4si;
36322 goto gather_gen;
36323 case IX86_BUILTIN_GATHERSIV8SI:
36324 icode = CODE_FOR_avx2_gathersiv8si;
36325 goto gather_gen;
36326 case IX86_BUILTIN_GATHERDIV4SI:
36327 icode = CODE_FOR_avx2_gatherdiv4si;
36328 goto gather_gen;
36329 case IX86_BUILTIN_GATHERDIV8SI:
36330 icode = CODE_FOR_avx2_gatherdiv8si;
36331 goto gather_gen;
36332 case IX86_BUILTIN_GATHERALTSIV4DF:
36333 icode = CODE_FOR_avx2_gathersiv4df;
36334 goto gather_gen;
36335 case IX86_BUILTIN_GATHERALTDIV8SF:
36336 icode = CODE_FOR_avx2_gatherdiv8sf;
36337 goto gather_gen;
36338 case IX86_BUILTIN_GATHERALTSIV4DI:
36339 icode = CODE_FOR_avx2_gathersiv4di;
36340 goto gather_gen;
36341 case IX86_BUILTIN_GATHERALTDIV8SI:
36342 icode = CODE_FOR_avx2_gatherdiv8si;
36343 goto gather_gen;
36344 case IX86_BUILTIN_GATHER3SIV16SF:
36345 icode = CODE_FOR_avx512f_gathersiv16sf;
36346 goto gather_gen;
36347 case IX86_BUILTIN_GATHER3SIV8DF:
36348 icode = CODE_FOR_avx512f_gathersiv8df;
36349 goto gather_gen;
36350 case IX86_BUILTIN_GATHER3DIV16SF:
36351 icode = CODE_FOR_avx512f_gatherdiv16sf;
36352 goto gather_gen;
36353 case IX86_BUILTIN_GATHER3DIV8DF:
36354 icode = CODE_FOR_avx512f_gatherdiv8df;
36355 goto gather_gen;
36356 case IX86_BUILTIN_GATHER3SIV16SI:
36357 icode = CODE_FOR_avx512f_gathersiv16si;
36358 goto gather_gen;
36359 case IX86_BUILTIN_GATHER3SIV8DI:
36360 icode = CODE_FOR_avx512f_gathersiv8di;
36361 goto gather_gen;
36362 case IX86_BUILTIN_GATHER3DIV16SI:
36363 icode = CODE_FOR_avx512f_gatherdiv16si;
36364 goto gather_gen;
36365 case IX86_BUILTIN_GATHER3DIV8DI:
36366 icode = CODE_FOR_avx512f_gatherdiv8di;
36367 goto gather_gen;
36368 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36369 icode = CODE_FOR_avx512f_gathersiv8df;
36370 goto gather_gen;
36371 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36372 icode = CODE_FOR_avx512f_gatherdiv16sf;
36373 goto gather_gen;
36374 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36375 icode = CODE_FOR_avx512f_gathersiv8di;
36376 goto gather_gen;
36377 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36378 icode = CODE_FOR_avx512f_gatherdiv16si;
36379 goto gather_gen;
36380 case IX86_BUILTIN_GATHER3SIV2DF:
36381 icode = CODE_FOR_avx512vl_gathersiv2df;
36382 goto gather_gen;
36383 case IX86_BUILTIN_GATHER3SIV4DF:
36384 icode = CODE_FOR_avx512vl_gathersiv4df;
36385 goto gather_gen;
36386 case IX86_BUILTIN_GATHER3DIV2DF:
36387 icode = CODE_FOR_avx512vl_gatherdiv2df;
36388 goto gather_gen;
36389 case IX86_BUILTIN_GATHER3DIV4DF:
36390 icode = CODE_FOR_avx512vl_gatherdiv4df;
36391 goto gather_gen;
36392 case IX86_BUILTIN_GATHER3SIV4SF:
36393 icode = CODE_FOR_avx512vl_gathersiv4sf;
36394 goto gather_gen;
36395 case IX86_BUILTIN_GATHER3SIV8SF:
36396 icode = CODE_FOR_avx512vl_gathersiv8sf;
36397 goto gather_gen;
36398 case IX86_BUILTIN_GATHER3DIV4SF:
36399 icode = CODE_FOR_avx512vl_gatherdiv4sf;
36400 goto gather_gen;
36401 case IX86_BUILTIN_GATHER3DIV8SF:
36402 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36403 goto gather_gen;
36404 case IX86_BUILTIN_GATHER3SIV2DI:
36405 icode = CODE_FOR_avx512vl_gathersiv2di;
36406 goto gather_gen;
36407 case IX86_BUILTIN_GATHER3SIV4DI:
36408 icode = CODE_FOR_avx512vl_gathersiv4di;
36409 goto gather_gen;
36410 case IX86_BUILTIN_GATHER3DIV2DI:
36411 icode = CODE_FOR_avx512vl_gatherdiv2di;
36412 goto gather_gen;
36413 case IX86_BUILTIN_GATHER3DIV4DI:
36414 icode = CODE_FOR_avx512vl_gatherdiv4di;
36415 goto gather_gen;
36416 case IX86_BUILTIN_GATHER3SIV4SI:
36417 icode = CODE_FOR_avx512vl_gathersiv4si;
36418 goto gather_gen;
36419 case IX86_BUILTIN_GATHER3SIV8SI:
36420 icode = CODE_FOR_avx512vl_gathersiv8si;
36421 goto gather_gen;
36422 case IX86_BUILTIN_GATHER3DIV4SI:
36423 icode = CODE_FOR_avx512vl_gatherdiv4si;
36424 goto gather_gen;
36425 case IX86_BUILTIN_GATHER3DIV8SI:
36426 icode = CODE_FOR_avx512vl_gatherdiv8si;
36427 goto gather_gen;
36428 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36429 icode = CODE_FOR_avx512vl_gathersiv4df;
36430 goto gather_gen;
36431 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36432 icode = CODE_FOR_avx512vl_gatherdiv8sf;
36433 goto gather_gen;
36434 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36435 icode = CODE_FOR_avx512vl_gathersiv4di;
36436 goto gather_gen;
36437 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36438 icode = CODE_FOR_avx512vl_gatherdiv8si;
36439 goto gather_gen;
36440 case IX86_BUILTIN_SCATTERSIV16SF:
36441 icode = CODE_FOR_avx512f_scattersiv16sf;
36442 goto scatter_gen;
36443 case IX86_BUILTIN_SCATTERSIV8DF:
36444 icode = CODE_FOR_avx512f_scattersiv8df;
36445 goto scatter_gen;
36446 case IX86_BUILTIN_SCATTERDIV16SF:
36447 icode = CODE_FOR_avx512f_scatterdiv16sf;
36448 goto scatter_gen;
36449 case IX86_BUILTIN_SCATTERDIV8DF:
36450 icode = CODE_FOR_avx512f_scatterdiv8df;
36451 goto scatter_gen;
36452 case IX86_BUILTIN_SCATTERSIV16SI:
36453 icode = CODE_FOR_avx512f_scattersiv16si;
36454 goto scatter_gen;
36455 case IX86_BUILTIN_SCATTERSIV8DI:
36456 icode = CODE_FOR_avx512f_scattersiv8di;
36457 goto scatter_gen;
36458 case IX86_BUILTIN_SCATTERDIV16SI:
36459 icode = CODE_FOR_avx512f_scatterdiv16si;
36460 goto scatter_gen;
36461 case IX86_BUILTIN_SCATTERDIV8DI:
36462 icode = CODE_FOR_avx512f_scatterdiv8di;
36463 goto scatter_gen;
36464 case IX86_BUILTIN_SCATTERSIV8SF:
36465 icode = CODE_FOR_avx512vl_scattersiv8sf;
36466 goto scatter_gen;
36467 case IX86_BUILTIN_SCATTERSIV4SF:
36468 icode = CODE_FOR_avx512vl_scattersiv4sf;
36469 goto scatter_gen;
36470 case IX86_BUILTIN_SCATTERSIV4DF:
36471 icode = CODE_FOR_avx512vl_scattersiv4df;
36472 goto scatter_gen;
36473 case IX86_BUILTIN_SCATTERSIV2DF:
36474 icode = CODE_FOR_avx512vl_scattersiv2df;
36475 goto scatter_gen;
36476 case IX86_BUILTIN_SCATTERDIV8SF:
36477 icode = CODE_FOR_avx512vl_scatterdiv8sf;
36478 goto scatter_gen;
36479 case IX86_BUILTIN_SCATTERDIV4SF:
36480 icode = CODE_FOR_avx512vl_scatterdiv4sf;
36481 goto scatter_gen;
36482 case IX86_BUILTIN_SCATTERDIV4DF:
36483 icode = CODE_FOR_avx512vl_scatterdiv4df;
36484 goto scatter_gen;
36485 case IX86_BUILTIN_SCATTERDIV2DF:
36486 icode = CODE_FOR_avx512vl_scatterdiv2df;
36487 goto scatter_gen;
36488 case IX86_BUILTIN_SCATTERSIV8SI:
36489 icode = CODE_FOR_avx512vl_scattersiv8si;
36490 goto scatter_gen;
36491 case IX86_BUILTIN_SCATTERSIV4SI:
36492 icode = CODE_FOR_avx512vl_scattersiv4si;
36493 goto scatter_gen;
36494 case IX86_BUILTIN_SCATTERSIV4DI:
36495 icode = CODE_FOR_avx512vl_scattersiv4di;
36496 goto scatter_gen;
36497 case IX86_BUILTIN_SCATTERSIV2DI:
36498 icode = CODE_FOR_avx512vl_scattersiv2di;
36499 goto scatter_gen;
36500 case IX86_BUILTIN_SCATTERDIV8SI:
36501 icode = CODE_FOR_avx512vl_scatterdiv8si;
36502 goto scatter_gen;
36503 case IX86_BUILTIN_SCATTERDIV4SI:
36504 icode = CODE_FOR_avx512vl_scatterdiv4si;
36505 goto scatter_gen;
36506 case IX86_BUILTIN_SCATTERDIV4DI:
36507 icode = CODE_FOR_avx512vl_scatterdiv4di;
36508 goto scatter_gen;
36509 case IX86_BUILTIN_SCATTERDIV2DI:
36510 icode = CODE_FOR_avx512vl_scatterdiv2di;
36511 goto scatter_gen;
36512 case IX86_BUILTIN_GATHERPFDPD:
36513 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36514 goto vec_prefetch_gen;
36515 case IX86_BUILTIN_SCATTERALTSIV8DF:
36516 icode = CODE_FOR_avx512f_scattersiv8df;
36517 goto scatter_gen;
36518 case IX86_BUILTIN_SCATTERALTDIV16SF:
36519 icode = CODE_FOR_avx512f_scatterdiv16sf;
36520 goto scatter_gen;
36521 case IX86_BUILTIN_SCATTERALTSIV8DI:
36522 icode = CODE_FOR_avx512f_scattersiv8di;
36523 goto scatter_gen;
36524 case IX86_BUILTIN_SCATTERALTDIV16SI:
36525 icode = CODE_FOR_avx512f_scatterdiv16si;
36526 goto scatter_gen;
36527 case IX86_BUILTIN_GATHERPFDPS:
36528 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36529 goto vec_prefetch_gen;
36530 case IX86_BUILTIN_GATHERPFQPD:
36531 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36532 goto vec_prefetch_gen;
36533 case IX86_BUILTIN_GATHERPFQPS:
36534 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36535 goto vec_prefetch_gen;
36536 case IX86_BUILTIN_SCATTERPFDPD:
36537 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36538 goto vec_prefetch_gen;
36539 case IX86_BUILTIN_SCATTERPFDPS:
36540 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36541 goto vec_prefetch_gen;
36542 case IX86_BUILTIN_SCATTERPFQPD:
36543 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36544 goto vec_prefetch_gen;
36545 case IX86_BUILTIN_SCATTERPFQPS:
36546 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36547 goto vec_prefetch_gen;
36549 gather_gen:
36550 rtx half;
36551 rtx (*gen) (rtx, rtx);
36553 arg0 = CALL_EXPR_ARG (exp, 0);
36554 arg1 = CALL_EXPR_ARG (exp, 1);
36555 arg2 = CALL_EXPR_ARG (exp, 2);
36556 arg3 = CALL_EXPR_ARG (exp, 3);
36557 arg4 = CALL_EXPR_ARG (exp, 4);
36558 op0 = expand_normal (arg0);
36559 op1 = expand_normal (arg1);
36560 op2 = expand_normal (arg2);
36561 op3 = expand_normal (arg3);
36562 op4 = expand_normal (arg4);
36563 /* Note the arg order is different from the operand order. */
36564 mode0 = insn_data[icode].operand[1].mode;
36565 mode2 = insn_data[icode].operand[3].mode;
36566 mode3 = insn_data[icode].operand[4].mode;
36567 mode4 = insn_data[icode].operand[5].mode;
36569 if (target == NULL_RTX
36570 || GET_MODE (target) != insn_data[icode].operand[0].mode
36571 || !insn_data[icode].operand[0].predicate (target,
36572 GET_MODE (target)))
36573 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36574 else
36575 subtarget = target;
36577 switch (fcode)
36579 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36580 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36581 half = gen_reg_rtx (V8SImode);
36582 if (!nonimmediate_operand (op2, V16SImode))
36583 op2 = copy_to_mode_reg (V16SImode, op2);
36584 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36585 op2 = half;
36586 break;
36587 case IX86_BUILTIN_GATHER3ALTSIV4DF:
36588 case IX86_BUILTIN_GATHER3ALTSIV4DI:
36589 case IX86_BUILTIN_GATHERALTSIV4DF:
36590 case IX86_BUILTIN_GATHERALTSIV4DI:
36591 half = gen_reg_rtx (V4SImode);
36592 if (!nonimmediate_operand (op2, V8SImode))
36593 op2 = copy_to_mode_reg (V8SImode, op2);
36594 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36595 op2 = half;
36596 break;
36597 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36598 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36599 half = gen_reg_rtx (mode0);
36600 if (mode0 == V8SFmode)
36601 gen = gen_vec_extract_lo_v16sf;
36602 else
36603 gen = gen_vec_extract_lo_v16si;
36604 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36605 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36606 emit_insn (gen (half, op0));
36607 op0 = half;
36608 if (GET_MODE (op3) != VOIDmode)
36610 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36611 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36612 emit_insn (gen (half, op3));
36613 op3 = half;
36615 break;
36616 case IX86_BUILTIN_GATHER3ALTDIV8SF:
36617 case IX86_BUILTIN_GATHER3ALTDIV8SI:
36618 case IX86_BUILTIN_GATHERALTDIV8SF:
36619 case IX86_BUILTIN_GATHERALTDIV8SI:
36620 half = gen_reg_rtx (mode0);
36621 if (mode0 == V4SFmode)
36622 gen = gen_vec_extract_lo_v8sf;
36623 else
36624 gen = gen_vec_extract_lo_v8si;
36625 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36626 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36627 emit_insn (gen (half, op0));
36628 op0 = half;
36629 if (GET_MODE (op3) != VOIDmode)
36631 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36632 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36633 emit_insn (gen (half, op3));
36634 op3 = half;
36636 break;
36637 default:
36638 break;
36641 /* Force the memory operand to be addressed by a base register only
36642 here. We don't want to do this for the memory operands of other
36643 builtin functions. */
36644 op1 = ix86_zero_extend_to_Pmode (op1);
36646 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36647 op0 = copy_to_mode_reg (mode0, op0);
36648 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36649 op1 = copy_to_mode_reg (Pmode, op1);
36650 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36651 op2 = copy_to_mode_reg (mode2, op2);
36653 op3 = fixup_modeless_constant (op3, mode3);
36655 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36657 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36658 op3 = copy_to_mode_reg (mode3, op3);
36660 else
36662 op3 = copy_to_reg (op3);
36663 op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
36665 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36667 error ("the last argument must be scale 1, 2, 4 or 8");
36668 return const0_rtx;
36671 /* Optimize. If mask is known to have all high bits set,
36672 replace op0 with pc_rtx to signal that the instruction
36673 overwrites the whole destination and doesn't use its
36674 previous contents. */
36675 if (optimize)
36677 if (TREE_CODE (arg3) == INTEGER_CST)
36679 if (integer_all_onesp (arg3))
36680 op0 = pc_rtx;
36682 else if (TREE_CODE (arg3) == VECTOR_CST)
36684 unsigned int negative = 0;
36685 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36687 tree cst = VECTOR_CST_ELT (arg3, i);
36688 if (TREE_CODE (cst) == INTEGER_CST
36689 && tree_int_cst_sign_bit (cst))
36690 negative++;
36691 else if (TREE_CODE (cst) == REAL_CST
36692 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36693 negative++;
36695 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36696 op0 = pc_rtx;
36698 else if (TREE_CODE (arg3) == SSA_NAME
36699 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36701 /* Recognize also when mask is like:
36702 __v2df src = _mm_setzero_pd ();
36703 __v2df mask = _mm_cmpeq_pd (src, src);
36705 __v8sf src = _mm256_setzero_ps ();
36706 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36707 as that is a cheaper way to load all ones into
36708 a register than having to load a constant from
36709 memory. */
36710 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
36711 if (is_gimple_call (def_stmt))
36713 tree fndecl = gimple_call_fndecl (def_stmt);
36714 if (fndecl
36715 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36716 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36718 case IX86_BUILTIN_CMPPD:
36719 case IX86_BUILTIN_CMPPS:
36720 case IX86_BUILTIN_CMPPD256:
36721 case IX86_BUILTIN_CMPPS256:
36722 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36723 break;
36724 /* FALLTHRU */
36725 case IX86_BUILTIN_CMPEQPD:
36726 case IX86_BUILTIN_CMPEQPS:
36727 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36728 && initializer_zerop (gimple_call_arg (def_stmt,
36729 1)))
36730 op0 = pc_rtx;
36731 break;
36732 default:
36733 break;
36739 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36740 if (! pat)
36741 return const0_rtx;
36742 emit_insn (pat);
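/* For the gather variants whose destination pattern mode has twice as
   many elements as are actually gathered (the DIV16SF/DIV16SI/DIV8SF/
   DIV8SI forms handled below), only the low half of SUBTARGET holds
   gathered data, so extract it into TARGET.  */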
36744 switch (fcode)
36746 case IX86_BUILTIN_GATHER3DIV16SF:
36747 if (target == NULL_RTX)
36748 target = gen_reg_rtx (V8SFmode);
36749 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36750 break;
36751 case IX86_BUILTIN_GATHER3DIV16SI:
36752 if (target == NULL_RTX)
36753 target = gen_reg_rtx (V8SImode);
36754 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36755 break;
36756 case IX86_BUILTIN_GATHER3DIV8SF:
36757 case IX86_BUILTIN_GATHERDIV8SF:
36758 if (target == NULL_RTX)
36759 target = gen_reg_rtx (V4SFmode);
36760 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36761 break;
36762 case IX86_BUILTIN_GATHER3DIV8SI:
36763 case IX86_BUILTIN_GATHERDIV8SI:
36764 if (target == NULL_RTX)
36765 target = gen_reg_rtx (V4SImode);
36766 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36767 break;
36768 default:
36769 target = subtarget;
36770 break;
36772 return target;
36774 scatter_gen:
36775 arg0 = CALL_EXPR_ARG (exp, 0);
36776 arg1 = CALL_EXPR_ARG (exp, 1);
36777 arg2 = CALL_EXPR_ARG (exp, 2);
36778 arg3 = CALL_EXPR_ARG (exp, 3);
36779 arg4 = CALL_EXPR_ARG (exp, 4);
36780 op0 = expand_normal (arg0);
36781 op1 = expand_normal (arg1);
36782 op2 = expand_normal (arg2);
36783 op3 = expand_normal (arg3);
36784 op4 = expand_normal (arg4);
36785 mode1 = insn_data[icode].operand[1].mode;
36786 mode2 = insn_data[icode].operand[2].mode;
36787 mode3 = insn_data[icode].operand[3].mode;
36788 mode4 = insn_data[icode].operand[4].mode;
36790 /* Scatter instruction stores operand op3 to memory with
36791 indices from op2 and scale from op4 under writemask op1.
36792 If the index operand op2 has more elements than the source operand
36793 op3, one needs to use only its low half. And vice versa. */
36794 switch (fcode)
36796 case IX86_BUILTIN_SCATTERALTSIV8DF:
36797 case IX86_BUILTIN_SCATTERALTSIV8DI:
36798 half = gen_reg_rtx (V8SImode);
36799 if (!nonimmediate_operand (op2, V16SImode))
36800 op2 = copy_to_mode_reg (V16SImode, op2);
36801 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36802 op2 = half;
36803 break;
36804 case IX86_BUILTIN_SCATTERALTDIV16SF:
36805 case IX86_BUILTIN_SCATTERALTDIV16SI:
36806 half = gen_reg_rtx (mode3);
36807 if (mode3 == V8SFmode)
36808 gen = gen_vec_extract_lo_v16sf;
36809 else
36810 gen = gen_vec_extract_lo_v16si;
36811 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36812 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36813 emit_insn (gen (half, op3));
36814 op3 = half;
36815 break;
36816 default:
36817 break;
36820 /* Force the memory operand to be addressed by a base register only
36821 here. We don't want to do this for the memory operands of other
36822 builtin functions. */
36823 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36825 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36826 op0 = copy_to_mode_reg (Pmode, op0);
36828 op1 = fixup_modeless_constant (op1, mode1);
36830 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36832 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36833 op1 = copy_to_mode_reg (mode1, op1);
36835 else
36837 op1 = copy_to_reg (op1);
36838 op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
36841 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36842 op2 = copy_to_mode_reg (mode2, op2);
36844 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36845 op3 = copy_to_mode_reg (mode3, op3);
36847 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36849 error ("the last argument must be scale 1, 2, 4 or 8");
36850 return const0_rtx;
36853 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36854 if (! pat)
36855 return const0_rtx;
36857 emit_insn (pat);
36858 return 0;
36860 vec_prefetch_gen:
36861 arg0 = CALL_EXPR_ARG (exp, 0);
36862 arg1 = CALL_EXPR_ARG (exp, 1);
36863 arg2 = CALL_EXPR_ARG (exp, 2);
36864 arg3 = CALL_EXPR_ARG (exp, 3);
36865 arg4 = CALL_EXPR_ARG (exp, 4);
36866 op0 = expand_normal (arg0);
36867 op1 = expand_normal (arg1);
36868 op2 = expand_normal (arg2);
36869 op3 = expand_normal (arg3);
36870 op4 = expand_normal (arg4);
36871 mode0 = insn_data[icode].operand[0].mode;
36872 mode1 = insn_data[icode].operand[1].mode;
36873 mode3 = insn_data[icode].operand[3].mode;
36874 mode4 = insn_data[icode].operand[4].mode;
36876 op0 = fixup_modeless_constant (op0, mode0);
36878 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
36880 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36881 op0 = copy_to_mode_reg (mode0, op0);
36883 else
36885 op0 = copy_to_reg (op0);
36886 op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
36889 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36890 op1 = copy_to_mode_reg (mode1, op1);
36892 /* Force the memory operand to be addressed by a base register only
36893 here. We don't want to do this for the memory operands of other
36894 builtin functions. */
36895 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36897 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36898 op2 = copy_to_mode_reg (Pmode, op2);
36900 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36902 error ("the fourth argument must be scale 1, 2, 4 or 8");
36903 return const0_rtx;
36906 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36908 error ("incorrect hint operand");
36909 return const0_rtx;
36912 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36913 if (! pat)
36914 return const0_rtx;
36916 emit_insn (pat);
36918 return 0;
36920 case IX86_BUILTIN_XABORT:
36921 icode = CODE_FOR_xabort;
36922 arg0 = CALL_EXPR_ARG (exp, 0);
36923 op0 = expand_normal (arg0);
36924 mode0 = insn_data[icode].operand[0].mode;
36925 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36927 error ("the xabort's argument must be an 8-bit immediate");
36928 return const0_rtx;
36930 emit_insn (gen_xabort (op0));
36931 return 0;
36933 case IX86_BUILTIN_RSTORSSP:
36934 case IX86_BUILTIN_CLRSSBSY:
36935 arg0 = CALL_EXPR_ARG (exp, 0);
36936 op0 = expand_normal (arg0);
36937 icode = (fcode == IX86_BUILTIN_RSTORSSP
36938 ? CODE_FOR_rstorssp
36939 : CODE_FOR_clrssbsy);
36940 if (!address_operand (op0, VOIDmode))
36942 op1 = convert_memory_address (Pmode, op0);
36943 op0 = copy_addr_to_reg (op1);
36945 emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
36946 return 0;
36948 case IX86_BUILTIN_WRSSD:
36949 case IX86_BUILTIN_WRSSQ:
36950 case IX86_BUILTIN_WRUSSD:
36951 case IX86_BUILTIN_WRUSSQ:
36952 arg0 = CALL_EXPR_ARG (exp, 0);
36953 op0 = expand_normal (arg0);
36954 arg1 = CALL_EXPR_ARG (exp, 1);
36955 op1 = expand_normal (arg1);
36956 switch (fcode)
36958 case IX86_BUILTIN_WRSSD:
36959 icode = CODE_FOR_wrsssi;
36960 mode = SImode;
36961 break;
36962 case IX86_BUILTIN_WRSSQ:
36963 icode = CODE_FOR_wrssdi;
36964 mode = DImode;
36965 break;
36966 case IX86_BUILTIN_WRUSSD:
36967 icode = CODE_FOR_wrusssi;
36968 mode = SImode;
36969 break;
36970 case IX86_BUILTIN_WRUSSQ:
36971 icode = CODE_FOR_wrussdi;
36972 mode = DImode;
36973 break;
36975 op0 = force_reg (mode, op0);
36976 if (!address_operand (op1, VOIDmode))
36978 op2 = convert_memory_address (Pmode, op1);
36979 op1 = copy_addr_to_reg (op2);
36981 emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
36982 return 0;
36984 default:
36985 break;
36988 if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
36989 && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
36991 i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
36992 return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
36993 target);
36996 if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
36997 && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
36999 i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
37000 switch (fcode)
37002 case IX86_BUILTIN_FABSQ:
37003 case IX86_BUILTIN_COPYSIGNQ:
37004 if (!TARGET_SSE)
37005 /* Emit a normal call if SSE isn't available. */
37006 return expand_call (exp, target, ignore);
37007 /* FALLTHRU */
37008 default:
37009 return ix86_expand_args_builtin (bdesc_args + i, exp, target);
37013 if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
37014 && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
37016 i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
37017 rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
37018 rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
37019 rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
37020 int masked = 1;
37021 machine_mode mode, wide_mode, nar_mode;
37023 nar_mode = V4SFmode;
37024 mode = V16SFmode;
37025 wide_mode = V64SFmode;
37026 fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
37027 fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
37029 switch (fcode)
37031 case IX86_BUILTIN_4FMAPS:
37032 fcn = gen_avx5124fmaddps_4fmaddps;
37033 masked = 0;
37034 goto v4fma_expand;
37036 case IX86_BUILTIN_4DPWSSD:
37037 nar_mode = V4SImode;
37038 mode = V16SImode;
37039 wide_mode = V64SImode;
37040 fcn = gen_avx5124vnniw_vp4dpwssd;
37041 masked = 0;
37042 goto v4fma_expand;
37044 case IX86_BUILTIN_4DPWSSDS:
37045 nar_mode = V4SImode;
37046 mode = V16SImode;
37047 wide_mode = V64SImode;
37048 fcn = gen_avx5124vnniw_vp4dpwssds;
37049 masked = 0;
37050 goto v4fma_expand;
37052 case IX86_BUILTIN_4FNMAPS:
37053 fcn = gen_avx5124fmaddps_4fnmaddps;
37054 masked = 0;
37055 goto v4fma_expand;
37057 case IX86_BUILTIN_4FNMAPS_MASK:
37058 fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
37059 fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
37060 goto v4fma_expand;
37062 case IX86_BUILTIN_4DPWSSD_MASK:
37063 nar_mode = V4SImode;
37064 mode = V16SImode;
37065 wide_mode = V64SImode;
37066 fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
37067 fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
37068 goto v4fma_expand;
37070 case IX86_BUILTIN_4DPWSSDS_MASK:
37071 nar_mode = V4SImode;
37072 mode = V16SImode;
37073 wide_mode = V64SImode;
37074 fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
37075 fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
37076 goto v4fma_expand;
37078 case IX86_BUILTIN_4FMAPS_MASK:
37080 tree args[4];
37081 rtx ops[4];
37082 rtx wide_reg;
37083 rtx accum;
37084 rtx addr;
37085 rtx mem;
37087 v4fma_expand:
37088 wide_reg = gen_reg_rtx (wide_mode);
37089 for (i = 0; i < 4; i++)
37091 args[i] = CALL_EXPR_ARG (exp, i);
37092 ops[i] = expand_normal (args[i]);
37094 emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
37095 ops[i]);
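/* Each source operand occupies one 64-byte (512-bit) slot of WIDE_REG,
   so ops[0..3] are packed back to back into the 2048-bit register.  */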
37098 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37099 accum = force_reg (mode, accum);
37101 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37102 addr = force_reg (Pmode, addr);
37104 mem = gen_rtx_MEM (nar_mode, addr);
37106 target = gen_reg_rtx (mode);
37108 emit_move_insn (target, accum);
37110 if (! masked)
37111 emit_insn (fcn (target, accum, wide_reg, mem));
37112 else
37114 rtx merge, mask;
37115 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37117 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37119 if (CONST_INT_P (mask))
37120 mask = fixup_modeless_constant (mask, HImode);
37122 mask = force_reg (HImode, mask);
37124 if (GET_MODE (mask) != HImode)
37125 mask = gen_rtx_SUBREG (HImode, mask, 0);
37127 /* If merge is 0 then we're about to emit the z-masked variant. */
37128 if (const0_operand (merge, mode))
37129 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37130 /* If merge is the same as accum then emit the merge-masked variant. */
37131 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37133 merge = force_reg (mode, merge);
37134 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37136 /* Merging with something unknown can happen if we z-mask with -O0. */
37137 else
37139 target = gen_reg_rtx (mode);
37140 emit_move_insn (target, merge);
37141 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37144 return target;
37147 case IX86_BUILTIN_4FNMASS:
37148 fcn = gen_avx5124fmaddps_4fnmaddss;
37149 masked = 0;
37150 goto s4fma_expand;
37152 case IX86_BUILTIN_4FMASS:
37153 fcn = gen_avx5124fmaddps_4fmaddss;
37154 masked = 0;
37155 goto s4fma_expand;
37157 case IX86_BUILTIN_4FNMASS_MASK:
37158 fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
37159 fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
37160 goto s4fma_expand;
37162 case IX86_BUILTIN_4FMASS_MASK:
37164 tree args[4];
37165 rtx ops[4];
37166 rtx wide_reg;
37167 rtx accum;
37168 rtx addr;
37169 rtx mem;
37171 fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
37172 fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
37174 s4fma_expand:
37175 mode = V4SFmode;
37176 wide_reg = gen_reg_rtx (V64SFmode);
37177 for (i = 0; i < 4; i++)
37179 rtx tmp;
37180 args[i] = CALL_EXPR_ARG (exp, i);
37181 ops[i] = expand_normal (args[i]);
37183 tmp = gen_reg_rtx (SFmode);
37184 emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
37186 emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
37187 gen_rtx_SUBREG (V16SFmode, tmp, 0));
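/* For the scalar 4FMA forms only the low SFmode element of each 128-bit
   source operand matters; it is copied into the corresponding 64-byte
   slot of WIDE_REG.  */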
37190 accum = expand_normal (CALL_EXPR_ARG (exp, 4));
37191 accum = force_reg (V4SFmode, accum);
37193 addr = expand_normal (CALL_EXPR_ARG (exp, 5));
37194 addr = force_reg (Pmode, addr);
37196 mem = gen_rtx_MEM (V4SFmode, addr);
37198 target = gen_reg_rtx (V4SFmode);
37200 emit_move_insn (target, accum);
37202 if (! masked)
37203 emit_insn (fcn (target, accum, wide_reg, mem));
37204 else
37206 rtx merge, mask;
37207 merge = expand_normal (CALL_EXPR_ARG (exp, 6));
37209 mask = expand_normal (CALL_EXPR_ARG (exp, 7));
37211 if (CONST_INT_P (mask))
37212 mask = fixup_modeless_constant (mask, QImode);
37214 mask = force_reg (QImode, mask);
37216 if (GET_MODE (mask) != QImode)
37217 mask = gen_rtx_SUBREG (QImode, mask, 0);
37219 /* If merge is 0 then we're about to emit the z-masked variant. */
37220 if (const0_operand (merge, mode))
37221 emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
37222 /* If merge is the same as accum then emit the merge-masked
37223 variant. */
37224 else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
37226 merge = force_reg (mode, merge);
37227 emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
37229 /* Merging with something unknown can happen if we z-mask
37230 with -O0. */
37231 else
37233 target = gen_reg_rtx (mode);
37234 emit_move_insn (target, merge);
37235 emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
37238 return target;
37240 case IX86_BUILTIN_RDPID:
37241 return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
37242 target);
37243 default:
37244 return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
37248 if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
37249 && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
37251 i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
37252 return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
37255 if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
37256 && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
37258 i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
37259 return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
37262 if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
37263 && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
37265 i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
37266 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
37269 if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
37270 && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
37272 i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
37273 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
37276 if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
37277 && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
37279 i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
37280 const struct builtin_description *d = bdesc_multi_arg + i;
37281 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
37282 (enum ix86_builtin_func_type)
37283 d->flag, d->comparison);
37286 if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
37287 && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
37289 i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
37290 return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
37291 target);
37294 if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
37295 && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
37297 i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
37298 return ix86_expand_args_builtin (bdesc_cet_rdssp + i, exp,
37299 target);
37302 gcc_unreachable ();
37305 /* This returns the target-specific builtin with code CODE if
37306 current_function_decl has visibility on this builtin, which is checked
37307 using isa flags. Returns NULL_TREE otherwise. */
37309 static tree ix86_get_builtin (enum ix86_builtins code)
37311 struct cl_target_option *opts;
37312 tree target_tree = NULL_TREE;
37314 /* Determine the isa flags of current_function_decl. */
37316 if (current_function_decl)
37317 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
37319 if (target_tree == NULL)
37320 target_tree = target_option_default_node;
37322 opts = TREE_TARGET_OPTION (target_tree);
37324 if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
37325 || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
37326 return ix86_builtin_decl (code, true);
37327 else
37328 return NULL_TREE;
37331 /* Return the function decl for the target-specific builtin
37332 for the given MPX builtin passed in FCODE. */
37333 static tree
37334 ix86_builtin_mpx_function (unsigned fcode)
37336 switch (fcode)
37338 case BUILT_IN_CHKP_BNDMK:
37339 return ix86_builtins[IX86_BUILTIN_BNDMK];
37341 case BUILT_IN_CHKP_BNDSTX:
37342 return ix86_builtins[IX86_BUILTIN_BNDSTX];
37344 case BUILT_IN_CHKP_BNDLDX:
37345 return ix86_builtins[IX86_BUILTIN_BNDLDX];
37347 case BUILT_IN_CHKP_BNDCL:
37348 return ix86_builtins[IX86_BUILTIN_BNDCL];
37350 case BUILT_IN_CHKP_BNDCU:
37351 return ix86_builtins[IX86_BUILTIN_BNDCU];
37353 case BUILT_IN_CHKP_BNDRET:
37354 return ix86_builtins[IX86_BUILTIN_BNDRET];
37356 case BUILT_IN_CHKP_INTERSECT:
37357 return ix86_builtins[IX86_BUILTIN_BNDINT];
37359 case BUILT_IN_CHKP_NARROW:
37360 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
37362 case BUILT_IN_CHKP_SIZEOF:
37363 return ix86_builtins[IX86_BUILTIN_SIZEOF];
37365 case BUILT_IN_CHKP_EXTRACT_LOWER:
37366 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
37368 case BUILT_IN_CHKP_EXTRACT_UPPER:
37369 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
37371 default:
37372 return NULL_TREE;
37375 gcc_unreachable ();
37378 /* Helper function for ix86_load_bounds and ix86_store_bounds.
37380 Return an address to be used to load/store bounds for pointer
37381 passed in SLOT.
37383 SLOT_NO is an integer constant holding number of a target
37384 dependent special slot to be used in case SLOT is not a memory.
37386 SPECIAL_BASE is a pointer to be used as a base of fake address
37387 to access special slots in Bounds Table. SPECIAL_BASE[-1],
37388 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
37390 static rtx
37391 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
37393 rtx addr = NULL;
37395 /* A NULL slot means we pass bounds for a pointer not passed to the
37396 function at all. A register slot means we pass the pointer in a
37397 register. In both these cases bounds are passed via the Bounds
37398 Table. Since we do not have an actual pointer stored in memory,
37399 we have to use fake addresses to access the Bounds Table. We
37400 start with (special_base - sizeof (void *)) and decrease this
37401 address by the pointer size to get addresses for other slots. */
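/* For example, with 64-bit pointers slot 0 maps to special_base - 8,
   slot 1 to special_base - 16, and so on.  */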
37402 if (!slot || REG_P (slot))
37404 gcc_assert (CONST_INT_P (slot_no));
37405 addr = plus_constant (Pmode, special_base,
37406 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
37408 /* If the pointer is passed in memory then its address is used to
37409 access the Bounds Table. */
37410 else if (MEM_P (slot))
37412 addr = XEXP (slot, 0);
37413 if (!register_operand (addr, Pmode))
37414 addr = copy_addr_to_reg (addr);
37416 else
37417 gcc_unreachable ();
37419 return addr;
37422 /* Expand pass uses this hook to load bounds for function parameter
37423 PTR passed in SLOT in case its bounds are not passed in a register.
37425 If SLOT is a memory, then bounds are loaded as for regular pointer
37426 loaded from memory. PTR may be NULL in case SLOT is a memory.
37427 In such case value of PTR (if required) may be loaded from SLOT.
37429 If SLOT is NULL or a register then SLOT_NO is an integer constant
37430 holding number of the target dependent special slot which should be
37431 used to obtain bounds.
37433 Return loaded bounds. */
37435 static rtx
37436 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
37438 rtx reg = gen_reg_rtx (BNDmode);
37439 rtx addr;
37441 /* Get address to be used to access Bounds Table. Special slots start
37442 at the location of return address of the current function. */
37443 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
37445 /* Load pointer value from a memory if we don't have it. */
37446 if (!ptr)
37448 gcc_assert (MEM_P (slot));
37449 ptr = copy_addr_to_reg (slot);
37452 if (!register_operand (ptr, Pmode))
37453 ptr = ix86_zero_extend_to_Pmode (ptr);
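/* Emit a bndldx to load the bounds for PTR from the Bounds Table entry
   addressed by ADDR.  */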
37455 emit_insn (BNDmode == BND64mode
37456 ? gen_bnd64_ldx (reg, addr, ptr)
37457 : gen_bnd32_ldx (reg, addr, ptr));
37459 return reg;
37462 /* Expand pass uses this hook to store BOUNDS for call argument PTR
37463 passed in SLOT in case BOUNDS are not passed in a register.
37465 If SLOT is a memory, then BOUNDS are stored as for regular pointer
37466 stored in memory. PTR may be NULL in case SLOT is a memory.
37467 In such case value of PTR (if required) may be loaded from SLOT.
37469 If SLOT is NULL or a register then SLOT_NO is an integer constant
37470 holding number of the target dependent special slot which should be
37471 used to store BOUNDS. */
37473 static void
37474 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
37476 rtx addr;
37478 /* Get address to be used to access Bounds Table. Special slots start
37479 at the location of return address of a called function. */
37480 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
37482 /* Load pointer value from a memory if we don't have it. */
37483 if (!ptr)
37485 gcc_assert (MEM_P (slot));
37486 ptr = copy_addr_to_reg (slot);
37489 if (!register_operand (ptr, Pmode))
37490 ptr = ix86_zero_extend_to_Pmode (ptr);
37492 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
37493 if (!register_operand (bounds, BNDmode))
37494 bounds = copy_to_mode_reg (BNDmode, bounds);
37496 emit_insn (BNDmode == BND64mode
37497 ? gen_bnd64_stx (addr, ptr, bounds)
37498 : gen_bnd32_stx (addr, ptr, bounds));
37501 /* Load and return bounds returned by function in SLOT. */
37503 static rtx
37504 ix86_load_returned_bounds (rtx slot)
37506 rtx res;
37508 gcc_assert (REG_P (slot));
37509 res = gen_reg_rtx (BNDmode);
37510 emit_move_insn (res, slot);
37512 return res;
37515 /* Store BOUNDS returned by function into SLOT. */
37517 static void
37518 ix86_store_returned_bounds (rtx slot, rtx bounds)
37520 gcc_assert (REG_P (slot));
37521 emit_move_insn (slot, bounds);
37524 /* Returns a function decl for a vectorized version of the combined function
37525 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
37526 if it is not available. */
37528 static tree
37529 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
37530 tree type_in)
37532 machine_mode in_mode, out_mode;
37533 int in_n, out_n;
37535 if (TREE_CODE (type_out) != VECTOR_TYPE
37536 || TREE_CODE (type_in) != VECTOR_TYPE)
37537 return NULL_TREE;
37539 out_mode = TYPE_MODE (TREE_TYPE (type_out));
37540 out_n = TYPE_VECTOR_SUBPARTS (type_out);
37541 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37542 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37544 switch (fn)
37546 CASE_CFN_EXP2:
37547 if (out_mode == SFmode && in_mode == SFmode)
37549 if (out_n == 16 && in_n == 16)
37550 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
37552 break;
37554 CASE_CFN_IFLOOR:
37555 CASE_CFN_LFLOOR:
37556 CASE_CFN_LLFLOOR:
37557 /* The round insn does not trap on denormals. */
37558 if (flag_trapping_math || !TARGET_SSE4_1)
37559 break;
37561 if (out_mode == SImode && in_mode == DFmode)
37563 if (out_n == 4 && in_n == 2)
37564 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
37565 else if (out_n == 8 && in_n == 4)
37566 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
37567 else if (out_n == 16 && in_n == 8)
37568 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
37570 if (out_mode == SImode && in_mode == SFmode)
37572 if (out_n == 4 && in_n == 4)
37573 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
37574 else if (out_n == 8 && in_n == 8)
37575 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
37576 else if (out_n == 16 && in_n == 16)
37577 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
37579 break;
37581 CASE_CFN_ICEIL:
37582 CASE_CFN_LCEIL:
37583 CASE_CFN_LLCEIL:
37584 /* The round insn does not trap on denormals. */
37585 if (flag_trapping_math || !TARGET_SSE4_1)
37586 break;
37588 if (out_mode == SImode && in_mode == DFmode)
37590 if (out_n == 4 && in_n == 2)
37591 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
37592 else if (out_n == 8 && in_n == 4)
37593 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
37594 else if (out_n == 16 && in_n == 8)
37595 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
37597 if (out_mode == SImode && in_mode == SFmode)
37599 if (out_n == 4 && in_n == 4)
37600 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
37601 else if (out_n == 8 && in_n == 8)
37602 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
37603 else if (out_n == 16 && in_n == 16)
37604 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
37606 break;
37608 CASE_CFN_IRINT:
37609 CASE_CFN_LRINT:
37610 CASE_CFN_LLRINT:
37611 if (out_mode == SImode && in_mode == DFmode)
37613 if (out_n == 4 && in_n == 2)
37614 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
37615 else if (out_n == 8 && in_n == 4)
37616 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
37617 else if (out_n == 16 && in_n == 8)
37618 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
37620 if (out_mode == SImode && in_mode == SFmode)
37622 if (out_n == 4 && in_n == 4)
37623 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
37624 else if (out_n == 8 && in_n == 8)
37625 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
37626 else if (out_n == 16 && in_n == 16)
37627 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
37629 break;
37631 CASE_CFN_IROUND:
37632 CASE_CFN_LROUND:
37633 CASE_CFN_LLROUND:
37634 /* The round insn does not trap on denormals. */
37635 if (flag_trapping_math || !TARGET_SSE4_1)
37636 break;
37638 if (out_mode == SImode && in_mode == DFmode)
37640 if (out_n == 4 && in_n == 2)
37641 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
37642 else if (out_n == 8 && in_n == 4)
37643 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
37644 else if (out_n == 16 && in_n == 8)
37645 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
37647 if (out_mode == SImode && in_mode == SFmode)
37649 if (out_n == 4 && in_n == 4)
37650 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
37651 else if (out_n == 8 && in_n == 8)
37652 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
37653 else if (out_n == 16 && in_n == 16)
37654 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
37656 break;
37658 CASE_CFN_FLOOR:
37659 /* The round insn does not trap on denormals. */
37660 if (flag_trapping_math || !TARGET_SSE4_1)
37661 break;
37663 if (out_mode == DFmode && in_mode == DFmode)
37665 if (out_n == 2 && in_n == 2)
37666 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
37667 else if (out_n == 4 && in_n == 4)
37668 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
37669 else if (out_n == 8 && in_n == 8)
37670 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
37672 if (out_mode == SFmode && in_mode == SFmode)
37674 if (out_n == 4 && in_n == 4)
37675 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
37676 else if (out_n == 8 && in_n == 8)
37677 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
37678 else if (out_n == 16 && in_n == 16)
37679 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
37681 break;
37683 CASE_CFN_CEIL:
37684 /* The round insn does not trap on denormals. */
37685 if (flag_trapping_math || !TARGET_SSE4_1)
37686 break;
37688 if (out_mode == DFmode && in_mode == DFmode)
37690 if (out_n == 2 && in_n == 2)
37691 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
37692 else if (out_n == 4 && in_n == 4)
37693 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
37694 else if (out_n == 8 && in_n == 8)
37695 return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
37697 if (out_mode == SFmode && in_mode == SFmode)
37699 if (out_n == 4 && in_n == 4)
37700 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
37701 else if (out_n == 8 && in_n == 8)
37702 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
37703 else if (out_n == 16 && in_n == 16)
37704 return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
37706 break;
37708 CASE_CFN_TRUNC:
37709 /* The round insn does not trap on denormals. */
37710 if (flag_trapping_math || !TARGET_SSE4_1)
37711 break;
37713 if (out_mode == DFmode && in_mode == DFmode)
37715 if (out_n == 2 && in_n == 2)
37716 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
37717 else if (out_n == 4 && in_n == 4)
37718 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
37719 else if (out_n == 8 && in_n == 8)
37720 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
37722 if (out_mode == SFmode && in_mode == SFmode)
37724 if (out_n == 4 && in_n == 4)
37725 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
37726 else if (out_n == 8 && in_n == 8)
37727 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
37728 else if (out_n == 16 && in_n == 16)
37729 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
37731 break;
37733 CASE_CFN_RINT:
37734 /* The round insn does not trap on denormals. */
37735 if (flag_trapping_math || !TARGET_SSE4_1)
37736 break;
37738 if (out_mode == DFmode && in_mode == DFmode)
37740 if (out_n == 2 && in_n == 2)
37741 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
37742 else if (out_n == 4 && in_n == 4)
37743 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
37745 if (out_mode == SFmode && in_mode == SFmode)
37747 if (out_n == 4 && in_n == 4)
37748 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
37749 else if (out_n == 8 && in_n == 8)
37750 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
37752 break;
37754 CASE_CFN_FMA:
37755 if (out_mode == DFmode && in_mode == DFmode)
37757 if (out_n == 2 && in_n == 2)
37758 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
37759 if (out_n == 4 && in_n == 4)
37760 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
37762 if (out_mode == SFmode && in_mode == SFmode)
37764 if (out_n == 4 && in_n == 4)
37765 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
37766 if (out_n == 8 && in_n == 8)
37767 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
37769 break;
37771 default:
37772 break;
37775 /* Dispatch to a handler for a vectorization library. */
37776 if (ix86_veclib_handler)
37777 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
37779 return NULL_TREE;
37782 /* Handler for an SVML-style interface to
37783 a library with vectorized intrinsics. */
37785 static tree
37786 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
37788 char name[20];
37789 tree fntype, new_fndecl, args;
37790 unsigned arity;
37791 const char *bname;
37792 machine_mode el_mode, in_mode;
37793 int n, in_n;
37795 /* The SVML is suitable for unsafe math only. */
37796 if (!flag_unsafe_math_optimizations)
37797 return NULL_TREE;
37799 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37800 n = TYPE_VECTOR_SUBPARTS (type_out);
37801 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37802 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37803 if (el_mode != in_mode
37804 || n != in_n)
37805 return NULL_TREE;
37807 switch (fn)
37809 CASE_CFN_EXP:
37810 CASE_CFN_LOG:
37811 CASE_CFN_LOG10:
37812 CASE_CFN_POW:
37813 CASE_CFN_TANH:
37814 CASE_CFN_TAN:
37815 CASE_CFN_ATAN:
37816 CASE_CFN_ATAN2:
37817 CASE_CFN_ATANH:
37818 CASE_CFN_CBRT:
37819 CASE_CFN_SINH:
37820 CASE_CFN_SIN:
37821 CASE_CFN_ASINH:
37822 CASE_CFN_ASIN:
37823 CASE_CFN_COSH:
37824 CASE_CFN_COS:
37825 CASE_CFN_ACOSH:
37826 CASE_CFN_ACOS:
37827 if ((el_mode != DFmode || n != 2)
37828 && (el_mode != SFmode || n != 4))
37829 return NULL_TREE;
37830 break;
37832 default:
37833 return NULL_TREE;
37836 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37837 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37839 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
37840 strcpy (name, "vmlsLn4");
37841 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
37842 strcpy (name, "vmldLn2");
37843 else if (n == 4)
37845 sprintf (name, "vmls%s", bname+10);
37846 name[strlen (name)-1] = '4';
37848 else
37849 sprintf (name, "vmld%s2", bname+10);
37851 /* Convert to uppercase. */
37852 name[4] &= ~0x20;
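/* For example, __builtin_sinf with 4-element float vectors yields
   "vmlsSin4", and __builtin_sin with 2-element double vectors yields
   "vmldSin2".  */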
37854 arity = 0;
37855 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37856 arity++;
37858 if (arity == 1)
37859 fntype = build_function_type_list (type_out, type_in, NULL);
37860 else
37861 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37863 /* Build a function declaration for the vectorized function. */
37864 new_fndecl = build_decl (BUILTINS_LOCATION,
37865 FUNCTION_DECL, get_identifier (name), fntype);
37866 TREE_PUBLIC (new_fndecl) = 1;
37867 DECL_EXTERNAL (new_fndecl) = 1;
37868 DECL_IS_NOVOPS (new_fndecl) = 1;
37869 TREE_READONLY (new_fndecl) = 1;
37871 return new_fndecl;
37874 /* Handler for an ACML-style interface to
37875 a library with vectorized intrinsics. */
37877 static tree
37878 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
37880 char name[20] = "__vr.._";
37881 tree fntype, new_fndecl, args;
37882 unsigned arity;
37883 const char *bname;
37884 machine_mode el_mode, in_mode;
37885 int n, in_n;
37887 /* ACML is 64-bit only and suitable for unsafe math only, as
37888 it does not correctly support parts of IEEE with the required
37889 precision, such as denormals. */
37890 if (!TARGET_64BIT
37891 || !flag_unsafe_math_optimizations)
37892 return NULL_TREE;
37894 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37895 n = TYPE_VECTOR_SUBPARTS (type_out);
37896 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37897 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37898 if (el_mode != in_mode
37899 || n != in_n)
37900 return NULL_TREE;
37902 switch (fn)
37904 CASE_CFN_SIN:
37905 CASE_CFN_COS:
37906 CASE_CFN_EXP:
37907 CASE_CFN_LOG:
37908 CASE_CFN_LOG2:
37909 CASE_CFN_LOG10:
37910 if (el_mode == DFmode && n == 2)
37912 name[4] = 'd';
37913 name[5] = '2';
37915 else if (el_mode == SFmode && n == 4)
37917 name[4] = 's';
37918 name[5] = '4';
37920 else
37921 return NULL_TREE;
37922 break;
37924 default:
37925 return NULL_TREE;
37928 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
37929 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
37930 sprintf (name + 7, "%s", bname+10);
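/* For example, __builtin_sin on 2-element double vectors yields
   "__vrd2_sin", and __builtin_logf on 4-element float vectors yields
   "__vrs4_logf".  */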
37932 arity = 0;
37933 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
37934 arity++;
37936 if (arity == 1)
37937 fntype = build_function_type_list (type_out, type_in, NULL);
37938 else
37939 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37941 /* Build a function declaration for the vectorized function. */
37942 new_fndecl = build_decl (BUILTINS_LOCATION,
37943 FUNCTION_DECL, get_identifier (name), fntype);
37944 TREE_PUBLIC (new_fndecl) = 1;
37945 DECL_EXTERNAL (new_fndecl) = 1;
37946 DECL_IS_NOVOPS (new_fndecl) = 1;
37947 TREE_READONLY (new_fndecl) = 1;
37949 return new_fndecl;
37952 /* Returns a decl of a function that implements gather load with
37953 memory type MEM_VECTYPE and index type INDEX_TYPE and SCALE.
37954 Return NULL_TREE if it is not available. */
37956 static tree
37957 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37958 const_tree index_type, int scale)
37960 bool si;
37961 enum ix86_builtins code;
37963 if (! TARGET_AVX2)
37964 return NULL_TREE;
37966 if ((TREE_CODE (index_type) != INTEGER_TYPE
37967 && !POINTER_TYPE_P (index_type))
37968 || (TYPE_MODE (index_type) != SImode
37969 && TYPE_MODE (index_type) != DImode))
37970 return NULL_TREE;
37972 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37973 return NULL_TREE;
37975 /* v*gather* insn sign extends index to pointer mode. */
37976 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37977 && TYPE_UNSIGNED (index_type))
37978 return NULL_TREE;
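/* Scale can be 1, 2, 4 or 8.  */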
37980 if (scale <= 0
37981 || scale > 8
37982 || (scale & (scale - 1)) != 0)
37983 return NULL_TREE;
37985 si = TYPE_MODE (index_type) == SImode;
37986 switch (TYPE_MODE (mem_vectype))
37988 case E_V2DFmode:
37989 if (TARGET_AVX512VL)
37990 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
37991 else
37992 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37993 break;
37994 case E_V4DFmode:
37995 if (TARGET_AVX512VL)
37996 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
37997 else
37998 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37999 break;
38000 case E_V2DImode:
38001 if (TARGET_AVX512VL)
38002 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
38003 else
38004 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
38005 break;
38006 case E_V4DImode:
38007 if (TARGET_AVX512VL)
38008 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
38009 else
38010 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
38011 break;
38012 case E_V4SFmode:
38013 if (TARGET_AVX512VL)
38014 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
38015 else
38016 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
38017 break;
38018 case E_V8SFmode:
38019 if (TARGET_AVX512VL)
38020 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
38021 else
38022 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
38023 break;
38024 case E_V4SImode:
38025 if (TARGET_AVX512VL)
38026 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
38027 else
38028 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
38029 break;
38030 case E_V8SImode:
38031 if (TARGET_AVX512VL)
38032 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
38033 else
38034 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
38035 break;
38036 case E_V8DFmode:
38037 if (TARGET_AVX512F)
38038 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
38039 else
38040 return NULL_TREE;
38041 break;
38042 case E_V8DImode:
38043 if (TARGET_AVX512F)
38044 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
38045 else
38046 return NULL_TREE;
38047 break;
38048 case E_V16SFmode:
38049 if (TARGET_AVX512F)
38050 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
38051 else
38052 return NULL_TREE;
38053 break;
38054 case E_V16SImode:
38055 if (TARGET_AVX512F)
38056 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
38057 else
38058 return NULL_TREE;
38059 break;
38060 default:
38061 return NULL_TREE;
38064 return ix86_get_builtin (code);
38067 /* Returns a decl of a function that implements scatter store with
38068 register type VECTYPE and index type INDEX_TYPE and SCALE.
38069 Return NULL_TREE if it is not available. */
38071 static tree
38072 ix86_vectorize_builtin_scatter (const_tree vectype,
38073 const_tree index_type, int scale)
38075 bool si;
38076 enum ix86_builtins code;
38078 if (!TARGET_AVX512F)
38079 return NULL_TREE;
38081 if ((TREE_CODE (index_type) != INTEGER_TYPE
38082 && !POINTER_TYPE_P (index_type))
38083 || (TYPE_MODE (index_type) != SImode
38084 && TYPE_MODE (index_type) != DImode))
38085 return NULL_TREE;
38087 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
38088 return NULL_TREE;
38090 /* v*scatter* insn sign extends index to pointer mode. */
38091 if (TYPE_PRECISION (index_type) < POINTER_SIZE
38092 && TYPE_UNSIGNED (index_type))
38093 return NULL_TREE;
38095 /* Scale can be 1, 2, 4 or 8. */
38096 if (scale <= 0
38097 || scale > 8
38098 || (scale & (scale - 1)) != 0)
38099 return NULL_TREE;
38101 si = TYPE_MODE (index_type) == SImode;
38102 switch (TYPE_MODE (vectype))
38104 case E_V8DFmode:
38105 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
38106 break;
38107 case E_V8DImode:
38108 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
38109 break;
38110 case E_V16SFmode:
38111 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
38112 break;
38113 case E_V16SImode:
38114 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
38115 break;
38116 default:
38117 return NULL_TREE;
38120 return ix86_builtins[code];
38123 /* Return true if it is safe to use the rsqrt optabs to optimize
38124 1.0/sqrt. */
38126 static bool
38127 use_rsqrt_p ()
38129 return (TARGET_SSE_MATH
38130 && flag_finite_math_only
38131 && !flag_trapping_math
38132 && flag_unsafe_math_optimizations);
38135 /* Returns a code for a target-specific builtin that implements
38136 reciprocal of the function, or NULL_TREE if not available. */
38138 static tree
38139 ix86_builtin_reciprocal (tree fndecl)
38141 switch (DECL_FUNCTION_CODE (fndecl))
38143 /* Vectorized version of sqrt to rsqrt conversion. */
38144 case IX86_BUILTIN_SQRTPS_NR:
38145 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
38147 case IX86_BUILTIN_SQRTPS_NR256:
38148 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
38150 default:
38151 return NULL_TREE;
38155 /* Helper for avx_vpermilps256_operand et al. This is also used by
38156 the expansion functions to turn the parallel back into a mask.
38157 The return value is 0 for no match and the imm8+1 for a match. */
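  /* Worked example (added for illustration, not part of the original source):
     for V4SFmode each of the four selectors occupies two bits of the imm8,
     so a parallel selecting elements 3, 2, 1, 0 would encode as

        mask = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6 = 0x1b

     and this helper would return 0x1b + 1 = 0x1c.  */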
38160 avx_vpermilp_parallel (rtx par, machine_mode mode)
38162 unsigned i, nelt = GET_MODE_NUNITS (mode);
38163 unsigned mask = 0;
38164 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
38166 if (XVECLEN (par, 0) != (int) nelt)
38167 return 0;
38169 /* Validate that all of the elements are constants, and not totally
38170 out of range. Copy the data into an integral array to make the
38171 subsequent checks easier. */
38172 for (i = 0; i < nelt; ++i)
38174 rtx er = XVECEXP (par, 0, i);
38175 unsigned HOST_WIDE_INT ei;
38177 if (!CONST_INT_P (er))
38178 return 0;
38179 ei = INTVAL (er);
38180 if (ei >= nelt)
38181 return 0;
38182 ipar[i] = ei;
38185 switch (mode)
38187 case E_V8DFmode:
38188 /* In the 512-bit DFmode case, we can only move elements within
38189 a 128-bit lane. First fill the second part of the mask,
38190 then fallthru. */
38191 for (i = 4; i < 6; ++i)
38193 if (ipar[i] < 4 || ipar[i] >= 6)
38194 return 0;
38195 mask |= (ipar[i] - 4) << i;
38197 for (i = 6; i < 8; ++i)
38199 if (ipar[i] < 6)
38200 return 0;
38201 mask |= (ipar[i] - 6) << i;
38203 /* FALLTHRU */
38205 case E_V4DFmode:
38206 /* In the 256-bit DFmode case, we can only move elements within
38207 a 128-bit lane. */
38208 for (i = 0; i < 2; ++i)
38210 if (ipar[i] >= 2)
38211 return 0;
38212 mask |= ipar[i] << i;
38214 for (i = 2; i < 4; ++i)
38216 if (ipar[i] < 2)
38217 return 0;
38218 mask |= (ipar[i] - 2) << i;
38220 break;
38222 case E_V16SFmode:
38223 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
38224 must mirror the permutation in the lower 256 bits. */
38225 for (i = 0; i < 8; ++i)
38226 if (ipar[i] + 8 != ipar[i + 8])
38227 return 0;
38228 /* FALLTHRU */
38230 case E_V8SFmode:
38231 /* In the 256-bit SFmode case, we have full freedom of
38232 movement within the low 128-bit lane, but the high 128-bit
38233 lane must mirror the exact same pattern. */
38234 for (i = 0; i < 4; ++i)
38235 if (ipar[i] + 4 != ipar[i + 4])
38236 return 0;
38237 nelt = 4;
38238 /* FALLTHRU */
38240 case E_V2DFmode:
38241 case E_V4SFmode:
38242 /* In the 128-bit case, we have full freedom in the placement of
38243 the elements from the source operand. */
38244 for (i = 0; i < nelt; ++i)
38245 mask |= ipar[i] << (i * (nelt / 2));
38246 break;
38248 default:
38249 gcc_unreachable ();
38252 /* Make sure success has a non-zero value by adding one. */
38253 return mask + 1;
38256 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
38257 the expansion functions to turn the parallel back into a mask.
38258 The return value is 0 for no match and the imm8+1 for a match. */
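  /* Worked example (added for illustration, not part of the original source):
     for V4DFmode, nelt2 == 2, so a parallel of (2 3 4 5) places the high half
     of the first operand in the low lane and the low half of the second
     operand in the high lane:

        mask = (2 / 2) << 0 | (4 / 2) << 4 = 0x21

     and this helper would return 0x21 + 1 = 0x22.  */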
38261 avx_vperm2f128_parallel (rtx par, machine_mode mode)
38263 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
38264 unsigned mask = 0;
38265 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
38267 if (XVECLEN (par, 0) != (int) nelt)
38268 return 0;
38270 /* Validate that all of the elements are constants, and not totally
38271 out of range. Copy the data into an integral array to make the
38272 subsequent checks easier. */
38273 for (i = 0; i < nelt; ++i)
38275 rtx er = XVECEXP (par, 0, i);
38276 unsigned HOST_WIDE_INT ei;
38278 if (!CONST_INT_P (er))
38279 return 0;
38280 ei = INTVAL (er);
38281 if (ei >= 2 * nelt)
38282 return 0;
38283 ipar[i] = ei;
38286 /* Validate that each half of the permute selects consecutive elements; the half alignment is checked below when reconstructing the mask. */
38287 for (i = 0; i < nelt2 - 1; ++i)
38288 if (ipar[i] + 1 != ipar[i + 1])
38289 return 0;
38290 for (i = nelt2; i < nelt - 1; ++i)
38291 if (ipar[i] + 1 != ipar[i + 1])
38292 return 0;
38294 /* Reconstruct the mask. */
38295 for (i = 0; i < 2; ++i)
38297 unsigned e = ipar[i * nelt2];
38298 if (e % nelt2)
38299 return 0;
38300 e /= nelt2;
38301 mask |= e << (i * 4);
38304 /* Make sure success has a non-zero value by adding one. */
38305 return mask + 1;
38308 /* Return a register priority for hard reg REGNO. */
38309 static int
38310 ix86_register_priority (int hard_regno)
38312 /* ebp and r13 as the base always want a displacement, and r12 as the
38313 base always wants an index, so discourage their use in an
38314 address. */
38315 if (hard_regno == R12_REG || hard_regno == R13_REG)
38316 return 0;
38317 if (hard_regno == BP_REG)
38318 return 1;
38319 /* New x86-64 int registers result in bigger code size. Discourage
38320 them. */
38321 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
38322 return 2;
38323 /* New x86-64 SSE registers result in bigger code size. Discourage
38324 them. */
38325 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
38326 return 2;
38327 /* Usage of AX register results in smaller code. Prefer it. */
38328 if (hard_regno == AX_REG)
38329 return 4;
38330 return 3;
38333 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
38335 Put float CONST_DOUBLE in the constant pool instead of fp regs.
38336 QImode must go into class Q_REGS.
38337 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
38338 movdf to do mem-to-mem moves through integer regs. */
38340 static reg_class_t
38341 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
38343 machine_mode mode = GET_MODE (x);
38345 /* We're only allowed to return a subclass of CLASS. Many of the
38346 following checks fail for NO_REGS, so eliminate that early. */
38347 if (regclass == NO_REGS)
38348 return NO_REGS;
38350 /* All classes can load zeros. */
38351 if (x == CONST0_RTX (mode))
38352 return regclass;
38354 /* Force constants into memory if we are loading a (nonzero) constant into
38355 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
38356 instructions to load from a constant. */
38357 if (CONSTANT_P (x)
38358 && (MAYBE_MMX_CLASS_P (regclass)
38359 || MAYBE_SSE_CLASS_P (regclass)
38360 || MAYBE_MASK_CLASS_P (regclass)))
38361 return NO_REGS;
38363 /* Floating-point constants need more complex checks. */
38364 if (CONST_DOUBLE_P (x))
38366 /* General regs can load everything. */
38367 if (INTEGER_CLASS_P (regclass))
38368 return regclass;
38370 /* Floats can load 0 and 1 plus some others. Note that we eliminated
38371 zero above. We only want to wind up preferring 80387 registers if
38372 we plan on doing computation with them. */
38373 if (IS_STACK_MODE (mode)
38374 && standard_80387_constant_p (x) > 0)
38376 /* Limit class to FP regs. */
38377 if (FLOAT_CLASS_P (regclass))
38378 return FLOAT_REGS;
38379 else if (regclass == FP_TOP_SSE_REGS)
38380 return FP_TOP_REG;
38381 else if (regclass == FP_SECOND_SSE_REGS)
38382 return FP_SECOND_REG;
38385 return NO_REGS;
38388 /* Prefer SSE regs only, if we can use them for math. */
38389 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38390 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
38392 /* Generally when we see PLUS here, it's the function invariant
38393 (plus soft-fp const_int), which can only be computed into general
38394 regs. */
38395 if (GET_CODE (x) == PLUS)
38396 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
38398 /* QImode constants are easy to load, but non-constant QImode data
38399 must go into Q_REGS. */
38400 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
38402 if (Q_CLASS_P (regclass))
38403 return regclass;
38404 else if (reg_class_subset_p (Q_REGS, regclass))
38405 return Q_REGS;
38406 else
38407 return NO_REGS;
38410 return regclass;
38413 /* Discourage putting floating-point values in SSE registers unless
38414 SSE math is being used, and likewise for the 387 registers. */
38415 static reg_class_t
38416 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
38418 machine_mode mode = GET_MODE (x);
38420 /* Restrict the output reload class to the register bank that we are doing
38421 math on. If we would like not to return a subset of CLASS, reject this
38422 alternative: if reload cannot do this, it will still use its choice. */
38423 mode = GET_MODE (x);
38424 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38425 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
38427 if (IS_STACK_MODE (mode))
38429 if (regclass == FP_TOP_SSE_REGS)
38430 return FP_TOP_REG;
38431 else if (regclass == FP_SECOND_SSE_REGS)
38432 return FP_SECOND_REG;
38433 else
38434 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
38437 return regclass;
38440 static reg_class_t
38441 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
38442 machine_mode mode, secondary_reload_info *sri)
38444 /* Double-word spills from general registers to non-offsettable memory
38445 references (zero-extended addresses) require special handling. */
38446 if (TARGET_64BIT
38447 && MEM_P (x)
38448 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
38449 && INTEGER_CLASS_P (rclass)
38450 && !offsettable_memref_p (x))
38452 sri->icode = (in_p
38453 ? CODE_FOR_reload_noff_load
38454 : CODE_FOR_reload_noff_store);
38455 /* Add the cost of moving address to a temporary. */
38456 sri->extra_cost = 1;
38458 return NO_REGS;
38461 /* QImode spills from non-QI registers require an
38462 intermediate register on 32-bit targets. */
38463 if (mode == QImode
38464 && ((!TARGET_64BIT && !in_p
38465 && INTEGER_CLASS_P (rclass)
38466 && MAYBE_NON_Q_CLASS_P (rclass))
38467 || (!TARGET_AVX512DQ
38468 && MAYBE_MASK_CLASS_P (rclass))))
38470 int regno = true_regnum (x);
38472 /* Return Q_REGS if the operand is in memory. */
38473 if (regno == -1)
38474 return Q_REGS;
38476 return NO_REGS;
38479 /* This condition handles corner case where an expression involving
38480 pointers gets vectorized. We're trying to use the address of a
38481 stack slot as a vector initializer.
38483 (set (reg:V2DI 74 [ vect_cst_.2 ])
38484 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
38486 Eventually frame gets turned into sp+offset like this:
38488 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38489 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38490 (const_int 392 [0x188]))))
38492 That later gets turned into:
38494 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38495 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
38496 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
38498 We'll have the following reload recorded:
38500 Reload 0: reload_in (DI) =
38501 (plus:DI (reg/f:DI 7 sp)
38502 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
38503 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38504 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
38505 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
38506 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
38507 reload_reg_rtx: (reg:V2DI 22 xmm1)
38509 This isn't going to work since SSE instructions can't handle scalar
38510 additions. Returning GENERAL_REGS forces the addition into an integer
38511 register, and reload can handle subsequent reloads without problems. */
38513 if (in_p && GET_CODE (x) == PLUS
38514 && SSE_CLASS_P (rclass)
38515 && SCALAR_INT_MODE_P (mode))
38516 return GENERAL_REGS;
38518 return NO_REGS;
38521 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
38523 static bool
38524 ix86_class_likely_spilled_p (reg_class_t rclass)
38526 switch (rclass)
38528 case AREG:
38529 case DREG:
38530 case CREG:
38531 case BREG:
38532 case AD_REGS:
38533 case SIREG:
38534 case DIREG:
38535 case SSE_FIRST_REG:
38536 case FP_TOP_REG:
38537 case FP_SECOND_REG:
38538 case BND_REGS:
38539 return true;
38541 default:
38542 break;
38545 return false;
38548 /* If we are copying between registers from different register sets
38549 (e.g. FP and integer), we may need a memory location.
38551 The function can't work reliably when one of the CLASSES is a class
38552 containing registers from multiple sets. We avoid this by never combining
38553 different sets in a single alternative in the machine description.
38554 Ensure that this constraint holds to avoid unexpected surprises.
38556 When STRICT is false, we are being called from REGISTER_MOVE_COST,
38557 so do not enforce these sanity checks.
38559 To optimize register_move_cost performance, define an inline variant. */
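  /* Illustrative note (added, not part of the original source): for example,
     a DFmode copy between FLOAT_REGS (x87) and SSE_REGS falls into the
     FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2) case below, so such a
     move is forced through a stack slot.  */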
38561 static inline bool
38562 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38563 reg_class_t class2, int strict)
38565 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
38566 return false;
38568 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
38569 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
38570 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
38571 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
38572 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
38573 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
38574 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
38575 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
38577 gcc_assert (!strict || lra_in_progress);
38578 return true;
38581 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
38582 return true;
38584 /* Between mask and general, we have moves no larger than word size. */
38585 if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
38586 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
38587 return true;
38589 /* ??? This is a lie. We do have moves between mmx/general, and for
38590 mmx/sse2. But by saying we need secondary memory we discourage the
38591 register allocator from using the mmx registers unless needed. */
38592 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
38593 return true;
38595 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38597 /* SSE1 doesn't have any direct moves from other classes. */
38598 if (!TARGET_SSE2)
38599 return true;
38601 /* If the target says that inter-unit moves are more expensive
38602 than moving through memory, then don't generate them. */
38603 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
38604 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
38605 return true;
38607 /* Between SSE and general, we have moves no larger than word size. */
38608 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38609 return true;
38612 return false;
38615 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
38617 static bool
38618 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
38619 reg_class_t class2)
38621 return inline_secondary_memory_needed (mode, class1, class2, true);
38624 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
38626 get_secondary_mem widens integral modes to BITS_PER_WORD.
38627 There is no need to emit a full 64-bit move on 64-bit targets
38628 for integral modes that can be moved using a 32-bit move. */
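  /* Illustrative note (added, not part of the original source): e.g. a QImode
     or HImode value is widened here to the 32-bit integer mode via
     mode_for_size (32, MODE_INT, 0), while SImode and wider modes are
     returned unchanged.  */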
38630 static machine_mode
38631 ix86_secondary_memory_needed_mode (machine_mode mode)
38633 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
38634 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
38635 return mode;
38638 /* Implement the TARGET_CLASS_MAX_NREGS hook.
38640 On the 80386, this is the size of MODE in words,
38641 except in the FP regs, where a single reg is always enough. */
38643 static unsigned char
38644 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
38646 if (MAYBE_INTEGER_CLASS_P (rclass))
38648 if (mode == XFmode)
38649 return (TARGET_64BIT ? 2 : 3);
38650 else if (mode == XCmode)
38651 return (TARGET_64BIT ? 4 : 6);
38652 else
38653 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38655 else
38657 if (COMPLEX_MODE_P (mode))
38658 return 2;
38659 else
38660 return 1;
38664 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
38666 static bool
38667 ix86_can_change_mode_class (machine_mode from, machine_mode to,
38668 reg_class_t regclass)
38670 if (from == to)
38671 return true;
38673 /* x87 registers can't do subreg at all, as all values are reformatted
38674 to extended precision. */
38675 if (MAYBE_FLOAT_CLASS_P (regclass))
38676 return false;
38678 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
38680 /* Vector registers do not support QI or HImode loads. If we don't
38681 disallow a change to these modes, reload will assume it's ok to
38682 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
38683 the vec_dupv4hi pattern. */
38684 if (GET_MODE_SIZE (from) < 4)
38685 return false;
38688 return true;
38691 /* Return index of MODE in the sse load/store tables. */
38693 static inline int
38694 sse_store_index (machine_mode mode)
38696 switch (GET_MODE_SIZE (mode))
38698 case 4:
38699 return 0;
38700 case 8:
38701 return 1;
38702 case 16:
38703 return 2;
38704 case 32:
38705 return 3;
38706 case 64:
38707 return 4;
38708 default:
38709 return -1;
38713 /* Return the cost of moving data of mode M between a
38714 register and memory. A value of 2 is the default; this cost is
38715 relative to those in `REGISTER_MOVE_COST'.
38717 This function is used extensively by register_move_cost that is used to
38718 build tables at startup. Make it inline in this case.
38719 When IN is 2, return maximum of in and out move cost.
38721 If moving between registers and memory is more expensive than
38722 between two registers, you should define this macro to express the
38723 relative cost.
38725 Also model the increased cost of moving QImode registers in non
38726 Q_REGS classes. */
38728 static inline int
38729 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
38730 int in)
38732 int cost;
38733 if (FLOAT_CLASS_P (regclass))
38735 int index;
38736 switch (mode)
38738 case E_SFmode:
38739 index = 0;
38740 break;
38741 case E_DFmode:
38742 index = 1;
38743 break;
38744 case E_XFmode:
38745 index = 2;
38746 break;
38747 default:
38748 return 100;
38750 if (in == 2)
38751 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
38752 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
38754 if (SSE_CLASS_P (regclass))
38756 int index = sse_store_index (mode);
38757 if (index == -1)
38758 return 100;
38759 if (in == 2)
38760 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
38761 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
38763 if (MMX_CLASS_P (regclass))
38765 int index;
38766 switch (GET_MODE_SIZE (mode))
38768 case 4:
38769 index = 0;
38770 break;
38771 case 8:
38772 index = 1;
38773 break;
38774 default:
38775 return 100;
38777 if (in == 2)
38778 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
38779 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
38781 switch (GET_MODE_SIZE (mode))
38783 case 1:
38784 if (Q_CLASS_P (regclass) || TARGET_64BIT)
38786 if (!in)
38787 return ix86_cost->int_store[0];
38788 if (TARGET_PARTIAL_REG_DEPENDENCY
38789 && optimize_function_for_speed_p (cfun))
38790 cost = ix86_cost->movzbl_load;
38791 else
38792 cost = ix86_cost->int_load[0];
38793 if (in == 2)
38794 return MAX (cost, ix86_cost->int_store[0]);
38795 return cost;
38797 else
38799 if (in == 2)
38800 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
38801 if (in)
38802 return ix86_cost->movzbl_load;
38803 else
38804 return ix86_cost->int_store[0] + 4;
38806 break;
38807 case 2:
38808 if (in == 2)
38809 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
38810 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
38811 default:
38812 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
38813 if (mode == TFmode)
38814 mode = XFmode;
38815 if (in == 2)
38816 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
38817 else if (in)
38818 cost = ix86_cost->int_load[2];
38819 else
38820 cost = ix86_cost->int_store[2];
38821 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
38825 static int
38826 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
38827 bool in)
38829 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
38833 /* Return the cost of moving data from a register in class CLASS1 to
38834 one in class CLASS2.
38836 It is not required that the cost always equal 2 when FROM is the same as TO;
38837 on some machines it is expensive to move between registers if they are not
38838 general registers. */
38840 static int
38841 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
38842 reg_class_t class2_i)
38844 enum reg_class class1 = (enum reg_class) class1_i;
38845 enum reg_class class2 = (enum reg_class) class2_i;
38847 /* In case we require secondary memory, compute the cost of the store followed
38848 by the load. In order to avoid bad register allocation choices, we need
38849 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
38851 if (inline_secondary_memory_needed (mode, class1, class2, false))
38853 int cost = 1;
38855 cost += inline_memory_move_cost (mode, class1, 2);
38856 cost += inline_memory_move_cost (mode, class2, 2);
38858 /* In the case of copying from a general purpose register we may emit multiple
38859 stores followed by a single load, causing a memory size mismatch stall.
38860 Count this as an arbitrarily high cost of 20. */
38861 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
38862 && TARGET_MEMORY_MISMATCH_STALL
38863 && targetm.class_max_nregs (class1, mode)
38864 > targetm.class_max_nregs (class2, mode))
38865 cost += 20;
38867 /* In the case of FP/MMX moves, the registers actually overlap, and we
38868 have to switch modes in order to treat them differently. */
38869 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
38870 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
38871 cost += 20;
38873 return cost;
38876 /* Moves between SSE/MMX and integer unit are expensive. */
38877 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
38878 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
38880 /* ??? By keeping the returned value relatively high, we limit the number
38881 of moves between integer and MMX/SSE registers for all targets.
38882 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
38883 where integer modes in MMX/SSE registers are not tieable
38884 because of missing QImode and HImode moves to, from or between
38885 MMX/SSE registers. */
38886 return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
38887 ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
38889 if (MAYBE_FLOAT_CLASS_P (class1))
38890 return ix86_cost->fp_move;
38891 if (MAYBE_SSE_CLASS_P (class1))
38893 if (GET_MODE_BITSIZE (mode) <= 128)
38894 return ix86_cost->xmm_move;
38895 if (GET_MODE_BITSIZE (mode) <= 256)
38896 return ix86_cost->ymm_move;
38897 return ix86_cost->zmm_move;
38899 if (MAYBE_MMX_CLASS_P (class1))
38900 return ix86_cost->mmx_move;
38901 return 2;
38904 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
38905 words of a value of mode MODE but can be less for certain modes in
38906 special long registers.
38908 Actually there are no two-word move instructions for consecutive
38909 registers, and only registers 0-3 may have byte mov instructions
38910 applied to them. */
38912 static unsigned int
38913 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
38915 if (GENERAL_REGNO_P (regno))
38917 if (mode == XFmode)
38918 return TARGET_64BIT ? 2 : 3;
38919 if (mode == XCmode)
38920 return TARGET_64BIT ? 4 : 6;
38921 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
38923 if (COMPLEX_MODE_P (mode))
38924 return 2;
38925 if (mode == V64SFmode || mode == V64SImode)
38926 return 4;
38927 return 1;
38930 /* Implement TARGET_HARD_REGNO_MODE_OK. */
38932 static bool
38933 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
38935 /* The flags register, and only the flags register, can hold CCmode values. */
38936 if (CC_REGNO_P (regno))
38937 return GET_MODE_CLASS (mode) == MODE_CC;
38938 if (GET_MODE_CLASS (mode) == MODE_CC
38939 || GET_MODE_CLASS (mode) == MODE_RANDOM
38940 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
38941 return false;
38942 if (STACK_REGNO_P (regno))
38943 return VALID_FP_MODE_P (mode);
38944 if (MASK_REGNO_P (regno))
38945 return (VALID_MASK_REG_MODE (mode)
38946 || (TARGET_AVX512BW
38947 && VALID_MASK_AVX512BW_MODE (mode)));
38948 if (BND_REGNO_P (regno))
38949 return VALID_BND_REG_MODE (mode);
38950 if (SSE_REGNO_P (regno))
38952 /* We implement the move patterns for all vector modes into and
38953 out of SSE registers, even when no operation instructions
38954 are available. */
38956 /* For AVX-512 we allow, regardless of regno:
38957 - XI mode
38958 - any 512-bit wide vector mode
38959 - any scalar mode. */
38960 if (TARGET_AVX512F
38961 && (mode == XImode
38962 || VALID_AVX512F_REG_MODE (mode)
38963 || VALID_AVX512F_SCALAR_MODE (mode)))
38964 return true;
38966 /* For AVX-5124FMAPS allow V64SFmode for special regnos. */
38967 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
38968 && MOD4_SSE_REGNO_P (regno)
38969 && mode == V64SFmode)
38970 return true;
38972 /* For AVX-5124VNNIW allow V64SImode for special regnos. */
38973 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
38974 && MOD4_SSE_REGNO_P (regno)
38975 && mode == V64SImode)
38976 return true;
38978 /* TODO check for QI/HI scalars. */
38979 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
38980 if (TARGET_AVX512VL
38981 && (mode == OImode
38982 || mode == TImode
38983 || VALID_AVX256_REG_MODE (mode)
38984 || VALID_AVX512VL_128_REG_MODE (mode)))
38985 return true;
38987 /* xmm16-xmm31 are only available for AVX-512. */
38988 if (EXT_REX_SSE_REGNO_P (regno))
38989 return false;
38991 /* OImode and AVX modes are available only when AVX is enabled. */
38992 return ((TARGET_AVX
38993 && VALID_AVX256_REG_OR_OI_MODE (mode))
38994 || VALID_SSE_REG_MODE (mode)
38995 || VALID_SSE2_REG_MODE (mode)
38996 || VALID_MMX_REG_MODE (mode)
38997 || VALID_MMX_REG_MODE_3DNOW (mode));
38999 if (MMX_REGNO_P (regno))
39001 /* We implement the move patterns for 3DNOW modes even in MMX mode,
39002 so if the register is available at all, then we can move data of
39003 the given mode into or out of it. */
39004 return (VALID_MMX_REG_MODE (mode)
39005 || VALID_MMX_REG_MODE_3DNOW (mode));
39008 if (mode == QImode)
39010 /* Take care for QImode values - they can be in non-QI regs,
39011 but then they do cause partial register stalls. */
39012 if (ANY_QI_REGNO_P (regno))
39013 return true;
39014 if (!TARGET_PARTIAL_REG_STALL)
39015 return true;
39016 /* LRA checks if the hard register is OK for the given mode.
39017 QImode values can live in non-QI regs, so we allow all
39018 registers here. */
39019 if (lra_in_progress)
39020 return true;
39021 return !can_create_pseudo_p ();
39023 /* We handle both integer and floats in the general purpose registers. */
39024 else if (VALID_INT_MODE_P (mode))
39025 return true;
39026 else if (VALID_FP_MODE_P (mode))
39027 return true;
39028 else if (VALID_DFP_MODE_P (mode))
39029 return true;
39030 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
39031 on to use that value in smaller contexts, this can easily force a
39032 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
39033 supporting DImode, allow it. */
39034 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
39035 return true;
39037 return false;
39040 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
39041 saves SSE registers across calls is Win64 (thus no need to check the
39042 current ABI here), and with AVX enabled Win64 only guarantees that
39043 the low 16 bytes are saved. */
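  /* Illustrative note (added, not part of the original source): e.g. a 32-byte
     V8SFmode value in an SSE register is treated as partially clobbered by a
     call, whereas a 16-byte V4SFmode value is not.  */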
39045 static bool
39046 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
39048 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
39051 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
39052 tieable integer mode. */
39054 static bool
39055 ix86_tieable_integer_mode_p (machine_mode mode)
39057 switch (mode)
39059 case E_HImode:
39060 case E_SImode:
39061 return true;
39063 case E_QImode:
39064 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
39066 case E_DImode:
39067 return TARGET_64BIT;
39069 default:
39070 return false;
39074 /* Implement TARGET_MODES_TIEABLE_P.
39076 Return true if MODE1 is accessible in a register that can hold MODE2
39077 without copying. That is, all register classes that can hold MODE2
39078 can also hold MODE1. */
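  /* Illustrative note (added, not part of the original source): e.g. HImode
     and SImode are tieable because both are tieable integer modes, and SFmode
     is tieable with MODE2 == DFmode, since every register class that can hold
     DFmode can also hold SFmode.  */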
39080 static bool
39081 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
39083 if (mode1 == mode2)
39084 return true;
39086 if (ix86_tieable_integer_mode_p (mode1)
39087 && ix86_tieable_integer_mode_p (mode2))
39088 return true;
39090 /* MODE2 being XFmode implies fp stack or general regs, which means we
39091 can tie any smaller floating point modes to it. Note that we do not
39092 tie this with TFmode. */
39093 if (mode2 == XFmode)
39094 return mode1 == SFmode || mode1 == DFmode;
39096 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
39097 that we can tie it with SFmode. */
39098 if (mode2 == DFmode)
39099 return mode1 == SFmode;
39101 /* If MODE2 is only appropriate for an SSE register, then tie with
39102 any other mode acceptable to SSE registers. */
39103 if (GET_MODE_SIZE (mode2) == 32
39104 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39105 return (GET_MODE_SIZE (mode1) == 32
39106 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39107 if (GET_MODE_SIZE (mode2) == 16
39108 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
39109 return (GET_MODE_SIZE (mode1) == 16
39110 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
39112 /* If MODE2 is appropriate for an MMX register, then tie
39113 with any other mode acceptable to MMX registers. */
39114 if (GET_MODE_SIZE (mode2) == 8
39115 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
39116 return (GET_MODE_SIZE (mode1) == 8
39117 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
39119 return false;
39122 /* Return the cost of moving between two registers of mode MODE. */
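  /* Illustrative note (added, not part of the original source): on a 64-bit
     target a TImode register copy is costed as CEIL (16, 8) = 2 insns, while
     with AVX a V8SFmode copy uses UNITS == 32 and is costed as a single
     insn.  */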
39124 static int
39125 ix86_set_reg_reg_cost (machine_mode mode)
39127 unsigned int units = UNITS_PER_WORD;
39129 switch (GET_MODE_CLASS (mode))
39131 default:
39132 break;
39134 case MODE_CC:
39135 units = GET_MODE_SIZE (CCmode);
39136 break;
39138 case MODE_FLOAT:
39139 if ((TARGET_SSE && mode == TFmode)
39140 || (TARGET_80387 && mode == XFmode)
39141 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
39142 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
39143 units = GET_MODE_SIZE (mode);
39144 break;
39146 case MODE_COMPLEX_FLOAT:
39147 if ((TARGET_SSE && mode == TCmode)
39148 || (TARGET_80387 && mode == XCmode)
39149 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
39150 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
39151 units = GET_MODE_SIZE (mode);
39152 break;
39154 case MODE_VECTOR_INT:
39155 case MODE_VECTOR_FLOAT:
39156 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
39157 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
39158 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
39159 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
39160 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
39161 units = GET_MODE_SIZE (mode);
39164 /* Return the cost of moving between two registers of mode MODE,
39165 assuming that the move will be in pieces of at most UNITS bytes. */
39166 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
39169 /* Return the cost of a vector operation in MODE given that the scalar
39170 version has cost COST. If PARALLEL is true, assume that the CPU has
39171 more than one unit performing the operation. */
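  /* Illustrative note (added, not part of the original source): e.g. with
     PARALLEL, a 256-bit operation costs COST * 256 / 128 = 2 * COST when
     tuning for TARGET_AVX128_OPTIMAL, and without PARALLEL the cost is
     simply scaled by the number of vector elements.  */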
39173 static int
39174 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
39176 if (!VECTOR_MODE_P (mode))
39177 return cost;
39179 if (!parallel)
39180 return cost * GET_MODE_NUNITS (mode);
39181 if (GET_MODE_BITSIZE (mode) == 128
39182 && TARGET_SSE_SPLIT_REGS)
39183 return cost * 2;
39184 if (GET_MODE_BITSIZE (mode) > 128
39185 && TARGET_AVX128_OPTIMAL)
39186 return cost * GET_MODE_BITSIZE (mode) / 128;
39187 return cost;
39190 /* Compute a (partial) cost for rtx X. Return true if the complete
39191 cost has been computed, and false if subexpressions should be
39192 scanned. In either case, *TOTAL contains the cost result. */
39194 static bool
39195 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
39196 int *total, bool speed)
39198 rtx mask;
39199 enum rtx_code code = GET_CODE (x);
39200 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
39201 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
39202 int src_cost;
39203 machine_mode inner_mode = mode;
39204 if (VECTOR_MODE_P (mode))
39205 inner_mode = GET_MODE_INNER (mode);
39207 switch (code)
39209 case SET:
39210 if (register_operand (SET_DEST (x), VOIDmode)
39211 && reg_or_0_operand (SET_SRC (x), VOIDmode))
39213 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
39214 return true;
39217 if (register_operand (SET_SRC (x), VOIDmode))
39218 /* Avoid potentially incorrect high cost from rtx_costs
39219 for non-tieable SUBREGs. */
39220 src_cost = 0;
39221 else
39223 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
39225 if (CONSTANT_P (SET_SRC (x)))
39226 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
39227 a small value, possibly zero for cheap constants. */
39228 src_cost += COSTS_N_INSNS (1);
39231 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
39232 return true;
39234 case CONST_INT:
39235 case CONST:
39236 case LABEL_REF:
39237 case SYMBOL_REF:
39238 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
39239 *total = 3;
39240 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
39241 *total = 2;
39242 else if (flag_pic && SYMBOLIC_CONST (x)
39243 && !(TARGET_64BIT
39244 && (GET_CODE (x) == LABEL_REF
39245 || (GET_CODE (x) == SYMBOL_REF
39246 && SYMBOL_REF_LOCAL_P (x))))
39247 /* Use 0 cost for CONST to improve its propagation. */
39248 && (TARGET_64BIT || GET_CODE (x) != CONST))
39249 *total = 1;
39250 else
39251 *total = 0;
39252 return true;
39254 case CONST_DOUBLE:
39255 if (IS_STACK_MODE (mode))
39256 switch (standard_80387_constant_p (x))
39258 case -1:
39259 case 0:
39260 break;
39261 case 1: /* 0.0 */
39262 *total = 1;
39263 return true;
39264 default: /* Other constants */
39265 *total = 2;
39266 return true;
39268 /* FALLTHRU */
39270 case CONST_VECTOR:
39271 switch (standard_sse_constant_p (x, mode))
39273 case 0:
39274 break;
39275 case 1: /* 0: xor eliminates false dependency */
39276 *total = 0;
39277 return true;
39278 default: /* -1: cmp contains false dependency */
39279 *total = 1;
39280 return true;
39282 /* FALLTHRU */
39284 case CONST_WIDE_INT:
39285 /* Fall back to (MEM (SYMBOL_REF)), since that's where
39286 it'll probably end up. Add a penalty for size. */
39287 *total = (COSTS_N_INSNS (1)
39288 + (!TARGET_64BIT && flag_pic)
39289 + (GET_MODE_SIZE (mode) <= 4
39290 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
39291 return true;
39293 case ZERO_EXTEND:
39294 /* The zero extension is often completely free on x86_64, so make
39295 it as cheap as possible. */
39296 if (TARGET_64BIT && mode == DImode
39297 && GET_MODE (XEXP (x, 0)) == SImode)
39298 *total = 1;
39299 else if (TARGET_ZERO_EXTEND_WITH_AND)
39300 *total = cost->add;
39301 else
39302 *total = cost->movzx;
39303 return false;
39305 case SIGN_EXTEND:
39306 *total = cost->movsx;
39307 return false;
39309 case ASHIFT:
39310 if (SCALAR_INT_MODE_P (mode)
39311 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
39312 && CONST_INT_P (XEXP (x, 1)))
39314 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39315 if (value == 1)
39317 *total = cost->add;
39318 return false;
39320 if ((value == 2 || value == 3)
39321 && cost->lea <= cost->shift_const)
39323 *total = cost->lea;
39324 return false;
39327 /* FALLTHRU */
39329 case ROTATE:
39330 case ASHIFTRT:
39331 case LSHIFTRT:
39332 case ROTATERT:
39333 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39335 /* ??? Should be SSE vector operation cost. */
39336 /* At least for published AMD latencies, this really is the same
39337 as the latency for a simple fpu operation like fabs. */
39338 /* V*QImode is emulated with 1-11 insns. */
39339 if (mode == V16QImode || mode == V32QImode)
39341 int count = 11;
39342 if (TARGET_XOP && mode == V16QImode)
39344 /* For XOP we use vpshab, which requires a broadcast of the
39345 value to the variable shift insn. For constants this
39346 means a V16Q const in mem; even when we can perform the
39347 shift with one insn set the cost to prefer paddb. */
39348 if (CONSTANT_P (XEXP (x, 1)))
39350 *total = ix86_vec_cost (mode,
39351 cost->sse_op
39352 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
39353 + (speed ? 2 : COSTS_N_BYTES (16)), true);
39354 return true;
39356 count = 3;
39358 else if (TARGET_SSSE3)
39359 count = 7;
39360 *total = ix86_vec_cost (mode, cost->sse_op * count, true);
39362 else
39363 *total = ix86_vec_cost (mode, cost->sse_op, true);
39365 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39367 if (CONST_INT_P (XEXP (x, 1)))
39369 if (INTVAL (XEXP (x, 1)) > 32)
39370 *total = cost->shift_const + COSTS_N_INSNS (2);
39371 else
39372 *total = cost->shift_const * 2;
39374 else
39376 if (GET_CODE (XEXP (x, 1)) == AND)
39377 *total = cost->shift_var * 2;
39378 else
39379 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
39382 else
39384 if (CONST_INT_P (XEXP (x, 1)))
39385 *total = cost->shift_const;
39386 else if (SUBREG_P (XEXP (x, 1))
39387 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
39389 /* Return the cost after shift-and truncation. */
39390 *total = cost->shift_var;
39391 return true;
39393 else
39394 *total = cost->shift_var;
39396 return false;
39398 case FMA:
39400 rtx sub;
39402 gcc_assert (FLOAT_MODE_P (mode));
39403 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
39405 *total = ix86_vec_cost (mode,
39406 mode == SFmode ? cost->fmass : cost->fmasd,
39407 true);
39408 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
39410 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
39411 sub = XEXP (x, 0);
39412 if (GET_CODE (sub) == NEG)
39413 sub = XEXP (sub, 0);
39414 *total += rtx_cost (sub, mode, FMA, 0, speed);
39416 sub = XEXP (x, 2);
39417 if (GET_CODE (sub) == NEG)
39418 sub = XEXP (sub, 0);
39419 *total += rtx_cost (sub, mode, FMA, 2, speed);
39420 return true;
39423 case MULT:
39424 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39426 *total = inner_mode == DFmode ? cost->mulsd : cost->mulss;
39427 return false;
39429 else if (X87_FLOAT_MODE_P (mode))
39431 *total = cost->fmul;
39432 return false;
39434 else if (FLOAT_MODE_P (mode))
39436 *total = ix86_vec_cost (mode,
39437 inner_mode == DFmode
39438 ? cost->mulsd : cost->mulss, true);
39439 return false;
39441 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39443 /* V*QImode is emulated with 7-13 insns. */
39444 if (mode == V16QImode || mode == V32QImode)
39446 int extra = 11;
39447 if (TARGET_XOP && mode == V16QImode)
39448 extra = 5;
39449 else if (TARGET_SSSE3)
39450 extra = 6;
39451 *total = ix86_vec_cost (mode,
39452 cost->mulss * 2 + cost->sse_op * extra,
39453 true);
39455 /* V*DImode is emulated with 5-8 insns. */
39456 else if (mode == V2DImode || mode == V4DImode)
39458 if (TARGET_XOP && mode == V2DImode)
39459 *total = ix86_vec_cost (mode,
39460 cost->mulss * 2 + cost->sse_op * 3,
39461 true);
39462 else
39463 *total = ix86_vec_cost (mode,
39464 cost->mulss * 3 + cost->sse_op * 5,
39465 true);
39467 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
39468 insns, including two PMULUDQ. */
39469 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
39470 *total = ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
39471 true);
39472 else
39473 *total = ix86_vec_cost (mode, cost->mulss, true);
39474 return false;
39476 else
39478 rtx op0 = XEXP (x, 0);
39479 rtx op1 = XEXP (x, 1);
39480 int nbits;
39481 if (CONST_INT_P (XEXP (x, 1)))
39483 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
39484 for (nbits = 0; value != 0; value &= value - 1)
39485 nbits++;
39487 else
39488 /* This is arbitrary. */
39489 nbits = 7;
39491 /* Compute costs correctly for widening multiplication. */
39492 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
39493 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
39494 == GET_MODE_SIZE (mode))
39496 int is_mulwiden = 0;
39497 machine_mode inner_mode = GET_MODE (op0);
39499 if (GET_CODE (op0) == GET_CODE (op1))
39500 is_mulwiden = 1, op1 = XEXP (op1, 0);
39501 else if (CONST_INT_P (op1))
39503 if (GET_CODE (op0) == SIGN_EXTEND)
39504 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
39505 == INTVAL (op1);
39506 else
39507 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
39510 if (is_mulwiden)
39511 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
39514 *total = (cost->mult_init[MODE_INDEX (mode)]
39515 + nbits * cost->mult_bit
39516 + rtx_cost (op0, mode, outer_code, opno, speed)
39517 + rtx_cost (op1, mode, outer_code, opno, speed));
39519 return true;
39522 case DIV:
39523 case UDIV:
39524 case MOD:
39525 case UMOD:
39526 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39527 *total = inner_mode == DFmode ? cost->divsd : cost->divss;
39528 else if (X87_FLOAT_MODE_P (mode))
39529 *total = cost->fdiv;
39530 else if (FLOAT_MODE_P (mode))
39531 *total = ix86_vec_cost (mode,
39532 inner_mode == DFmode ? cost->divsd : cost->divss,
39533 true);
39534 else
39535 *total = cost->divide[MODE_INDEX (mode)];
39536 return false;
39538 case PLUS:
39539 if (GET_MODE_CLASS (mode) == MODE_INT
39540 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
39542 if (GET_CODE (XEXP (x, 0)) == PLUS
39543 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
39544 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
39545 && CONSTANT_P (XEXP (x, 1)))
39547 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
39548 if (val == 2 || val == 4 || val == 8)
39550 *total = cost->lea;
39551 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39552 outer_code, opno, speed);
39553 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
39554 outer_code, opno, speed);
39555 *total += rtx_cost (XEXP (x, 1), mode,
39556 outer_code, opno, speed);
39557 return true;
39560 else if (GET_CODE (XEXP (x, 0)) == MULT
39561 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
39563 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
39564 if (val == 2 || val == 4 || val == 8)
39566 *total = cost->lea;
39567 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39568 outer_code, opno, speed);
39569 *total += rtx_cost (XEXP (x, 1), mode,
39570 outer_code, opno, speed);
39571 return true;
39574 else if (GET_CODE (XEXP (x, 0)) == PLUS)
39576 /* Add with carry, ignore the cost of adding a carry flag. */
39577 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
39578 *total = cost->add;
39579 else
39581 *total = cost->lea;
39582 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39583 outer_code, opno, speed);
39586 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
39587 outer_code, opno, speed);
39588 *total += rtx_cost (XEXP (x, 1), mode,
39589 outer_code, opno, speed);
39590 return true;
39593 /* FALLTHRU */
39595 case MINUS:
39596 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
39597 if (GET_MODE_CLASS (mode) == MODE_INT
39598 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
39599 && GET_CODE (XEXP (x, 0)) == MINUS
39600 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
39602 *total = cost->add;
39603 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
39604 outer_code, opno, speed);
39605 *total += rtx_cost (XEXP (x, 1), mode,
39606 outer_code, opno, speed);
39607 return true;
39610 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39612 *total = cost->addss;
39613 return false;
39615 else if (X87_FLOAT_MODE_P (mode))
39617 *total = cost->fadd;
39618 return false;
39620 else if (FLOAT_MODE_P (mode))
39622 *total = ix86_vec_cost (mode, cost->addss, true);
39623 return false;
39625 /* FALLTHRU */
39627 case AND:
39628 case IOR:
39629 case XOR:
39630 if (GET_MODE_CLASS (mode) == MODE_INT
39631 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39633 *total = (cost->add * 2
39634 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
39635 << (GET_MODE (XEXP (x, 0)) != DImode))
39636 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
39637 << (GET_MODE (XEXP (x, 1)) != DImode)));
39638 return true;
39640 /* FALLTHRU */
39642 case NEG:
39643 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39645 *total = cost->sse_op;
39646 return false;
39648 else if (X87_FLOAT_MODE_P (mode))
39650 *total = cost->fchs;
39651 return false;
39653 else if (FLOAT_MODE_P (mode))
39655 *total = ix86_vec_cost (mode, cost->sse_op, true);
39656 return false;
39658 /* FALLTHRU */
39660 case NOT:
39661 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
39662 *total = ix86_vec_cost (mode, cost->sse_op, true);
39663 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
39664 *total = cost->add * 2;
39665 else
39666 *total = cost->add;
39667 return false;
39669 case COMPARE:
39670 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
39671 && XEXP (XEXP (x, 0), 1) == const1_rtx
39672 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
39673 && XEXP (x, 1) == const0_rtx)
39675 /* This kind of construct is implemented using test[bwl].
39676 Treat it as if we had an AND. */
39677 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
39678 *total = (cost->add
39679 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
39680 opno, speed)
39681 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
39682 return true;
39685 /* The embedded comparison operand is completely free. */
39686 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
39687 && XEXP (x, 1) == const0_rtx)
39688 *total = 0;
39690 return false;
39692 case FLOAT_EXTEND:
39693 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39694 *total = 0;
39695 else
39696 *total = ix86_vec_cost (mode, cost->addss, true);
39697 return false;
39699 case FLOAT_TRUNCATE:
39700 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
39701 *total = cost->fadd;
39702 else
39703 *total = ix86_vec_cost (mode, cost->addss, true);
39704 return false;
39706 case ABS:
39707 /* SSE requires memory load for the constant operand. It may make
39708 sense to account for this. Of course the constant operand may or
39709 may not be reused. */
39710 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39711 *total = cost->sse_op;
39712 else if (X87_FLOAT_MODE_P (mode))
39713 *total = cost->fabs;
39714 else if (FLOAT_MODE_P (mode))
39715 *total = ix86_vec_cost (mode, cost->sse_op, true);
39716 return false;
39718 case SQRT:
39719 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39720 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
39721 else if (X87_FLOAT_MODE_P (mode))
39722 *total = cost->fsqrt;
39723 else if (FLOAT_MODE_P (mode))
39724 *total = ix86_vec_cost (mode,
39725 mode == SFmode ? cost->sqrtss : cost->sqrtsd,
39726 true);
39727 return false;
39729 case UNSPEC:
39730 if (XINT (x, 1) == UNSPEC_TP)
39731 *total = 0;
39732 return false;
39734 case VEC_SELECT:
39735 case VEC_CONCAT:
39736 case VEC_DUPLICATE:
39737 /* ??? Assume all of these vector manipulation patterns are
39738 recognizable, in which case they all pretty much have the
39739 same cost. */
39740 *total = cost->sse_op;
39741 return true;
39742 case VEC_MERGE:
39743 mask = XEXP (x, 2);
39744 /* This is a masked instruction; assume the same cost
39745 as the non-masked variant. */
39746 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
39747 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
39748 else
39749 *total = cost->sse_op;
39750 return true;
39752 default:
39753 return false;
39757 #if TARGET_MACHO
39759 static int current_machopic_label_num;
39761 /* Given a symbol name and its associated stub, write out the
39762 definition of the stub. */
39764 void
39765 machopic_output_stub (FILE *file, const char *symb, const char *stub)
39767 unsigned int length;
39768 char *binder_name, *symbol_name, lazy_ptr_name[32];
39769 int label = ++current_machopic_label_num;
39771 /* For 64-bit we shouldn't get here. */
39772 gcc_assert (!TARGET_64BIT);
39774 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
39775 symb = targetm.strip_name_encoding (symb);
39777 length = strlen (stub);
39778 binder_name = XALLOCAVEC (char, length + 32);
39779 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
39781 length = strlen (symb);
39782 symbol_name = XALLOCAVEC (char, length + 32);
39783 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
39785 sprintf (lazy_ptr_name, "L%d$lz", label);
39787 if (MACHOPIC_ATT_STUB)
39788 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
39789 else if (MACHOPIC_PURE)
39790 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
39791 else
39792 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
39794 fprintf (file, "%s:\n", stub);
39795 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39797 if (MACHOPIC_ATT_STUB)
39799 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
39801 else if (MACHOPIC_PURE)
39803 /* PIC stub. */
39804 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39805 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
39806 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
39807 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
39808 label, lazy_ptr_name, label);
39809 fprintf (file, "\tjmp\t*%%ecx\n");
39811 else
39812 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
39814 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
39815 it needs no stub-binding-helper. */
39816 if (MACHOPIC_ATT_STUB)
39817 return;
39819 fprintf (file, "%s:\n", binder_name);
39821 if (MACHOPIC_PURE)
39823 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
39824 fprintf (file, "\tpushl\t%%ecx\n");
39826 else
39827 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
39829 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
39831 /* N.B. Keep the correspondence of these
39832 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
39833 old-pic/new-pic/non-pic stubs; altering this will break
39834 compatibility with existing dylibs. */
39835 if (MACHOPIC_PURE)
39837 /* 25-byte PIC stub using "CALL get_pc_thunk". */
39838 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
39840 else
39841 /* 16-byte -mdynamic-no-pic stub. */
39842 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
39844 fprintf (file, "%s:\n", lazy_ptr_name);
39845 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
39846 fprintf (file, ASM_LONG "%s\n", binder_name);
39848 #endif /* TARGET_MACHO */
39850 /* Order the registers for the register allocator. */
39852 void
39853 x86_order_regs_for_local_alloc (void)
39855 int pos = 0;
39856 int i;
39858 /* First allocate the local general purpose registers. */
39859 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39860 if (GENERAL_REGNO_P (i) && call_used_regs[i])
39861 reg_alloc_order [pos++] = i;
39863 /* Global general purpose registers. */
39864 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
39865 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
39866 reg_alloc_order [pos++] = i;
39868 /* x87 registers come first in case we are doing FP math
39869 using them. */
39870 if (!TARGET_SSE_MATH)
39871 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39872 reg_alloc_order [pos++] = i;
39874 /* SSE registers. */
39875 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
39876 reg_alloc_order [pos++] = i;
39877 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
39878 reg_alloc_order [pos++] = i;
39880 /* Extended REX SSE registers. */
39881 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
39882 reg_alloc_order [pos++] = i;
39884 /* Mask registers. */
39885 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
39886 reg_alloc_order [pos++] = i;
39888 /* MPX bound registers. */
39889 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
39890 reg_alloc_order [pos++] = i;
39892 /* x87 registers. */
39893 if (TARGET_SSE_MATH)
39894 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
39895 reg_alloc_order [pos++] = i;
39897 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
39898 reg_alloc_order [pos++] = i;
39900 /* Initialize the rest of the array, as we do not allocate some registers
39901 at all. */
39902 while (pos < FIRST_PSEUDO_REGISTER)
39903 reg_alloc_order [pos++] = 0;
39906 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
39907 in struct attribute_spec handler. */
39908 static tree
39909 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
39910 tree args,
39911 int,
39912 bool *no_add_attrs)
39914 if (TREE_CODE (*node) != FUNCTION_TYPE
39915 && TREE_CODE (*node) != METHOD_TYPE
39916 && TREE_CODE (*node) != FIELD_DECL
39917 && TREE_CODE (*node) != TYPE_DECL)
39919 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39920 name);
39921 *no_add_attrs = true;
39922 return NULL_TREE;
39924 if (TARGET_64BIT)
39926 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
39927 name);
39928 *no_add_attrs = true;
39929 return NULL_TREE;
39931 if (is_attribute_p ("callee_pop_aggregate_return", name))
39933 tree cst;
39935 cst = TREE_VALUE (args);
39936 if (TREE_CODE (cst) != INTEGER_CST)
39938 warning (OPT_Wattributes,
39939 "%qE attribute requires an integer constant argument",
39940 name);
39941 *no_add_attrs = true;
39943 else if (compare_tree_int (cst, 0) != 0
39944 && compare_tree_int (cst, 1) != 0)
39946 warning (OPT_Wattributes,
39947 "argument to %qE attribute is neither zero, nor one",
39948 name);
39949 *no_add_attrs = true;
39952 return NULL_TREE;
39955 return NULL_TREE;
39958 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
39959 struct attribute_spec.handler. */
39960 static tree
39961 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
39962 bool *no_add_attrs)
39964 if (TREE_CODE (*node) != FUNCTION_TYPE
39965 && TREE_CODE (*node) != METHOD_TYPE
39966 && TREE_CODE (*node) != FIELD_DECL
39967 && TREE_CODE (*node) != TYPE_DECL)
39969 warning (OPT_Wattributes, "%qE attribute only applies to functions",
39970 name);
39971 *no_add_attrs = true;
39972 return NULL_TREE;
39975 /* Can combine regparm with all attributes but fastcall. */
39976 if (is_attribute_p ("ms_abi", name))
39978 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
39980 error ("ms_abi and sysv_abi attributes are not compatible");
39983 return NULL_TREE;
39985 else if (is_attribute_p ("sysv_abi", name))
39987 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
39989 error ("ms_abi and sysv_abi attributes are not compatible");
39992 return NULL_TREE;
39995 return NULL_TREE;
39998 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
39999 struct attribute_spec.handler. */
40000 static tree
40001 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
40002 bool *no_add_attrs)
40004 tree *type = NULL;
40005 if (DECL_P (*node))
40007 if (TREE_CODE (*node) == TYPE_DECL)
40008 type = &TREE_TYPE (*node);
40010 else
40011 type = node;
40013 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
40015 warning (OPT_Wattributes, "%qE attribute ignored",
40016 name);
40017 *no_add_attrs = true;
40020 else if ((is_attribute_p ("ms_struct", name)
40021 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
40022 || ((is_attribute_p ("gcc_struct", name)
40023 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
40025 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
40026 name);
40027 *no_add_attrs = true;
40030 return NULL_TREE;
40033 static tree
40034 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
40035 bool *no_add_attrs)
40037 if (TREE_CODE (*node) != FUNCTION_DECL)
40039 warning (OPT_Wattributes, "%qE attribute only applies to functions",
40040 name);
40041 *no_add_attrs = true;
40043 return NULL_TREE;
40046 static tree
40047 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
40048 int, bool *)
40050 return NULL_TREE;
40053 static tree
40054 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
40056 /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
40057 but the function type contains args and return type data. */
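  /* Illustrative sketch (added, not part of the original source): the checks
     below accept an LP64 handler declared roughly as

        void isr (void *frame, unsigned long error_code)
          __attribute__ ((interrupt));

     i.e. a pointer first argument plus an optional word_mode integer, with a
     void return type.  */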
40058 tree func_type = *node;
40059 tree return_type = TREE_TYPE (func_type);
40061 int nargs = 0;
40062 tree current_arg_type = TYPE_ARG_TYPES (func_type);
40063 while (current_arg_type
40064 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
40066 if (nargs == 0)
40068 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
40069 error ("interrupt service routine should have a pointer "
40070 "as the first argument");
40072 else if (nargs == 1)
40074 if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
40075 || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
40076 error ("interrupt service routine should have unsigned %s"
40077 "int as the second argument",
40078 TARGET_64BIT
40079 ? (TARGET_X32 ? "long long " : "long ")
40080 : "");
40082 nargs++;
40083 current_arg_type = TREE_CHAIN (current_arg_type);
40085 if (!nargs || nargs > 2)
40086 error ("interrupt service routine can only have a pointer argument "
40087 "and an optional integer argument");
40088 if (! VOID_TYPE_P (return_type))
40089 error ("interrupt service routine can't have non-void return value");
40091 return NULL_TREE;
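/* Editorial illustration of a signature the checks above accept: a pointer
   as the first argument, an optional word-sized unsigned integer as the
   second, and a void return type.  For 64-bit LP64 this would be

     struct interrupt_frame;
     void __attribute__ ((interrupt))
     handler (struct interrupt_frame *frame, unsigned long error_code);

   (The struct tag and parameter names are illustrative only.)  */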
40094 static bool
40095 ix86_ms_bitfield_layout_p (const_tree record_type)
40097 return ((TARGET_MS_BITFIELD_LAYOUT
40098 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
40099 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
40102 /* Returns an expression indicating where the this parameter is
40103 located on entry to the FUNCTION. */
40105 static rtx
40106 x86_this_parameter (tree function)
40108 tree type = TREE_TYPE (function);
40109 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
40110 int nregs;
40112 if (TARGET_64BIT)
40114 const int *parm_regs;
40116 if (ix86_function_type_abi (type) == MS_ABI)
40117 parm_regs = x86_64_ms_abi_int_parameter_registers;
40118 else
40119 parm_regs = x86_64_int_parameter_registers;
40120 return gen_rtx_REG (Pmode, parm_regs[aggr]);
40123 nregs = ix86_function_regparm (type, function);
40125 if (nregs > 0 && !stdarg_p (type))
40127 int regno;
40128 unsigned int ccvt = ix86_get_callcvt (type);
40130 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40131 regno = aggr ? DX_REG : CX_REG;
40132 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40134 regno = CX_REG;
40135 if (aggr)
40136 return gen_rtx_MEM (SImode,
40137 plus_constant (Pmode, stack_pointer_rtx, 4));
40139 else
40141 regno = AX_REG;
40142 if (aggr)
40144 regno = DX_REG;
40145 if (nregs == 1)
40146 return gen_rtx_MEM (SImode,
40147 plus_constant (Pmode,
40148 stack_pointer_rtx, 4));
40151 return gen_rtx_REG (SImode, regno);
40154 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
40155 aggr ? 8 : 4));
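/* Editorial note: under the 64-bit SysV ABI the code above yields %rdi for
   "this" (%rsi when the return value is passed by hidden reference), %rcx
   (or %rdx) for the MS ABI, %ecx for 32-bit thiscall, and a stack slot
   when no register is available.  */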
40158 /* Determine whether x86_output_mi_thunk can succeed. */
40160 static bool
40161 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
40162 const_tree function)
40164 /* 64-bit can handle anything. */
40165 if (TARGET_64BIT)
40166 return true;
40168 /* For 32-bit, everything's fine if we have one free register. */
40169 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
40170 return true;
40172 /* Need a free register for vcall_offset. */
40173 if (vcall_offset)
40174 return false;
40176 /* Need a free register for GOT references. */
40177 if (flag_pic && !targetm.binds_local_p (function))
40178 return false;
40180 /* Otherwise ok. */
40181 return true;
40184 /* Output the assembler code for a thunk function. THUNK_DECL is the
40185 declaration for the thunk function itself, FUNCTION is the decl for
40186 the target function. DELTA is an immediate constant offset to be
40187 added to THIS. If VCALL_OFFSET is nonzero, the word at
40188 *(*this + vcall_offset) should be added to THIS. */
40190 static void
40191 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
40192 HOST_WIDE_INT vcall_offset, tree function)
40194 rtx this_param = x86_this_parameter (function);
40195 rtx this_reg, tmp, fnaddr;
40196 unsigned int tmp_regno;
40197 rtx_insn *insn;
40199 if (TARGET_64BIT)
40200 tmp_regno = R10_REG;
40201 else
40203 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
40204 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
40205 tmp_regno = AX_REG;
40206 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
40207 tmp_regno = DX_REG;
40208 else
40209 tmp_regno = CX_REG;
40212 emit_note (NOTE_INSN_PROLOGUE_END);
40214 /* CET is enabled: insert an ENDBR instruction. */
40215 if ((flag_cf_protection & CF_BRANCH) && TARGET_IBT)
40216 emit_insn (gen_nop_endbr ());
40218 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
40219 pull it in now and let DELTA benefit. */
40220 if (REG_P (this_param))
40221 this_reg = this_param;
40222 else if (vcall_offset)
40224 /* Put the this parameter into %eax. */
40225 this_reg = gen_rtx_REG (Pmode, AX_REG);
40226 emit_move_insn (this_reg, this_param);
40228 else
40229 this_reg = NULL_RTX;
40231 /* Adjust the this parameter by a fixed constant. */
40232 if (delta)
40234 rtx delta_rtx = GEN_INT (delta);
40235 rtx delta_dst = this_reg ? this_reg : this_param;
40237 if (TARGET_64BIT)
40239 if (!x86_64_general_operand (delta_rtx, Pmode))
40241 tmp = gen_rtx_REG (Pmode, tmp_regno);
40242 emit_move_insn (tmp, delta_rtx);
40243 delta_rtx = tmp;
40247 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
40250 /* Adjust the this parameter by a value stored in the vtable. */
40251 if (vcall_offset)
40253 rtx vcall_addr, vcall_mem, this_mem;
40255 tmp = gen_rtx_REG (Pmode, tmp_regno);
40257 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
40258 if (Pmode != ptr_mode)
40259 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
40260 emit_move_insn (tmp, this_mem);
40262 /* Adjust the this parameter. */
40263 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
40264 if (TARGET_64BIT
40265 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
40267 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
40268 emit_move_insn (tmp2, GEN_INT (vcall_offset));
40269 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
40272 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
40273 if (Pmode != ptr_mode)
40274 emit_insn (gen_addsi_1_zext (this_reg,
40275 gen_rtx_REG (ptr_mode,
40276 REGNO (this_reg)),
40277 vcall_mem));
40278 else
40279 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
40282 /* If necessary, drop THIS back to its stack slot. */
40283 if (this_reg && this_reg != this_param)
40284 emit_move_insn (this_param, this_reg);
40286 fnaddr = XEXP (DECL_RTL (function), 0);
40287 if (TARGET_64BIT)
40289 if (!flag_pic || targetm.binds_local_p (function)
40290 || TARGET_PECOFF)
40292 else
40294 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
40295 tmp = gen_rtx_CONST (Pmode, tmp);
40296 fnaddr = gen_const_mem (Pmode, tmp);
40299 else
40301 if (!flag_pic || targetm.binds_local_p (function))
40303 #if TARGET_MACHO
40304 else if (TARGET_MACHO)
40306 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
40307 fnaddr = XEXP (fnaddr, 0);
40309 #endif /* TARGET_MACHO */
40310 else
40312 tmp = gen_rtx_REG (Pmode, CX_REG);
40313 output_set_got (tmp, NULL_RTX);
40315 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
40316 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
40317 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
40318 fnaddr = gen_const_mem (Pmode, fnaddr);
40322 /* Our sibling call patterns do not allow memories, because we have no
40323 predicate that can distinguish between frame and non-frame memory.
40324 For our purposes here, we can get away with (ab)using a jump pattern,
40325 because we're going to do no optimization. */
40326 if (MEM_P (fnaddr))
40328 if (sibcall_insn_operand (fnaddr, word_mode))
40330 fnaddr = XEXP (DECL_RTL (function), 0);
40331 tmp = gen_rtx_MEM (QImode, fnaddr);
40332 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40333 tmp = emit_call_insn (tmp);
40334 SIBLING_CALL_P (tmp) = 1;
40336 else
40337 emit_jump_insn (gen_indirect_jump (fnaddr));
40339 else
40341 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
40343 // CM_LARGE_PIC always uses a pseudo PIC register, which is
40344 // uninitialized here. Since FUNCTION is local and calling it
40345 // doesn't go through the PLT, we use the scratch register %r11 as
40346 // the PIC register and initialize it here.
40347 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
40348 ix86_init_large_pic_reg (tmp_regno);
40349 fnaddr = legitimize_pic_address (fnaddr,
40350 gen_rtx_REG (Pmode, tmp_regno));
40353 if (!sibcall_insn_operand (fnaddr, word_mode))
40355 tmp = gen_rtx_REG (word_mode, tmp_regno);
40356 if (GET_MODE (fnaddr) != word_mode)
40357 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
40358 emit_move_insn (tmp, fnaddr);
40359 fnaddr = tmp;
40362 tmp = gen_rtx_MEM (QImode, fnaddr);
40363 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
40364 tmp = emit_call_insn (tmp);
40365 SIBLING_CALL_P (tmp) = 1;
40367 emit_barrier ();
40369 /* Emit just enough of rest_of_compilation to get the insns emitted.
40370 Note that use_thunk calls assemble_start_function et al. */
40371 insn = get_insns ();
40372 shorten_branches (insn);
40373 final_start_function (insn, file, 1);
40374 final (insn, file, 1);
40375 final_end_function ();
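/* Editorial sketch (not from the original source): for a simple 64-bit
   non-virtual thunk the insns emitted above boil down to

       addq  $DELTA, %rdi      # adjust the incoming this pointer
       jmp   function          # sibling call to the real method

   with an additional load through the vtable ( *(*this + VCALL_OFFSET) )
   folded in when VCALL_OFFSET is nonzero.  */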
40378 static void
40379 x86_file_start (void)
40381 default_file_start ();
40382 if (TARGET_16BIT)
40383 fputs ("\t.code16gcc\n", asm_out_file);
40384 #if TARGET_MACHO
40385 darwin_file_start ();
40386 #endif
40387 if (X86_FILE_START_VERSION_DIRECTIVE)
40388 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
40389 if (X86_FILE_START_FLTUSED)
40390 fputs ("\t.global\t__fltused\n", asm_out_file);
40391 if (ix86_asm_dialect == ASM_INTEL)
40392 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
40395 int
40396 x86_field_alignment (tree type, int computed)
40398 machine_mode mode;
40400 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
40401 return computed;
40402 if (TARGET_IAMCU)
40403 return iamcu_alignment (type, computed);
40404 mode = TYPE_MODE (strip_array_types (type));
40405 if (mode == DFmode || mode == DCmode
40406 || GET_MODE_CLASS (mode) == MODE_INT
40407 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
40408 return MIN (32, computed);
40409 return computed;
40412 /* Print call to TARGET to FILE. */
40414 static void
40415 x86_print_call_or_nop (FILE *file, const char *target)
40417 if (flag_nop_mcount)
40418 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
40419 else
40420 fprintf (file, "1:\tcall\t%s\n", target);
40423 /* Output assembler code to FILE to increment profiler label # LABELNO
40424 for profiling a function entry. */
40425 void
40426 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
40428 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
40429 : MCOUNT_NAME);
40430 if (TARGET_64BIT)
40432 #ifndef NO_PROFILE_COUNTERS
40433 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
40434 #endif
40436 if (!TARGET_PECOFF && flag_pic)
40437 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
40438 else
40439 x86_print_call_or_nop (file, mcount_name);
40441 else if (flag_pic)
40443 #ifndef NO_PROFILE_COUNTERS
40444 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
40445 LPREFIX, labelno);
40446 #endif
40447 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
40449 else
40451 #ifndef NO_PROFILE_COUNTERS
40452 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
40453 LPREFIX, labelno);
40454 #endif
40455 x86_print_call_or_nop (file, mcount_name);
40458 if (flag_record_mcount)
40460 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
40461 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
40462 fprintf (file, "\t.previous\n");
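/* Editorial sketch: with the defaults (64-bit, non-PIC, no profile
   counters) the text printed above is just

     1:  call  mcount

   -mfentry moves the call before the prologue, -mnop-mcount replaces it
   with a 5-byte NOP, and -mrecord-mcount additionally records the "1:"
   address in the __mcount_loc section.  */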
40466 /* We don't have exact information about the insn sizes, but we may assume
40467 quite safely that we are informed about all 1-byte insns and memory
40468 address sizes. This is enough to eliminate unnecessary padding in
40469 99% of cases. */
40471 static int
40472 ix86_min_insn_size (rtx_insn *insn)
40474 int l = 0, len;
40476 if (!INSN_P (insn) || !active_insn_p (insn))
40477 return 0;
40479 /* Discard alignments we've emitted and jump instructions. */
40480 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
40481 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
40482 return 0;
40484 /* Important case - calls are always 5 bytes.
40485 It is common to have many calls in a row. */
40486 if (CALL_P (insn)
40487 && symbolic_reference_mentioned_p (PATTERN (insn))
40488 && !SIBLING_CALL_P (insn))
40489 return 5;
40490 len = get_attr_length (insn);
40491 if (len <= 1)
40492 return 1;
40494 /* For normal instructions we rely on get_attr_length being exact,
40495 with a few exceptions. */
40496 if (!JUMP_P (insn))
40498 enum attr_type type = get_attr_type (insn);
40500 switch (type)
40502 case TYPE_MULTI:
40503 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
40504 || asm_noperands (PATTERN (insn)) >= 0)
40505 return 0;
40506 break;
40507 case TYPE_OTHER:
40508 case TYPE_FCMP:
40509 break;
40510 default:
40511 /* Otherwise trust get_attr_length. */
40512 return len;
40515 l = get_attr_length_address (insn);
40516 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
40517 l = 4;
40519 if (l)
40520 return 1+l;
40521 else
40522 return 2;
40525 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
40527 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
40528 window. */
40530 static void
40531 ix86_avoid_jump_mispredicts (void)
40533 rtx_insn *insn, *start = get_insns ();
40534 int nbytes = 0, njumps = 0;
40535 bool isjump = false;
40537 /* Look for all minimal intervals of instructions containing 4 jumps.
40538 The intervals are bounded by START and INSN. NBYTES is the total
40539 size of instructions in the interval including INSN and not including
40540 START. When NBYTES is smaller than 16 bytes, it is possible
40541 that the ends of START and INSN end up in the same 16-byte page.
40543 The smallest offset in the page at which INSN can start is the case where
40544 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
40545 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
40547 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
40548 have to, control transfer to its label(s) can be performed through other
40549 means, and we also estimate the minimum length of all asm stmts as 0. */
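/* Editorial note: the loop below slides a window [START, INSN] over the
   insn stream; whenever a fourth jump/call would land within fewer than 16
   estimated bytes of the window start, a "pad" insn is emitted before it so
   that it begins in the next 16-byte window.  */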
40550 for (insn = start; insn; insn = NEXT_INSN (insn))
40552 int min_size;
40554 if (LABEL_P (insn))
40556 int align = label_to_alignment (insn);
40557 int max_skip = label_to_max_skip (insn);
40559 if (max_skip > 15)
40560 max_skip = 15;
40561 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
40562 already in the current 16 byte page, because otherwise
40563 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
40564 bytes to reach 16 byte boundary. */
40565 if (align <= 0
40566 || (align <= 3 && max_skip != (1 << align) - 1))
40567 max_skip = 0;
40568 if (dump_file)
40569 fprintf (dump_file, "Label %i with max_skip %i\n",
40570 INSN_UID (insn), max_skip);
40571 if (max_skip)
40573 while (nbytes + max_skip >= 16)
40575 start = NEXT_INSN (start);
40576 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40577 || CALL_P (start))
40578 njumps--, isjump = true;
40579 else
40580 isjump = false;
40581 nbytes -= ix86_min_insn_size (start);
40584 continue;
40587 min_size = ix86_min_insn_size (insn);
40588 nbytes += min_size;
40589 if (dump_file)
40590 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
40591 INSN_UID (insn), min_size);
40592 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
40593 || CALL_P (insn))
40594 njumps++;
40595 else
40596 continue;
40598 while (njumps > 3)
40600 start = NEXT_INSN (start);
40601 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
40602 || CALL_P (start))
40603 njumps--, isjump = true;
40604 else
40605 isjump = false;
40606 nbytes -= ix86_min_insn_size (start);
40608 gcc_assert (njumps >= 0);
40609 if (dump_file)
40610 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
40611 INSN_UID (start), INSN_UID (insn), nbytes);
40613 if (njumps == 3 && isjump && nbytes < 16)
40615 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
40617 if (dump_file)
40618 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
40619 INSN_UID (insn), padsize);
40620 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
40624 #endif
40626 /* AMD Athlon works faster
40627 when RET is not the destination of a conditional jump or directly preceded
40628 by another jump instruction. We avoid the penalty by inserting a NOP just
40629 before the RET instruction in such cases. */
40630 static void
40631 ix86_pad_returns (void)
40633 edge e;
40634 edge_iterator ei;
40636 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40638 basic_block bb = e->src;
40639 rtx_insn *ret = BB_END (bb);
40640 rtx_insn *prev;
40641 bool replace = false;
40643 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
40644 || optimize_bb_for_size_p (bb))
40645 continue;
40646 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
40647 if (active_insn_p (prev) || LABEL_P (prev))
40648 break;
40649 if (prev && LABEL_P (prev))
40651 edge e;
40652 edge_iterator ei;
40654 FOR_EACH_EDGE (e, ei, bb->preds)
40655 if (EDGE_FREQUENCY (e) && e->src->index >= 0
40656 && !(e->flags & EDGE_FALLTHRU))
40658 replace = true;
40659 break;
40662 if (!replace)
40664 prev = prev_active_insn (ret);
40665 if (prev
40666 && ((JUMP_P (prev) && any_condjump_p (prev))
40667 || CALL_P (prev)))
40668 replace = true;
40669 /* Empty functions get a branch mispredict even when
40670 the jump destination is not visible to us. */
40671 if (!prev && !optimize_function_for_size_p (cfun))
40672 replace = true;
40674 if (replace)
40676 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
40677 delete_insn (ret);
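/* Editorial note: the "long" return emitted above is the AMD-recommended
   "rep ret" encoding -- a REP prefix in front of RET -- which sidesteps the
   predictor penalty without changing semantics.  */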
40682 /* Count the minimum number of instructions in BB. Return 4 if the
40683 number of instructions >= 4. */
40685 static int
40686 ix86_count_insn_bb (basic_block bb)
40688 rtx_insn *insn;
40689 int insn_count = 0;
40691 /* Count number of instructions in this block. Return 4 if the number
40692 of instructions >= 4. */
40693 FOR_BB_INSNS (bb, insn)
40695 /* This only happens in exit blocks. */
40696 if (JUMP_P (insn)
40697 && ANY_RETURN_P (PATTERN (insn)))
40698 break;
40700 if (NONDEBUG_INSN_P (insn)
40701 && GET_CODE (PATTERN (insn)) != USE
40702 && GET_CODE (PATTERN (insn)) != CLOBBER)
40704 insn_count++;
40705 if (insn_count >= 4)
40706 return insn_count;
40710 return insn_count;
40714 /* Count the minimum number of instructions in code path in BB.
40715 Return 4 if the number of instructions >= 4. */
40717 static int
40718 ix86_count_insn (basic_block bb)
40720 edge e;
40721 edge_iterator ei;
40722 int min_prev_count;
40724 /* Only bother counting instructions along paths with no
40725 more than 2 basic blocks between entry and exit. Given
40726 that BB has an edge to exit, determine if a predecessor
40727 of BB has an edge from entry. If so, compute the number
40728 of instructions in the predecessor block. If there
40729 happen to be multiple such blocks, compute the minimum. */
40730 min_prev_count = 4;
40731 FOR_EACH_EDGE (e, ei, bb->preds)
40733 edge prev_e;
40734 edge_iterator prev_ei;
40736 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40738 min_prev_count = 0;
40739 break;
40741 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
40743 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
40745 int count = ix86_count_insn_bb (e->src);
40746 if (count < min_prev_count)
40747 min_prev_count = count;
40748 break;
40753 if (min_prev_count < 4)
40754 min_prev_count += ix86_count_insn_bb (bb);
40756 return min_prev_count;
40759 /* Pad short function to 4 instructions. */
40761 static void
40762 ix86_pad_short_function (void)
40764 edge e;
40765 edge_iterator ei;
40767 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40769 rtx_insn *ret = BB_END (e->src);
40770 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
40772 int insn_count = ix86_count_insn (e->src);
40774 /* Pad short function. */
40775 if (insn_count < 4)
40777 rtx_insn *insn = ret;
40779 /* Find epilogue. */
40780 while (insn
40781 && (!NOTE_P (insn)
40782 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
40783 insn = PREV_INSN (insn);
40785 if (!insn)
40786 insn = ret;
40788 /* Two NOPs count as one instruction. */
40789 insn_count = 2 * (4 - insn_count);
40790 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
40796 /* Fix up a Windows system unwinder issue. If an EH region falls through into
40797 the epilogue, the Windows system unwinder will apply epilogue logic and
40798 produce incorrect offsets. This can be avoided by adding a nop between
40799 the last insn that can throw and the first insn of the epilogue. */
40801 static void
40802 ix86_seh_fixup_eh_fallthru (void)
40804 edge e;
40805 edge_iterator ei;
40807 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
40809 rtx_insn *insn, *next;
40811 /* Find the beginning of the epilogue. */
40812 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
40813 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
40814 break;
40815 if (insn == NULL)
40816 continue;
40818 /* We only care about preceding insns that can throw. */
40819 insn = prev_active_insn (insn);
40820 if (insn == NULL || !can_throw_internal (insn))
40821 continue;
40823 /* Do not separate calls from their debug information. */
40824 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
40825 if (NOTE_P (next)
40826 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
40827 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
40828 insn = next;
40829 else
40830 break;
40832 emit_insn_after (gen_nops (const1_rtx), insn);
40836 /* Given a register number BASE, the lowest of a group of registers, update
40837 regsets IN and OUT with the registers that should be avoided in input
40838 and output operands respectively when trying to avoid generating a modr/m
40839 byte for -fmitigate-rop. */
40841 static void
40842 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
40844 SET_HARD_REG_BIT (out, base);
40845 SET_HARD_REG_BIT (out, base + 1);
40846 SET_HARD_REG_BIT (in, base + 2);
40847 SET_HARD_REG_BIT (in, base + 3);
40850 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
40851 that certain encodings of modr/m bytes do not occur. */
40852 static void
40853 ix86_mitigate_rop (void)
40855 HARD_REG_SET input_risky;
40856 HARD_REG_SET output_risky;
40857 HARD_REG_SET inout_risky;
40859 CLEAR_HARD_REG_SET (output_risky);
40860 CLEAR_HARD_REG_SET (input_risky);
40861 SET_HARD_REG_BIT (output_risky, AX_REG);
40862 SET_HARD_REG_BIT (output_risky, CX_REG);
40863 SET_HARD_REG_BIT (input_risky, BX_REG);
40864 SET_HARD_REG_BIT (input_risky, DX_REG);
40865 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
40866 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
40867 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
40868 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
40869 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
40870 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
40871 COPY_HARD_REG_SET (inout_risky, input_risky);
40872 IOR_HARD_REG_SET (inout_risky, output_risky);
40874 df_note_add_problem ();
40875 /* Fix up what stack-regs did. */
40876 df_insn_rescan_all ();
40877 df_analyze ();
40879 regrename_init (true);
40880 regrename_analyze (NULL);
40882 auto_vec<du_head_p> cands;
40884 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
40886 if (!NONDEBUG_INSN_P (insn))
40887 continue;
40889 if (GET_CODE (PATTERN (insn)) == USE
40890 || GET_CODE (PATTERN (insn)) == CLOBBER)
40891 continue;
40893 extract_insn (insn);
40895 int opno0, opno1;
40896 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
40897 recog_data.n_operands, &opno0,
40898 &opno1);
40900 if (!ix86_rop_should_change_byte_p (modrm))
40901 continue;
40903 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
40905 /* This happens when regrename has to fail a block. */
40906 if (!info->op_info)
40907 continue;
40909 if (info->op_info[opno0].n_chains != 0)
40911 gcc_assert (info->op_info[opno0].n_chains == 1);
40912 du_head_p op0c;
40913 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
40914 if (op0c->target_data_1 + op0c->target_data_2 == 0
40915 && !op0c->cannot_rename)
40916 cands.safe_push (op0c);
40918 op0c->target_data_1++;
40920 if (info->op_info[opno1].n_chains != 0)
40922 gcc_assert (info->op_info[opno1].n_chains == 1);
40923 du_head_p op1c;
40924 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
40925 if (op1c->target_data_1 + op1c->target_data_2 == 0
40926 && !op1c->cannot_rename)
40927 cands.safe_push (op1c);
40929 op1c->target_data_2++;
40933 int i;
40934 du_head_p head;
40935 FOR_EACH_VEC_ELT (cands, i, head)
40937 int old_reg, best_reg;
40938 HARD_REG_SET unavailable;
40940 CLEAR_HARD_REG_SET (unavailable);
40941 if (head->target_data_1)
40942 IOR_HARD_REG_SET (unavailable, output_risky);
40943 if (head->target_data_2)
40944 IOR_HARD_REG_SET (unavailable, input_risky);
40946 int n_uses;
40947 reg_class superclass = regrename_find_superclass (head, &n_uses,
40948 &unavailable);
40949 old_reg = head->regno;
40950 best_reg = find_rename_reg (head, superclass, &unavailable,
40951 old_reg, false);
40952 bool ok = regrename_do_replace (head, best_reg);
40953 gcc_assert (ok);
40954 if (dump_file)
40955 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
40956 reg_names[best_reg], reg_class_names[superclass]);
40960 regrename_finish ();
40962 df_analyze ();
40964 basic_block bb;
40965 regset_head live;
40967 INIT_REG_SET (&live);
40969 FOR_EACH_BB_FN (bb, cfun)
40971 rtx_insn *insn;
40973 COPY_REG_SET (&live, DF_LR_OUT (bb));
40974 df_simulate_initialize_backwards (bb, &live);
40976 FOR_BB_INSNS_REVERSE (bb, insn)
40978 if (!NONDEBUG_INSN_P (insn))
40979 continue;
40981 df_simulate_one_insn_backwards (bb, insn, &live);
40983 if (GET_CODE (PATTERN (insn)) == USE
40984 || GET_CODE (PATTERN (insn)) == CLOBBER)
40985 continue;
40987 extract_insn (insn);
40988 constrain_operands_cached (insn, reload_completed);
40989 int opno0, opno1;
40990 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
40991 recog_data.n_operands, &opno0,
40992 &opno1);
40993 if (modrm < 0
40994 || !ix86_rop_should_change_byte_p (modrm)
40995 || opno0 == opno1)
40996 continue;
40998 rtx oldreg = recog_data.operand[opno1];
40999 preprocess_constraints (insn);
41000 const operand_alternative *alt = which_op_alt ();
41002 int i;
41003 for (i = 0; i < recog_data.n_operands; i++)
41004 if (i != opno1
41005 && alt[i].earlyclobber
41006 && reg_overlap_mentioned_p (recog_data.operand[i],
41007 oldreg))
41008 break;
41010 if (i < recog_data.n_operands)
41011 continue;
41013 if (dump_file)
41014 fprintf (dump_file,
41015 "attempting to fix modrm byte in insn %d:"
41016 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
41017 reg_class_names[alt[opno1].cl]);
41019 HARD_REG_SET unavailable;
41020 REG_SET_TO_HARD_REG_SET (unavailable, &live);
41021 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
41022 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
41023 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
41024 IOR_HARD_REG_SET (unavailable, output_risky);
41025 IOR_COMPL_HARD_REG_SET (unavailable,
41026 reg_class_contents[alt[opno1].cl]);
41028 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41029 if (!TEST_HARD_REG_BIT (unavailable, i))
41030 break;
41031 if (i == FIRST_PSEUDO_REGISTER)
41033 if (dump_file)
41034 fprintf (dump_file, ", none available\n");
41035 continue;
41037 if (dump_file)
41038 fprintf (dump_file, " -> %d\n", i);
41039 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
41040 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
41041 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
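/* Editorial note: the overall idea of -fmitigate-rop is to rename registers
   (or, as a last resort above, introduce an extra reg-reg copy) so that the
   ModR/M byte of selected insns no longer takes a value that is also useful
   as a ROP gadget, e.g. one that decodes as a return.  */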
41046 /* Implement machine specific optimizations. We implement padding of returns
41047 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
41048 static void
41049 ix86_reorg (void)
41051 /* We are freeing block_for_insn in the toplev to keep compatibility
41052 with old MDEP_REORGS that are not CFG based. Recompute it now. */
41053 compute_bb_for_insn ();
41055 if (flag_mitigate_rop)
41056 ix86_mitigate_rop ();
41058 if (TARGET_SEH && current_function_has_exception_handlers ())
41059 ix86_seh_fixup_eh_fallthru ();
41061 if (optimize && optimize_function_for_speed_p (cfun))
41063 if (TARGET_PAD_SHORT_FUNCTION)
41064 ix86_pad_short_function ();
41065 else if (TARGET_PAD_RETURNS)
41066 ix86_pad_returns ();
41067 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
41068 if (TARGET_FOUR_JUMP_LIMIT)
41069 ix86_avoid_jump_mispredicts ();
41070 #endif
41074 /* Return nonzero when QImode register that must be represented via REX prefix
41075 is used. */
41076 bool
41077 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
41079 int i;
41080 extract_insn_cached (insn);
41081 for (i = 0; i < recog_data.n_operands; i++)
41082 if (GENERAL_REG_P (recog_data.operand[i])
41083 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
41084 return true;
41085 return false;
41088 /* Return true when INSN mentions register that must be encoded using REX
41089 prefix. */
41090 bool
41091 x86_extended_reg_mentioned_p (rtx insn)
41093 subrtx_iterator::array_type array;
41094 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
41096 const_rtx x = *iter;
41097 if (REG_P (x)
41098 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
41099 return true;
41101 return false;
41104 /* If profitable, negate (without causing overflow) integer constant
41105 of mode MODE at location LOC. Return true in this case. */
41106 bool
41107 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
41109 HOST_WIDE_INT val;
41111 if (!CONST_INT_P (*loc))
41112 return false;
41114 switch (mode)
41116 case E_DImode:
41117 /* DImode x86_64 constants must fit in 32 bits. */
41118 gcc_assert (x86_64_immediate_operand (*loc, mode));
41120 mode = SImode;
41121 break;
41123 case E_SImode:
41124 case E_HImode:
41125 case E_QImode:
41126 break;
41128 default:
41129 gcc_unreachable ();
41132 /* Avoid overflows. */
41133 if (mode_signbit_p (mode, *loc))
41134 return false;
41136 val = INTVAL (*loc);
41138 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
41139 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
41140 if ((val < 0 && val != -128)
41141 || val == 128)
41143 *loc = GEN_INT (-val);
41144 return true;
41147 return false;
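/* Editorial example: given (plus (reg) (const_int -4)) the caller can emit
   "subl $4, %eax" instead of "addl $-4, %eax" after this returns true.
   128 is negated too because -128 still fits in a sign-extended 8-bit
   immediate while +128 does not, giving a shorter encoding.  */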
41150 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
41151 optabs would emit if we didn't have TFmode patterns. */
41153 void
41154 x86_emit_floatuns (rtx operands[2])
41156 rtx_code_label *neglab, *donelab;
41157 rtx i0, i1, f0, in, out;
41158 machine_mode mode, inmode;
41160 inmode = GET_MODE (operands[1]);
41161 gcc_assert (inmode == SImode || inmode == DImode);
41163 out = operands[0];
41164 in = force_reg (inmode, operands[1]);
41165 mode = GET_MODE (out);
41166 neglab = gen_label_rtx ();
41167 donelab = gen_label_rtx ();
41168 f0 = gen_reg_rtx (mode);
41170 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
41172 expand_float (out, in, 0);
41174 emit_jump_insn (gen_jump (donelab));
41175 emit_barrier ();
41177 emit_label (neglab);
41179 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
41180 1, OPTAB_DIRECT);
41181 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
41182 1, OPTAB_DIRECT);
41183 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
41185 expand_float (f0, i0, 0);
41187 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
41189 emit_label (donelab);
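/* Editorial sketch of the emitted sequence in C terms (for a DImode input
   and a double result):

     if ((int64_t) in >= 0)
       out = (double) in;                       // ordinary signed path
     else
       {
         uint64_t half = (in >> 1) | (in & 1);  // halve, keeping a sticky bit
         out = (double) half;
         out = out + out;                       // scale back up
       }
*/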
41192 static bool canonicalize_perm (struct expand_vec_perm_d *d);
41193 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
41194 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
41195 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
41197 /* Get a vector mode of the same size as the original but with elements
41198 twice as wide. This is only guaranteed to apply to integral vectors. */
41200 static inline machine_mode
41201 get_mode_wider_vector (machine_mode o)
41203 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
41204 machine_mode n = GET_MODE_WIDER_MODE (o).require ();
41205 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
41206 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
41207 return n;
41210 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
41211 fill target with val via vec_duplicate. */
41213 static bool
41214 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
41216 bool ok;
41217 rtx_insn *insn;
41218 rtx dup;
41220 /* First attempt to recognize VAL as-is. */
41221 dup = gen_vec_duplicate (mode, val);
41222 insn = emit_insn (gen_rtx_SET (target, dup));
41223 if (recog_memoized (insn) < 0)
41225 rtx_insn *seq;
41226 machine_mode innermode = GET_MODE_INNER (mode);
41227 rtx reg;
41229 /* If that fails, force VAL into a register. */
41231 start_sequence ();
41232 reg = force_reg (innermode, val);
41233 if (GET_MODE (reg) != innermode)
41234 reg = gen_lowpart (innermode, reg);
41235 XEXP (dup, 0) = reg;
41236 seq = get_insns ();
41237 end_sequence ();
41238 if (seq)
41239 emit_insn_before (seq, insn);
41241 ok = recog_memoized (insn) >= 0;
41242 gcc_assert (ok);
41244 return true;
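/* Editorial example: for V8SImode with TARGET_AVX2 the call above ends up
   emitting the single insn

     (set (reg:V8SI target) (vec_duplicate:V8SI (reg:SI val)))

   which matches the vpbroadcastd pattern; when the initial form is not
   recognized, VAL is forced into a register and recognition is retried.  */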
41247 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41248 with all elements equal to VAR. Return true if successful. */
41250 static bool
41251 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
41252 rtx target, rtx val)
41254 bool ok;
41256 switch (mode)
41258 case E_V2SImode:
41259 case E_V2SFmode:
41260 if (!mmx_ok)
41261 return false;
41262 /* FALLTHRU */
41264 case E_V4DFmode:
41265 case E_V4DImode:
41266 case E_V8SFmode:
41267 case E_V8SImode:
41268 case E_V2DFmode:
41269 case E_V2DImode:
41270 case E_V4SFmode:
41271 case E_V4SImode:
41272 case E_V16SImode:
41273 case E_V8DImode:
41274 case E_V16SFmode:
41275 case E_V8DFmode:
41276 return ix86_vector_duplicate_value (mode, target, val);
41278 case E_V4HImode:
41279 if (!mmx_ok)
41280 return false;
41281 if (TARGET_SSE || TARGET_3DNOW_A)
41283 rtx x;
41285 val = gen_lowpart (SImode, val);
41286 x = gen_rtx_TRUNCATE (HImode, val);
41287 x = gen_rtx_VEC_DUPLICATE (mode, x);
41288 emit_insn (gen_rtx_SET (target, x));
41289 return true;
41291 goto widen;
41293 case E_V8QImode:
41294 if (!mmx_ok)
41295 return false;
41296 goto widen;
41298 case E_V8HImode:
41299 if (TARGET_AVX2)
41300 return ix86_vector_duplicate_value (mode, target, val);
41302 if (TARGET_SSE2)
41304 struct expand_vec_perm_d dperm;
41305 rtx tmp1, tmp2;
41307 permute:
41308 memset (&dperm, 0, sizeof (dperm));
41309 dperm.target = target;
41310 dperm.vmode = mode;
41311 dperm.nelt = GET_MODE_NUNITS (mode);
41312 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
41313 dperm.one_operand_p = true;
41315 /* Extend to SImode using a paradoxical SUBREG. */
41316 tmp1 = gen_reg_rtx (SImode);
41317 emit_move_insn (tmp1, gen_lowpart (SImode, val));
41319 /* Insert the SImode value as low element of a V4SImode vector. */
41320 tmp2 = gen_reg_rtx (V4SImode);
41321 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
41322 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
41324 ok = (expand_vec_perm_1 (&dperm)
41325 || expand_vec_perm_broadcast_1 (&dperm));
41326 gcc_assert (ok);
41327 return ok;
41329 goto widen;
41331 case E_V16QImode:
41332 if (TARGET_AVX2)
41333 return ix86_vector_duplicate_value (mode, target, val);
41335 if (TARGET_SSE2)
41336 goto permute;
41337 goto widen;
41339 widen:
41340 /* Replicate the value once into the next wider mode and recurse. */
41342 machine_mode smode, wsmode, wvmode;
41343 rtx x;
41345 smode = GET_MODE_INNER (mode);
41346 wvmode = get_mode_wider_vector (mode);
41347 wsmode = GET_MODE_INNER (wvmode);
41349 val = convert_modes (wsmode, smode, val, true);
41350 x = expand_simple_binop (wsmode, ASHIFT, val,
41351 GEN_INT (GET_MODE_BITSIZE (smode)),
41352 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41353 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
41355 x = gen_reg_rtx (wvmode);
41356 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
41357 gcc_assert (ok);
41358 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
41359 return ok;
41362 case E_V16HImode:
41363 case E_V32QImode:
41364 if (TARGET_AVX2)
41365 return ix86_vector_duplicate_value (mode, target, val);
41366 else
41368 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
41369 rtx x = gen_reg_rtx (hvmode);
41371 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41372 gcc_assert (ok);
41374 x = gen_rtx_VEC_CONCAT (mode, x, x);
41375 emit_insn (gen_rtx_SET (target, x));
41377 return true;
41379 case E_V64QImode:
41380 case E_V32HImode:
41381 if (TARGET_AVX512BW)
41382 return ix86_vector_duplicate_value (mode, target, val);
41383 else
41385 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
41386 rtx x = gen_reg_rtx (hvmode);
41388 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
41389 gcc_assert (ok);
41391 x = gen_rtx_VEC_CONCAT (mode, x, x);
41392 emit_insn (gen_rtx_SET (target, x));
41394 return true;
41396 default:
41397 return false;
41401 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41402 whose ONE_VAR element is VAR, and other elements are zero. Return true
41403 if successful. */
41405 static bool
41406 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
41407 rtx target, rtx var, int one_var)
41409 machine_mode vsimode;
41410 rtx new_target;
41411 rtx x, tmp;
41412 bool use_vector_set = false;
41414 switch (mode)
41416 case E_V2DImode:
41417 /* For SSE4.1, we normally use vector set. But if the second
41418 element is zero and inter-unit moves are OK, we use movq
41419 instead. */
41420 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
41421 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
41422 && one_var == 0));
41423 break;
41424 case E_V16QImode:
41425 case E_V4SImode:
41426 case E_V4SFmode:
41427 use_vector_set = TARGET_SSE4_1;
41428 break;
41429 case E_V8HImode:
41430 use_vector_set = TARGET_SSE2;
41431 break;
41432 case E_V4HImode:
41433 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
41434 break;
41435 case E_V32QImode:
41436 case E_V16HImode:
41437 case E_V8SImode:
41438 case E_V8SFmode:
41439 case E_V4DFmode:
41440 use_vector_set = TARGET_AVX;
41441 break;
41442 case E_V4DImode:
41443 /* Use ix86_expand_vector_set in 64bit mode only. */
41444 use_vector_set = TARGET_AVX && TARGET_64BIT;
41445 break;
41446 default:
41447 break;
41450 if (use_vector_set)
41452 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
41453 var = force_reg (GET_MODE_INNER (mode), var);
41454 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41455 return true;
41458 switch (mode)
41460 case E_V2SFmode:
41461 case E_V2SImode:
41462 if (!mmx_ok)
41463 return false;
41464 /* FALLTHRU */
41466 case E_V2DFmode:
41467 case E_V2DImode:
41468 if (one_var != 0)
41469 return false;
41470 var = force_reg (GET_MODE_INNER (mode), var);
41471 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
41472 emit_insn (gen_rtx_SET (target, x));
41473 return true;
41475 case E_V4SFmode:
41476 case E_V4SImode:
41477 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
41478 new_target = gen_reg_rtx (mode);
41479 else
41480 new_target = target;
41481 var = force_reg (GET_MODE_INNER (mode), var);
41482 x = gen_rtx_VEC_DUPLICATE (mode, var);
41483 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
41484 emit_insn (gen_rtx_SET (new_target, x));
41485 if (one_var != 0)
41487 /* We need to shuffle the value to the correct position, so
41488 create a new pseudo to store the intermediate result. */
41490 /* With SSE2, we can use the integer shuffle insns. */
41491 if (mode != V4SFmode && TARGET_SSE2)
41493 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
41494 const1_rtx,
41495 GEN_INT (one_var == 1 ? 0 : 1),
41496 GEN_INT (one_var == 2 ? 0 : 1),
41497 GEN_INT (one_var == 3 ? 0 : 1)));
41498 if (target != new_target)
41499 emit_move_insn (target, new_target);
41500 return true;
41503 /* Otherwise convert the intermediate result to V4SFmode and
41504 use the SSE1 shuffle instructions. */
41505 if (mode != V4SFmode)
41507 tmp = gen_reg_rtx (V4SFmode);
41508 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
41510 else
41511 tmp = new_target;
41513 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
41514 const1_rtx,
41515 GEN_INT (one_var == 1 ? 0 : 1),
41516 GEN_INT (one_var == 2 ? 0+4 : 1+4),
41517 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
41519 if (mode != V4SFmode)
41520 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
41521 else if (tmp != target)
41522 emit_move_insn (target, tmp);
41524 else if (target != new_target)
41525 emit_move_insn (target, new_target);
41526 return true;
41528 case E_V8HImode:
41529 case E_V16QImode:
41530 vsimode = V4SImode;
41531 goto widen;
41532 case E_V4HImode:
41533 case E_V8QImode:
41534 if (!mmx_ok)
41535 return false;
41536 vsimode = V2SImode;
41537 goto widen;
41538 widen:
41539 if (one_var != 0)
41540 return false;
41542 /* Zero extend the variable element to SImode and recurse. */
41543 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
41545 x = gen_reg_rtx (vsimode);
41546 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
41547 var, one_var))
41548 gcc_unreachable ();
41550 emit_move_insn (target, gen_lowpart (mode, x));
41551 return true;
41553 default:
41554 return false;
41558 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
41559 consisting of the values in VALS. It is known that all elements
41560 except ONE_VAR are constants. Return true if successful. */
41562 static bool
41563 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
41564 rtx target, rtx vals, int one_var)
41566 rtx var = XVECEXP (vals, 0, one_var);
41567 machine_mode wmode;
41568 rtx const_vec, x;
41570 const_vec = copy_rtx (vals);
41571 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
41572 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
41574 switch (mode)
41576 case E_V2DFmode:
41577 case E_V2DImode:
41578 case E_V2SFmode:
41579 case E_V2SImode:
41580 /* For two-element vectors, it's just as easy to use
41581 the general case. */
41582 return false;
41584 case E_V4DImode:
41585 /* Use ix86_expand_vector_set in 64bit mode only. */
41586 if (!TARGET_64BIT)
41587 return false;
41588 /* FALLTHRU */
41589 case E_V4DFmode:
41590 case E_V8SFmode:
41591 case E_V8SImode:
41592 case E_V16HImode:
41593 case E_V32QImode:
41594 case E_V4SFmode:
41595 case E_V4SImode:
41596 case E_V8HImode:
41597 case E_V4HImode:
41598 break;
41600 case E_V16QImode:
41601 if (TARGET_SSE4_1)
41602 break;
41603 wmode = V8HImode;
41604 goto widen;
41605 case E_V8QImode:
41606 wmode = V4HImode;
41607 goto widen;
41608 widen:
41609 /* There's no way to set one QImode entry easily. Combine
41610 the variable value with its adjacent constant value, and
41611 promote to an HImode set. */
41612 x = XVECEXP (vals, 0, one_var ^ 1);
41613 if (one_var & 1)
41615 var = convert_modes (HImode, QImode, var, true);
41616 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
41617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
41618 x = GEN_INT (INTVAL (x) & 0xff);
41620 else
41622 var = convert_modes (HImode, QImode, var, true);
41623 x = gen_int_mode (INTVAL (x) << 8, HImode);
41625 if (x != const0_rtx)
41626 var = expand_simple_binop (HImode, IOR, var, x, var,
41627 1, OPTAB_LIB_WIDEN);
41629 x = gen_reg_rtx (wmode);
41630 emit_move_insn (x, gen_lowpart (wmode, const_vec));
41631 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
41633 emit_move_insn (target, gen_lowpart (mode, x));
41634 return true;
41636 default:
41637 return false;
41640 emit_move_insn (target, const_vec);
41641 ix86_expand_vector_set (mmx_ok, target, var, one_var);
41642 return true;
41645 /* A subroutine of ix86_expand_vector_init_general. Use vector
41646 concatenate to handle the most general case: all values variable,
41647 and none identical. */
41649 static void
41650 ix86_expand_vector_init_concat (machine_mode mode,
41651 rtx target, rtx *ops, int n)
41653 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
41654 rtx first[16], second[8], third[4];
41655 rtvec v;
41656 int i, j;
41658 switch (n)
41660 case 2:
41661 switch (mode)
41663 case E_V16SImode:
41664 cmode = V8SImode;
41665 break;
41666 case E_V16SFmode:
41667 cmode = V8SFmode;
41668 break;
41669 case E_V8DImode:
41670 cmode = V4DImode;
41671 break;
41672 case E_V8DFmode:
41673 cmode = V4DFmode;
41674 break;
41675 case E_V8SImode:
41676 cmode = V4SImode;
41677 break;
41678 case E_V8SFmode:
41679 cmode = V4SFmode;
41680 break;
41681 case E_V4DImode:
41682 cmode = V2DImode;
41683 break;
41684 case E_V4DFmode:
41685 cmode = V2DFmode;
41686 break;
41687 case E_V4SImode:
41688 cmode = V2SImode;
41689 break;
41690 case E_V4SFmode:
41691 cmode = V2SFmode;
41692 break;
41693 case E_V2DImode:
41694 cmode = DImode;
41695 break;
41696 case E_V2SImode:
41697 cmode = SImode;
41698 break;
41699 case E_V2DFmode:
41700 cmode = DFmode;
41701 break;
41702 case E_V2SFmode:
41703 cmode = SFmode;
41704 break;
41705 default:
41706 gcc_unreachable ();
41709 if (!register_operand (ops[1], cmode))
41710 ops[1] = force_reg (cmode, ops[1]);
41711 if (!register_operand (ops[0], cmode))
41712 ops[0] = force_reg (cmode, ops[0]);
41713 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
41714 ops[1])));
41715 break;
41717 case 4:
41718 switch (mode)
41720 case E_V4DImode:
41721 cmode = V2DImode;
41722 break;
41723 case E_V4DFmode:
41724 cmode = V2DFmode;
41725 break;
41726 case E_V4SImode:
41727 cmode = V2SImode;
41728 break;
41729 case E_V4SFmode:
41730 cmode = V2SFmode;
41731 break;
41732 default:
41733 gcc_unreachable ();
41735 goto half;
41737 case 8:
41738 switch (mode)
41740 case E_V8DImode:
41741 cmode = V2DImode;
41742 hmode = V4DImode;
41743 break;
41744 case E_V8DFmode:
41745 cmode = V2DFmode;
41746 hmode = V4DFmode;
41747 break;
41748 case E_V8SImode:
41749 cmode = V2SImode;
41750 hmode = V4SImode;
41751 break;
41752 case E_V8SFmode:
41753 cmode = V2SFmode;
41754 hmode = V4SFmode;
41755 break;
41756 default:
41757 gcc_unreachable ();
41759 goto half;
41761 case 16:
41762 switch (mode)
41764 case E_V16SImode:
41765 cmode = V2SImode;
41766 hmode = V4SImode;
41767 gmode = V8SImode;
41768 break;
41769 case E_V16SFmode:
41770 cmode = V2SFmode;
41771 hmode = V4SFmode;
41772 gmode = V8SFmode;
41773 break;
41774 default:
41775 gcc_unreachable ();
41777 goto half;
41779 half:
41780 /* FIXME: We process inputs backward to help RA. PR 36222. */
41781 i = n - 1;
41782 j = (n >> 1) - 1;
41783 for (; i > 0; i -= 2, j--)
41785 first[j] = gen_reg_rtx (cmode);
41786 v = gen_rtvec (2, ops[i - 1], ops[i]);
41787 ix86_expand_vector_init (false, first[j],
41788 gen_rtx_PARALLEL (cmode, v));
41791 n >>= 1;
41792 if (n > 4)
41794 gcc_assert (hmode != VOIDmode);
41795 gcc_assert (gmode != VOIDmode);
41796 for (i = j = 0; i < n; i += 2, j++)
41798 second[j] = gen_reg_rtx (hmode);
41799 ix86_expand_vector_init_concat (hmode, second [j],
41800 &first [i], 2);
41802 n >>= 1;
41803 for (i = j = 0; i < n; i += 2, j++)
41805 third[j] = gen_reg_rtx (gmode);
41806 ix86_expand_vector_init_concat (gmode, third[j],
41807 &second[i], 2);
41809 n >>= 1;
41810 ix86_expand_vector_init_concat (mode, target, third, n);
41812 else if (n > 2)
41814 gcc_assert (hmode != VOIDmode);
41815 for (i = j = 0; i < n; i += 2, j++)
41817 second[j] = gen_reg_rtx (hmode);
41818 ix86_expand_vector_init_concat (hmode, second [j],
41819 &first [i], 2);
41821 n >>= 1;
41822 ix86_expand_vector_init_concat (mode, target, second, n);
41824 else
41825 ix86_expand_vector_init_concat (mode, target, first, n);
41826 break;
41828 default:
41829 gcc_unreachable ();
41833 /* A subroutine of ix86_expand_vector_init_general. Use vector
41834 interleave to handle the most general case: all values variable,
41835 and none identical. */
41837 static void
41838 ix86_expand_vector_init_interleave (machine_mode mode,
41839 rtx target, rtx *ops, int n)
41841 machine_mode first_imode, second_imode, third_imode, inner_mode;
41842 int i, j;
41843 rtx op0, op1;
41844 rtx (*gen_load_even) (rtx, rtx, rtx);
41845 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
41846 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
41848 switch (mode)
41850 case E_V8HImode:
41851 gen_load_even = gen_vec_setv8hi;
41852 gen_interleave_first_low = gen_vec_interleave_lowv4si;
41853 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41854 inner_mode = HImode;
41855 first_imode = V4SImode;
41856 second_imode = V2DImode;
41857 third_imode = VOIDmode;
41858 break;
41859 case E_V16QImode:
41860 gen_load_even = gen_vec_setv16qi;
41861 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
41862 gen_interleave_second_low = gen_vec_interleave_lowv4si;
41863 inner_mode = QImode;
41864 first_imode = V8HImode;
41865 second_imode = V4SImode;
41866 third_imode = V2DImode;
41867 break;
41868 default:
41869 gcc_unreachable ();
41872 for (i = 0; i < n; i++)
41874 /* Extend the odd element to SImode using a paradoxical SUBREG. */
41875 op0 = gen_reg_rtx (SImode);
41876 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
41878 /* Insert the SImode value as low element of V4SImode vector. */
41879 op1 = gen_reg_rtx (V4SImode);
41880 op0 = gen_rtx_VEC_MERGE (V4SImode,
41881 gen_rtx_VEC_DUPLICATE (V4SImode,
41882 op0),
41883 CONST0_RTX (V4SImode),
41884 const1_rtx);
41885 emit_insn (gen_rtx_SET (op1, op0));
41887 /* Cast the V4SImode vector back to a vector in the original mode. */
41888 op0 = gen_reg_rtx (mode);
41889 emit_move_insn (op0, gen_lowpart (mode, op1));
41891 /* Load even elements into the second position. */
41892 emit_insn (gen_load_even (op0,
41893 force_reg (inner_mode,
41894 ops [i + i + 1]),
41895 const1_rtx));
41897 /* Cast vector to FIRST_IMODE vector. */
41898 ops[i] = gen_reg_rtx (first_imode);
41899 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
41902 /* Interleave low FIRST_IMODE vectors. */
41903 for (i = j = 0; i < n; i += 2, j++)
41905 op0 = gen_reg_rtx (first_imode);
41906 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
41908 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
41909 ops[j] = gen_reg_rtx (second_imode);
41910 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
41913 /* Interleave low SECOND_IMODE vectors. */
41914 switch (second_imode)
41916 case E_V4SImode:
41917 for (i = j = 0; i < n / 2; i += 2, j++)
41919 op0 = gen_reg_rtx (second_imode);
41920 emit_insn (gen_interleave_second_low (op0, ops[i],
41921 ops[i + 1]));
41923 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
41924 vector. */
41925 ops[j] = gen_reg_rtx (third_imode);
41926 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
41928 second_imode = V2DImode;
41929 gen_interleave_second_low = gen_vec_interleave_lowv2di;
41930 /* FALLTHRU */
41932 case E_V2DImode:
41933 op0 = gen_reg_rtx (second_imode);
41934 emit_insn (gen_interleave_second_low (op0, ops[0],
41935 ops[1]));
41937 /* Cast the SECOND_IMODE vector back to a vector in the original
41938 mode. */
41939 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
41940 break;
41942 default:
41943 gcc_unreachable ();
41947 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
41948 all values variable, and none identical. */
41950 static void
41951 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
41952 rtx target, rtx vals)
41954 rtx ops[64], op0, op1, op2, op3, op4, op5;
41955 machine_mode half_mode = VOIDmode;
41956 machine_mode quarter_mode = VOIDmode;
41957 int n, i;
41959 switch (mode)
41961 case E_V2SFmode:
41962 case E_V2SImode:
41963 if (!mmx_ok && !TARGET_SSE)
41964 break;
41965 /* FALLTHRU */
41967 case E_V16SImode:
41968 case E_V16SFmode:
41969 case E_V8DFmode:
41970 case E_V8DImode:
41971 case E_V8SFmode:
41972 case E_V8SImode:
41973 case E_V4DFmode:
41974 case E_V4DImode:
41975 case E_V4SFmode:
41976 case E_V4SImode:
41977 case E_V2DFmode:
41978 case E_V2DImode:
41979 n = GET_MODE_NUNITS (mode);
41980 for (i = 0; i < n; i++)
41981 ops[i] = XVECEXP (vals, 0, i);
41982 ix86_expand_vector_init_concat (mode, target, ops, n);
41983 return;
41985 case E_V2TImode:
41986 for (i = 0; i < 2; i++)
41987 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
41988 op0 = gen_reg_rtx (V4DImode);
41989 ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
41990 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
41991 return;
41993 case E_V4TImode:
41994 for (i = 0; i < 4; i++)
41995 ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
41996 ops[4] = gen_reg_rtx (V4DImode);
41997 ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
41998 ops[5] = gen_reg_rtx (V4DImode);
41999 ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
42000 op0 = gen_reg_rtx (V8DImode);
42001 ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
42002 emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
42003 return;
42005 case E_V32QImode:
42006 half_mode = V16QImode;
42007 goto half;
42009 case E_V16HImode:
42010 half_mode = V8HImode;
42011 goto half;
42013 half:
42014 n = GET_MODE_NUNITS (mode);
42015 for (i = 0; i < n; i++)
42016 ops[i] = XVECEXP (vals, 0, i);
42017 op0 = gen_reg_rtx (half_mode);
42018 op1 = gen_reg_rtx (half_mode);
42019 ix86_expand_vector_init_interleave (half_mode, op0, ops,
42020 n >> 2);
42021 ix86_expand_vector_init_interleave (half_mode, op1,
42022 &ops [n >> 1], n >> 2);
42023 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
42024 return;
42026 case E_V64QImode:
42027 quarter_mode = V16QImode;
42028 half_mode = V32QImode;
42029 goto quarter;
42031 case E_V32HImode:
42032 quarter_mode = V8HImode;
42033 half_mode = V16HImode;
42034 goto quarter;
42036 quarter:
42037 n = GET_MODE_NUNITS (mode);
42038 for (i = 0; i < n; i++)
42039 ops[i] = XVECEXP (vals, 0, i);
42040 op0 = gen_reg_rtx (quarter_mode);
42041 op1 = gen_reg_rtx (quarter_mode);
42042 op2 = gen_reg_rtx (quarter_mode);
42043 op3 = gen_reg_rtx (quarter_mode);
42044 op4 = gen_reg_rtx (half_mode);
42045 op5 = gen_reg_rtx (half_mode);
42046 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
42047 n >> 3);
42048 ix86_expand_vector_init_interleave (quarter_mode, op1,
42049 &ops [n >> 2], n >> 3);
42050 ix86_expand_vector_init_interleave (quarter_mode, op2,
42051 &ops [n >> 1], n >> 3);
42052 ix86_expand_vector_init_interleave (quarter_mode, op3,
42053 &ops [(n >> 1) | (n >> 2)], n >> 3);
42054 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
42055 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
42056 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
42057 return;
42059 case E_V16QImode:
42060 if (!TARGET_SSE4_1)
42061 break;
42062 /* FALLTHRU */
42064 case E_V8HImode:
42065 if (!TARGET_SSE2)
42066 break;
42068 /* Don't use ix86_expand_vector_init_interleave if we can't
42069 move from GPR to SSE register directly. */
42070 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
42071 break;
42073 n = GET_MODE_NUNITS (mode);
42074 for (i = 0; i < n; i++)
42075 ops[i] = XVECEXP (vals, 0, i);
42076 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
42077 return;
42079 case E_V4HImode:
42080 case E_V8QImode:
42081 break;
42083 default:
42084 gcc_unreachable ();
42088 int i, j, n_elts, n_words, n_elt_per_word;
42089 machine_mode inner_mode;
42090 rtx words[4], shift;
42092 inner_mode = GET_MODE_INNER (mode);
42093 n_elts = GET_MODE_NUNITS (mode);
42094 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
42095 n_elt_per_word = n_elts / n_words;
42096 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
42098 for (i = 0; i < n_words; ++i)
42100 rtx word = NULL_RTX;
42102 for (j = 0; j < n_elt_per_word; ++j)
42104 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
42105 elt = convert_modes (word_mode, inner_mode, elt, true);
42107 if (j == 0)
42108 word = elt;
42109 else
42111 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
42112 word, 1, OPTAB_LIB_WIDEN);
42113 word = expand_simple_binop (word_mode, IOR, word, elt,
42114 word, 1, OPTAB_LIB_WIDEN);
42118 words[i] = word;
42121 if (n_words == 1)
42122 emit_move_insn (target, gen_lowpart (mode, words[0]));
42123 else if (n_words == 2)
42125 rtx tmp = gen_reg_rtx (mode);
42126 emit_clobber (tmp);
42127 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
42128 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
42129 emit_move_insn (target, tmp);
42131 else if (n_words == 4)
42133 rtx tmp = gen_reg_rtx (V4SImode);
42134 gcc_assert (word_mode == SImode);
42135 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
42136 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
42137 emit_move_insn (target, gen_lowpart (mode, tmp));
42139 else
42140 gcc_unreachable ();
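/* Editorial example of the word-building fallback above: initializing a
   V4HImode vector {a, b, c, d} on a 32-bit target packs the elements into
   two SImode words, (b << 16) | a and (d << 16) | c, which are then moved
   into the low and high halves of the destination.  */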
42144 /* Initialize vector TARGET via VALS. Suppress the use of MMX
42145 instructions unless MMX_OK is true. */
42147 void
42148 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
42150 machine_mode mode = GET_MODE (target);
42151 machine_mode inner_mode = GET_MODE_INNER (mode);
42152 int n_elts = GET_MODE_NUNITS (mode);
42153 int n_var = 0, one_var = -1;
42154 bool all_same = true, all_const_zero = true;
42155 int i;
42156 rtx x;
42158 /* First, handle initialization from vector elements. */
42159 if (n_elts != XVECLEN (vals, 0))
42161 rtx subtarget = target;
42162 x = XVECEXP (vals, 0, 0);
42163 gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
42164 if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
42166 rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
42167 if (inner_mode == QImode || inner_mode == HImode)
42169 unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
42170 mode = mode_for_vector (SImode, n_bits / 4).require ();
42171 inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
42172 ops[0] = gen_lowpart (inner_mode, ops[0]);
42173 ops[1] = gen_lowpart (inner_mode, ops[1]);
42174 subtarget = gen_reg_rtx (mode);
42176 ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
42177 if (subtarget != target)
42178 emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
42179 return;
42181 gcc_unreachable ();
42184 for (i = 0; i < n_elts; ++i)
42186 x = XVECEXP (vals, 0, i);
42187 if (!(CONST_SCALAR_INT_P (x)
42188 || CONST_DOUBLE_P (x)
42189 || CONST_FIXED_P (x)))
42190 n_var++, one_var = i;
42191 else if (x != CONST0_RTX (inner_mode))
42192 all_const_zero = false;
42193 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
42194 all_same = false;
42197 /* Constants are best loaded from the constant pool. */
42198 if (n_var == 0)
42200 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
42201 return;
42204 /* If all values are identical, broadcast the value. */
42205 if (all_same
42206 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
42207 XVECEXP (vals, 0, 0)))
42208 return;
42210 /* Values where only one field is non-constant are best loaded from
42211 the pool and overwritten via move later. */
42212 if (n_var == 1)
42214 if (all_const_zero
42215 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
42216 XVECEXP (vals, 0, one_var),
42217 one_var))
42218 return;
42220 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
42221 return;
42224 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
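/* Set element ELT of vector TARGET to the scalar value VAL.  MMX
   instructions are used only if MMX_OK.  */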
42227 void
42228 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
42230 machine_mode mode = GET_MODE (target);
42231 machine_mode inner_mode = GET_MODE_INNER (mode);
42232 machine_mode half_mode;
42233 bool use_vec_merge = false;
42234 rtx tmp;
42235 static rtx (*gen_extract[6][2]) (rtx, rtx)
42237 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
42238 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
42239 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
42240 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
42241 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
42242 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
42244 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
42246 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
42247 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
42248 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
42249 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
42250 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
42251 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
42253 int i, j, n;
42254 machine_mode mmode = VOIDmode;
42255 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
42257 switch (mode)
42259 case E_V2SFmode:
42260 case E_V2SImode:
42261 if (mmx_ok)
42263 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42264 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
42265 if (elt == 0)
42266 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42267 else
42268 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42269 emit_insn (gen_rtx_SET (target, tmp));
42270 return;
42272 break;
42274 case E_V2DImode:
42275 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
42276 if (use_vec_merge)
42277 break;
42279 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
42280 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
42281 if (elt == 0)
42282 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
42283 else
42284 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
42285 emit_insn (gen_rtx_SET (target, tmp));
42286 return;
42288 case E_V2DFmode:
42290 rtx op0, op1;
42292 /* For the two element vectors, we implement a VEC_CONCAT with
42293 the extraction of the other element. */
42295 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
42296 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
42298 if (elt == 0)
42299 op0 = val, op1 = tmp;
42300 else
42301 op0 = tmp, op1 = val;
42303 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
42304 emit_insn (gen_rtx_SET (target, tmp));
42306 return;
42308 case E_V4SFmode:
42309 use_vec_merge = TARGET_SSE4_1;
42310 if (use_vec_merge)
42311 break;
42313 switch (elt)
42315 case 0:
42316 use_vec_merge = true;
42317 break;
42319 case 1:
42320 /* tmp = target = A B C D */
42321 tmp = copy_to_reg (target);
42322 /* target = A A B B */
42323 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
42324 /* target = X A B B */
42325 ix86_expand_vector_set (false, target, val, 0);
42326 /* target = A X C D */
42327 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42328 const1_rtx, const0_rtx,
42329 GEN_INT (2+4), GEN_INT (3+4)));
42330 return;
42332 case 2:
42333 /* tmp = target = A B C D */
42334 tmp = copy_to_reg (target);
42335 /* tmp = X B C D */
42336 ix86_expand_vector_set (false, tmp, val, 0);
42337 /* target = A B X D */
42338 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42339 const0_rtx, const1_rtx,
42340 GEN_INT (0+4), GEN_INT (3+4)));
42341 return;
42343 case 3:
42344 /* tmp = target = A B C D */
42345 tmp = copy_to_reg (target);
42346 /* tmp = X B C D */
42347 ix86_expand_vector_set (false, tmp, val, 0);
42348 /* target = A B C X */
42349 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
42350 const0_rtx, const1_rtx,
42351 GEN_INT (2+4), GEN_INT (0+4)));
42352 return;
42354 default:
42355 gcc_unreachable ();
42357 break;
42359 case E_V4SImode:
42360 use_vec_merge = TARGET_SSE4_1;
42361 if (use_vec_merge)
42362 break;
42364 /* Element 0 handled by vec_merge below. */
42365 if (elt == 0)
42367 use_vec_merge = true;
42368 break;
42371 if (TARGET_SSE2)
42373 /* With SSE2, use integer shuffles to swap element 0 and ELT,
42374 store into element 0, then shuffle them back. */
42376 rtx order[4];
42378 order[0] = GEN_INT (elt);
42379 order[1] = const1_rtx;
42380 order[2] = const2_rtx;
42381 order[3] = GEN_INT (3);
42382 order[elt] = const0_rtx;
42384 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42385 order[1], order[2], order[3]));
42387 ix86_expand_vector_set (false, target, val, 0);
42389 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
42390 order[1], order[2], order[3]));
42392 else
42394 /* For SSE1, we have to reuse the V4SF code. */
42395 rtx t = gen_reg_rtx (V4SFmode);
42396 emit_move_insn (t, gen_lowpart (V4SFmode, target));
42397 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
42398 emit_move_insn (target, gen_lowpart (mode, t));
42400 return;
42402 case E_V8HImode:
42403 use_vec_merge = TARGET_SSE2;
42404 break;
42405 case E_V4HImode:
42406 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42407 break;
42409 case E_V16QImode:
42410 use_vec_merge = TARGET_SSE4_1;
42411 break;
42413 case E_V8QImode:
42414 break;
42416 case E_V32QImode:
42417 half_mode = V16QImode;
42418 j = 0;
42419 n = 16;
42420 goto half;
42422 case E_V16HImode:
42423 half_mode = V8HImode;
42424 j = 1;
42425 n = 8;
42426 goto half;
42428 case E_V8SImode:
42429 half_mode = V4SImode;
42430 j = 2;
42431 n = 4;
42432 goto half;
42434 case E_V4DImode:
42435 half_mode = V2DImode;
42436 j = 3;
42437 n = 2;
42438 goto half;
42440 case E_V8SFmode:
42441 half_mode = V4SFmode;
42442 j = 4;
42443 n = 4;
42444 goto half;
42446 case E_V4DFmode:
42447 half_mode = V2DFmode;
42448 j = 5;
42449 n = 2;
42450 goto half;
42452 half:
42453 /* Compute offset. */
42454 i = elt / n;
42455 elt %= n;
42457 gcc_assert (i <= 1);
42459 /* Extract the half. */
42460 tmp = gen_reg_rtx (half_mode);
42461 emit_insn (gen_extract[j][i] (tmp, target));
42463 /* Put val in tmp at elt. */
42464 ix86_expand_vector_set (false, tmp, val, elt);
42466 /* Put it back. */
42467 emit_insn (gen_insert[j][i] (target, target, tmp));
42468 return;
42470 case E_V8DFmode:
42471 if (TARGET_AVX512F)
42473 mmode = QImode;
42474 gen_blendm = gen_avx512f_blendmv8df;
42476 break;
42478 case E_V8DImode:
42479 if (TARGET_AVX512F)
42481 mmode = QImode;
42482 gen_blendm = gen_avx512f_blendmv8di;
42484 break;
42486 case E_V16SFmode:
42487 if (TARGET_AVX512F)
42489 mmode = HImode;
42490 gen_blendm = gen_avx512f_blendmv16sf;
42492 break;
42494 case E_V16SImode:
42495 if (TARGET_AVX512F)
42497 mmode = HImode;
42498 gen_blendm = gen_avx512f_blendmv16si;
42500 break;
42502 case E_V32HImode:
42503 if (TARGET_AVX512F && TARGET_AVX512BW)
42505 mmode = SImode;
42506 gen_blendm = gen_avx512bw_blendmv32hi;
42508 break;
42510 case E_V64QImode:
42511 if (TARGET_AVX512F && TARGET_AVX512BW)
42513 mmode = DImode;
42514 gen_blendm = gen_avx512bw_blendmv64qi;
42516 break;
42518 default:
42519 break;
42522 if (mmode != VOIDmode)
42524 tmp = gen_reg_rtx (mode);
42525 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
42526 /* The avx512*_blendm<mode> expanders have different operand order
42527 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
42528 elements where the mask is set and the second input operand otherwise;
42529 in {sse,avx}*_*blend* the first input operand is used for elements
42530 where the mask is clear and the second input operand otherwise. */
42531 emit_insn (gen_blendm (target, target, tmp,
42532 force_reg (mmode,
42533 gen_int_mode (1 << elt, mmode))));
42535 else if (use_vec_merge)
42537 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
42538 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
42539 emit_insn (gen_rtx_SET (target, tmp));
42541 else
42543 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42545 emit_move_insn (mem, target);
42547 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42548 emit_move_insn (tmp, val);
42550 emit_move_insn (target, mem);
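/* Extract element ELT of vector VEC into scalar TARGET.  MMX instructions
   are used only if MMX_OK.  */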
42554 void
42555 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
42557 machine_mode mode = GET_MODE (vec);
42558 machine_mode inner_mode = GET_MODE_INNER (mode);
42559 bool use_vec_extr = false;
42560 rtx tmp;
42562 switch (mode)
42564 case E_V2SImode:
42565 case E_V2SFmode:
42566 if (!mmx_ok)
42567 break;
42568 /* FALLTHRU */
42570 case E_V2DFmode:
42571 case E_V2DImode:
42572 case E_V2TImode:
42573 case E_V4TImode:
42574 use_vec_extr = true;
42575 break;
42577 case E_V4SFmode:
42578 use_vec_extr = TARGET_SSE4_1;
42579 if (use_vec_extr)
42580 break;
42582 switch (elt)
42584 case 0:
42585 tmp = vec;
42586 break;
42588 case 1:
42589 case 3:
42590 tmp = gen_reg_rtx (mode);
42591 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
42592 GEN_INT (elt), GEN_INT (elt),
42593 GEN_INT (elt+4), GEN_INT (elt+4)));
42594 break;
42596 case 2:
42597 tmp = gen_reg_rtx (mode);
42598 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
42599 break;
42601 default:
42602 gcc_unreachable ();
42604 vec = tmp;
42605 use_vec_extr = true;
42606 elt = 0;
42607 break;
42609 case E_V4SImode:
42610 use_vec_extr = TARGET_SSE4_1;
42611 if (use_vec_extr)
42612 break;
42614 if (TARGET_SSE2)
42616 switch (elt)
42618 case 0:
42619 tmp = vec;
42620 break;
42622 case 1:
42623 case 3:
42624 tmp = gen_reg_rtx (mode);
42625 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
42626 GEN_INT (elt), GEN_INT (elt),
42627 GEN_INT (elt), GEN_INT (elt)));
42628 break;
42630 case 2:
42631 tmp = gen_reg_rtx (mode);
42632 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
42633 break;
42635 default:
42636 gcc_unreachable ();
42638 vec = tmp;
42639 use_vec_extr = true;
42640 elt = 0;
42642 else
42644 /* For SSE1, we have to reuse the V4SF code. */
42645 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
42646 gen_lowpart (V4SFmode, vec), elt);
42647 return;
42649 break;
42651 case E_V8HImode:
42652 use_vec_extr = TARGET_SSE2;
42653 break;
42654 case E_V4HImode:
42655 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
42656 break;
42658 case E_V16QImode:
42659 use_vec_extr = TARGET_SSE4_1;
42660 break;
42662 case E_V8SFmode:
42663 if (TARGET_AVX)
42665 tmp = gen_reg_rtx (V4SFmode);
42666 if (elt < 4)
42667 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
42668 else
42669 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
42670 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42671 return;
42673 break;
42675 case E_V4DFmode:
42676 if (TARGET_AVX)
42678 tmp = gen_reg_rtx (V2DFmode);
42679 if (elt < 2)
42680 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
42681 else
42682 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
42683 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42684 return;
42686 break;
42688 case E_V32QImode:
42689 if (TARGET_AVX)
42691 tmp = gen_reg_rtx (V16QImode);
42692 if (elt < 16)
42693 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
42694 else
42695 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
42696 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42697 return;
42699 break;
42701 case E_V16HImode:
42702 if (TARGET_AVX)
42704 tmp = gen_reg_rtx (V8HImode);
42705 if (elt < 8)
42706 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
42707 else
42708 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
42709 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42710 return;
42712 break;
42714 case E_V8SImode:
42715 if (TARGET_AVX)
42717 tmp = gen_reg_rtx (V4SImode);
42718 if (elt < 4)
42719 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
42720 else
42721 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
42722 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42723 return;
42725 break;
42727 case E_V4DImode:
42728 if (TARGET_AVX)
42730 tmp = gen_reg_rtx (V2DImode);
42731 if (elt < 2)
42732 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
42733 else
42734 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
42735 ix86_expand_vector_extract (false, target, tmp, elt & 1);
42736 return;
42738 break;
42740 case E_V32HImode:
42741 if (TARGET_AVX512BW)
42743 tmp = gen_reg_rtx (V16HImode);
42744 if (elt < 16)
42745 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
42746 else
42747 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
42748 ix86_expand_vector_extract (false, target, tmp, elt & 15);
42749 return;
42751 break;
42753 case E_V64QImode:
42754 if (TARGET_AVX512BW)
42756 tmp = gen_reg_rtx (V32QImode);
42757 if (elt < 32)
42758 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
42759 else
42760 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
42761 ix86_expand_vector_extract (false, target, tmp, elt & 31);
42762 return;
42764 break;
42766 case E_V16SFmode:
42767 tmp = gen_reg_rtx (V8SFmode);
42768 if (elt < 8)
42769 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
42770 else
42771 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
42772 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42773 return;
42775 case E_V8DFmode:
42776 tmp = gen_reg_rtx (V4DFmode);
42777 if (elt < 4)
42778 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
42779 else
42780 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
42781 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42782 return;
42784 case E_V16SImode:
42785 tmp = gen_reg_rtx (V8SImode);
42786 if (elt < 8)
42787 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
42788 else
42789 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
42790 ix86_expand_vector_extract (false, target, tmp, elt & 7);
42791 return;
42793 case E_V8DImode:
42794 tmp = gen_reg_rtx (V4DImode);
42795 if (elt < 4)
42796 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
42797 else
42798 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
42799 ix86_expand_vector_extract (false, target, tmp, elt & 3);
42800 return;
42802 case E_V8QImode:
42803 /* ??? Could extract the appropriate HImode element and shift. */
42804 default:
42805 break;
42808 if (use_vec_extr)
42810 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
42811 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
42813 /* Let the rtl optimizers know about the zero extension performed. */
42814 if (inner_mode == QImode || inner_mode == HImode)
42816 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
42817 target = gen_lowpart (SImode, target);
42820 emit_insn (gen_rtx_SET (target, tmp));
42822 else
42824 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
42826 emit_move_insn (mem, vec);
42828 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
42829 emit_move_insn (target, tmp);
42833 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
42834 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
42835 The upper bits of DEST are undefined, though they shouldn't cause
42836 exceptions (some bits from src or all zeros are ok). */
42838 static void
42839 emit_reduc_half (rtx dest, rtx src, int i)
42841 rtx tem, d = dest;
42842 switch (GET_MODE (src))
42844 case E_V4SFmode:
42845 if (i == 128)
42846 tem = gen_sse_movhlps (dest, src, src);
42847 else
42848 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
42849 GEN_INT (1 + 4), GEN_INT (1 + 4));
42850 break;
42851 case E_V2DFmode:
42852 tem = gen_vec_interleave_highv2df (dest, src, src);
42853 break;
42854 case E_V16QImode:
42855 case E_V8HImode:
42856 case E_V4SImode:
42857 case E_V2DImode:
42858 d = gen_reg_rtx (V1TImode);
42859 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
42860 GEN_INT (i / 2));
42861 break;
42862 case E_V8SFmode:
42863 if (i == 256)
42864 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
42865 else
42866 tem = gen_avx_shufps256 (dest, src, src,
42867 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
42868 break;
42869 case E_V4DFmode:
42870 if (i == 256)
42871 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
42872 else
42873 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
42874 break;
42875 case E_V32QImode:
42876 case E_V16HImode:
42877 case E_V8SImode:
42878 case E_V4DImode:
42879 if (i == 256)
42881 if (GET_MODE (dest) != V4DImode)
42882 d = gen_reg_rtx (V4DImode);
42883 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
42884 gen_lowpart (V4DImode, src),
42885 const1_rtx);
42887 else
42889 d = gen_reg_rtx (V2TImode);
42890 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
42891 GEN_INT (i / 2));
42893 break;
42894 case E_V64QImode:
42895 case E_V32HImode:
42896 case E_V16SImode:
42897 case E_V16SFmode:
42898 case E_V8DImode:
42899 case E_V8DFmode:
42900 if (i > 128)
42901 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
42902 gen_lowpart (V16SImode, src),
42903 gen_lowpart (V16SImode, src),
42904 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
42905 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
42906 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
42907 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
42908 GEN_INT (0xC), GEN_INT (0xD),
42909 GEN_INT (0xE), GEN_INT (0xF),
42910 GEN_INT (0x10), GEN_INT (0x11),
42911 GEN_INT (0x12), GEN_INT (0x13),
42912 GEN_INT (0x14), GEN_INT (0x15),
42913 GEN_INT (0x16), GEN_INT (0x17));
42914 else
42915 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
42916 gen_lowpart (V16SImode, src),
42917 GEN_INT (i == 128 ? 0x2 : 0x1),
42918 GEN_INT (0x3),
42919 GEN_INT (0x3),
42920 GEN_INT (0x3),
42921 GEN_INT (i == 128 ? 0x6 : 0x5),
42922 GEN_INT (0x7),
42923 GEN_INT (0x7),
42924 GEN_INT (0x7),
42925 GEN_INT (i == 128 ? 0xA : 0x9),
42926 GEN_INT (0xB),
42927 GEN_INT (0xB),
42928 GEN_INT (0xB),
42929 GEN_INT (i == 128 ? 0xE : 0xD),
42930 GEN_INT (0xF),
42931 GEN_INT (0xF),
42932 GEN_INT (0xF));
42933 break;
42934 default:
42935 gcc_unreachable ();
42937 emit_insn (tem);
42938 if (d != dest)
42939 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
42942 /* Expand a vector reduction. FN is the binary pattern to reduce;
42943 DEST is the destination; IN is the input vector. */
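/* A rough C-level sketch of the loop below (illustrative only):

     for (i = GET_MODE_BITSIZE (mode);
	  i > GET_MODE_UNIT_BITSIZE (mode);
	  i >>= 1)
       {
	 half = the upper i/2 bits of VEC, shifted down to bit 0;
	 vec = fn (vec, half);
       }

   so element 0 of DEST ends up holding the full reduction.  */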
42945 void
42946 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
42948 rtx half, dst, vec = in;
42949 machine_mode mode = GET_MODE (in);
42950 int i;
42952 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
42953 if (TARGET_SSE4_1
42954 && mode == V8HImode
42955 && fn == gen_uminv8hi3)
42957 emit_insn (gen_sse4_1_phminposuw (dest, in));
42958 return;
42961 for (i = GET_MODE_BITSIZE (mode);
42962 i > GET_MODE_UNIT_BITSIZE (mode);
42963 i >>= 1)
42965 half = gen_reg_rtx (mode);
42966 emit_reduc_half (half, vec, i);
42967 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
42968 dst = dest;
42969 else
42970 dst = gen_reg_rtx (mode);
42971 emit_insn (fn (dst, half, vec));
42972 vec = dst;
42976 /* Target hook for scalar_mode_supported_p. */
42977 static bool
42978 ix86_scalar_mode_supported_p (scalar_mode mode)
42980 if (DECIMAL_FLOAT_MODE_P (mode))
42981 return default_decimal_float_supported_p ();
42982 else if (mode == TFmode)
42983 return true;
42984 else
42985 return default_scalar_mode_supported_p (mode);
42988 /* Implements target hook vector_mode_supported_p. */
42989 static bool
42990 ix86_vector_mode_supported_p (machine_mode mode)
42992 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42993 return true;
42994 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42995 return true;
42996 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42997 return true;
42998 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42999 return true;
43000 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
43001 return true;
43002 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
43003 return true;
43004 return false;
43007 /* Target hook for c_mode_for_suffix. */
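/* For example, the suffixes handled here make 1.0q a TFmode (__float128)
   constant and 1.0w an XFmode (__float80) constant.  */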
43008 static machine_mode
43009 ix86_c_mode_for_suffix (char suffix)
43011 if (suffix == 'q')
43012 return TFmode;
43013 if (suffix == 'w')
43014 return XFmode;
43016 return VOIDmode;
43019 /* Worker function for TARGET_MD_ASM_ADJUST.
43021 We implement asm flag outputs, and maintain source compatibility
43022 with the old cc0-based compiler. */
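/* As an illustrative (hypothetical) use of the flag-output constraints
   handled here, "=@ccz" binds an output to the Z flag:

     int eq;
     asm ("cmpl %2, %1" : "=@ccz" (eq) : "r" (a), "r" (b));

   sets EQ to 1 when A == B and to 0 otherwise.  */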
43024 static rtx_insn *
43025 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
43026 vec<const char *> &constraints,
43027 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
43029 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
43030 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
43032 bool saw_asm_flag = false;
43034 start_sequence ();
43035 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
43037 const char *con = constraints[i];
43038 if (strncmp (con, "=@cc", 4) != 0)
43039 continue;
43040 con += 4;
43041 if (strchr (con, ',') != NULL)
43043 error ("alternatives not allowed in asm flag output");
43044 continue;
43047 bool invert = false;
43048 if (con[0] == 'n')
43049 invert = true, con++;
43051 machine_mode mode = CCmode;
43052 rtx_code code = UNKNOWN;
43054 switch (con[0])
43056 case 'a':
43057 if (con[1] == 0)
43058 mode = CCAmode, code = EQ;
43059 else if (con[1] == 'e' && con[2] == 0)
43060 mode = CCCmode, code = NE;
43061 break;
43062 case 'b':
43063 if (con[1] == 0)
43064 mode = CCCmode, code = EQ;
43065 else if (con[1] == 'e' && con[2] == 0)
43066 mode = CCAmode, code = NE;
43067 break;
43068 case 'c':
43069 if (con[1] == 0)
43070 mode = CCCmode, code = EQ;
43071 break;
43072 case 'e':
43073 if (con[1] == 0)
43074 mode = CCZmode, code = EQ;
43075 break;
43076 case 'g':
43077 if (con[1] == 0)
43078 mode = CCGCmode, code = GT;
43079 else if (con[1] == 'e' && con[2] == 0)
43080 mode = CCGCmode, code = GE;
43081 break;
43082 case 'l':
43083 if (con[1] == 0)
43084 mode = CCGCmode, code = LT;
43085 else if (con[1] == 'e' && con[2] == 0)
43086 mode = CCGCmode, code = LE;
43087 break;
43088 case 'o':
43089 if (con[1] == 0)
43090 mode = CCOmode, code = EQ;
43091 break;
43092 case 'p':
43093 if (con[1] == 0)
43094 mode = CCPmode, code = EQ;
43095 break;
43096 case 's':
43097 if (con[1] == 0)
43098 mode = CCSmode, code = EQ;
43099 break;
43100 case 'z':
43101 if (con[1] == 0)
43102 mode = CCZmode, code = EQ;
43103 break;
43105 if (code == UNKNOWN)
43107 error ("unknown asm flag output %qs", constraints[i]);
43108 continue;
43110 if (invert)
43111 code = reverse_condition (code);
43113 rtx dest = outputs[i];
43114 if (!saw_asm_flag)
43116 /* This is the first asm flag output. Here we put the flags
43117 register in as the real output and adjust the condition to
43118 allow it. */
43119 constraints[i] = "=Bf";
43120 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
43121 saw_asm_flag = true;
43123 else
43125 /* We don't need the flags register as output twice. */
43126 constraints[i] = "=X";
43127 outputs[i] = gen_rtx_SCRATCH (SImode);
43130 rtx x = gen_rtx_REG (mode, FLAGS_REG);
43131 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
43133 machine_mode dest_mode = GET_MODE (dest);
43134 if (!SCALAR_INT_MODE_P (dest_mode))
43136 error ("invalid type for asm flag output");
43137 continue;
43140 if (dest_mode == DImode && !TARGET_64BIT)
43141 dest_mode = SImode;
43143 if (dest_mode != QImode)
43145 rtx destqi = gen_reg_rtx (QImode);
43146 emit_insn (gen_rtx_SET (destqi, x));
43148 if (TARGET_ZERO_EXTEND_WITH_AND
43149 && optimize_function_for_speed_p (cfun))
43151 x = force_reg (dest_mode, const0_rtx);
43153 emit_insn (gen_movstrictqi
43154 (gen_lowpart (QImode, x), destqi));
43156 else
43157 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
43160 if (dest_mode != GET_MODE (dest))
43162 rtx tmp = gen_reg_rtx (SImode);
43164 emit_insn (gen_rtx_SET (tmp, x));
43165 emit_insn (gen_zero_extendsidi2 (dest, tmp));
43167 else
43168 emit_insn (gen_rtx_SET (dest, x));
43170 rtx_insn *seq = get_insns ();
43171 end_sequence ();
43173 if (saw_asm_flag)
43174 return seq;
43175 else
43177 /* If we had no asm flag outputs, clobber the flags. */
43178 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
43179 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
43180 return NULL;
43184 /* Implements target vector targetm.asm.encode_section_info. */
43186 static void ATTRIBUTE_UNUSED
43187 ix86_encode_section_info (tree decl, rtx rtl, int first)
43189 default_encode_section_info (decl, rtl, first);
43191 if (ix86_in_large_data_p (decl))
43192 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
43195 /* Worker function for REVERSE_CONDITION. */
43197 enum rtx_code
43198 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
43200 return (mode == CCFPmode
43201 ? reverse_condition_maybe_unordered (code)
43202 : reverse_condition (code));
43205 /* Output code to perform an x87 FP register move, from OPERANDS[1]
43206 to OPERANDS[0]. */
43208 const char *
43209 output_387_reg_move (rtx_insn *insn, rtx *operands)
43211 if (REG_P (operands[0]))
43213 if (REG_P (operands[1])
43214 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43216 if (REGNO (operands[0]) == FIRST_STACK_REG)
43217 return output_387_ffreep (operands, 0);
43218 return "fstp\t%y0";
43220 if (STACK_TOP_P (operands[0]))
43221 return "fld%Z1\t%y1";
43222 return "fst\t%y0";
43224 else if (MEM_P (operands[0]))
43226 gcc_assert (REG_P (operands[1]));
43227 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
43228 return "fstp%Z0\t%y0";
43229 else
43231 /* There is no non-popping store to memory for XFmode.
43232 So if we need one, follow the store with a load. */
43233 if (GET_MODE (operands[0]) == XFmode)
43234 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
43235 else
43236 return "fst%Z0\t%y0";
43239 else
43240 gcc_unreachable();
43243 /* Output code to perform a conditional jump to LABEL, if C2 flag in
43244 FP status register is set. */
43246 void
43247 ix86_emit_fp_unordered_jump (rtx label)
43249 rtx reg = gen_reg_rtx (HImode);
43250 rtx temp;
43252 emit_insn (gen_x86_fnstsw_1 (reg));
43254 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
43256 emit_insn (gen_x86_sahf_1 (reg));
43258 temp = gen_rtx_REG (CCmode, FLAGS_REG);
43259 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
43261 else
43263 emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
43265 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
43266 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
43269 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
43270 gen_rtx_LABEL_REF (VOIDmode, label),
43271 pc_rtx);
43272 temp = gen_rtx_SET (pc_rtx, temp);
43274 emit_jump_insn (temp);
43275 predict_jump (REG_BR_PROB_BASE * 10 / 100);
43278 /* Output code to perform a log1p XFmode calculation. */
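/* Illustrative sketch; the threshold comes from the x87 restriction that
   fyl2xp1 is only valid for |x| below roughly 1 - sqrt(2)/2:

     if (fabs (op1) < 0.29289321881345247561810596348408353)
       op0 = fyl2xp1 (op1, ln2);	// ln (1 + x) computed directly
     else
       op0 = fyl2x (1.0 + op1, ln2);	// form 1 + x, then take the log
*/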
43280 void ix86_emit_i387_log1p (rtx op0, rtx op1)
43282 rtx_code_label *label1 = gen_label_rtx ();
43283 rtx_code_label *label2 = gen_label_rtx ();
43285 rtx tmp = gen_reg_rtx (XFmode);
43286 rtx tmp2 = gen_reg_rtx (XFmode);
43287 rtx test;
43289 emit_insn (gen_absxf2 (tmp, op1));
43290 test = gen_rtx_GE (VOIDmode, tmp,
43291 const_double_from_real_value (
43292 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
43293 XFmode));
43294 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
43296 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43297 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
43298 emit_jump (label2);
43300 emit_label (label1);
43301 emit_move_insn (tmp, CONST1_RTX (XFmode));
43302 emit_insn (gen_addxf3 (tmp, op1, tmp));
43303 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
43304 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
43306 emit_label (label2);
43309 /* Emit x87 code to compute round (OP1), storing the result in OP0. */
43310 void ix86_emit_i387_round (rtx op0, rtx op1)
43312 machine_mode inmode = GET_MODE (op1);
43313 machine_mode outmode = GET_MODE (op0);
43314 rtx e1, e2, res, tmp, tmp1, half;
43315 rtx scratch = gen_reg_rtx (HImode);
43316 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
43317 rtx_code_label *jump_label = gen_label_rtx ();
43318 rtx insn;
43319 rtx (*gen_abs) (rtx, rtx);
43320 rtx (*gen_neg) (rtx, rtx);
43322 switch (inmode)
43324 case E_SFmode:
43325 gen_abs = gen_abssf2;
43326 break;
43327 case E_DFmode:
43328 gen_abs = gen_absdf2;
43329 break;
43330 case E_XFmode:
43331 gen_abs = gen_absxf2;
43332 break;
43333 default:
43334 gcc_unreachable ();
43337 switch (outmode)
43339 case E_SFmode:
43340 gen_neg = gen_negsf2;
43341 break;
43342 case E_DFmode:
43343 gen_neg = gen_negdf2;
43344 break;
43345 case E_XFmode:
43346 gen_neg = gen_negxf2;
43347 break;
43348 case E_HImode:
43349 gen_neg = gen_neghi2;
43350 break;
43351 case E_SImode:
43352 gen_neg = gen_negsi2;
43353 break;
43354 case E_DImode:
43355 gen_neg = gen_negdi2;
43356 break;
43357 default:
43358 gcc_unreachable ();
43361 e1 = gen_reg_rtx (inmode);
43362 e2 = gen_reg_rtx (inmode);
43363 res = gen_reg_rtx (outmode);
43365 half = const_double_from_real_value (dconsthalf, inmode);
43367 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
43369 /* scratch = fxam(op1) */
43370 emit_insn (gen_rtx_SET (scratch,
43371 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
43372 UNSPEC_FXAM)));
43373 /* e1 = fabs(op1) */
43374 emit_insn (gen_abs (e1, op1));
43376 /* e2 = e1 + 0.5 */
43377 half = force_reg (inmode, half);
43378 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
43380 /* res = floor(e2) */
43381 if (inmode != XFmode)
43383 tmp1 = gen_reg_rtx (XFmode);
43385 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
43387 else
43388 tmp1 = e2;
43390 switch (outmode)
43392 case E_SFmode:
43393 case E_DFmode:
43395 rtx tmp0 = gen_reg_rtx (XFmode);
43397 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
43399 emit_insn (gen_rtx_SET (res,
43400 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
43401 UNSPEC_TRUNC_NOOP)));
43403 break;
43404 case E_XFmode:
43405 emit_insn (gen_frndintxf2_floor (res, tmp1));
43406 break;
43407 case E_HImode:
43408 emit_insn (gen_lfloorxfhi2 (res, tmp1));
43409 break;
43410 case E_SImode:
43411 emit_insn (gen_lfloorxfsi2 (res, tmp1));
43412 break;
43413 case E_DImode:
43414 emit_insn (gen_lfloorxfdi2 (res, tmp1));
43415 break;
43416 default:
43417 gcc_unreachable ();
43420 /* flags = signbit(a) */
43421 emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
43423 /* if (flags) then res = -res */
43424 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
43425 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
43426 gen_rtx_LABEL_REF (VOIDmode, jump_label),
43427 pc_rtx);
43428 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43429 predict_jump (REG_BR_PROB_BASE * 50 / 100);
43430 JUMP_LABEL (insn) = jump_label;
43432 emit_insn (gen_neg (res, res));
43434 emit_label (jump_label);
43435 LABEL_NUSES (jump_label) = 1;
43437 emit_move_insn (op0, res);
43440 /* Output code to perform a Newton-Raphson approximation of a single precision
43441 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
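/* Illustrative note: one Newton-Raphson step refines an estimate x0 of
   1/b as

     x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)

   which is exactly the e0/e1/x1 computation emitted below.  */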
43443 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
43445 rtx x0, x1, e0, e1;
43447 x0 = gen_reg_rtx (mode);
43448 e0 = gen_reg_rtx (mode);
43449 e1 = gen_reg_rtx (mode);
43450 x1 = gen_reg_rtx (mode);
43452 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
43454 b = force_reg (mode, b);
43456 /* x0 = rcp(b) estimate */
43457 if (mode == V16SFmode || mode == V8DFmode)
43459 if (TARGET_AVX512ER)
43461 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43462 UNSPEC_RCP28)));
43463 /* res = a * x0 */
43464 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
43465 return;
43467 else
43468 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43469 UNSPEC_RCP14)));
43471 else
43472 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
43473 UNSPEC_RCP)));
43475 /* e0 = x0 * b */
43476 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
43478 /* e0 = x0 * e0 */
43479 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
43481 /* e1 = x0 + x0 */
43482 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
43484 /* x1 = e1 - e0 */
43485 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
43487 /* res = a * x1 */
43488 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
43491 /* Output code to perform a Newton-Raphson approximation of a
43492 single precision floating point [reciprocal] square root. */
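/* Illustrative note: one Newton-Raphson step refines an estimate x0 of
   1/sqrt(a) as

     x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0)

   and for sqrt the same expression is multiplied by a (i.e. a * x0 is
   used in place of x0 in the final factor), since a / sqrt(a) = sqrt(a);
   both forms match the e0..e3 computation emitted below.  */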
43494 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
43496 rtx x0, e0, e1, e2, e3, mthree, mhalf;
43497 REAL_VALUE_TYPE r;
43498 int unspec;
43500 x0 = gen_reg_rtx (mode);
43501 e0 = gen_reg_rtx (mode);
43502 e1 = gen_reg_rtx (mode);
43503 e2 = gen_reg_rtx (mode);
43504 e3 = gen_reg_rtx (mode);
43506 if (TARGET_AVX512ER && mode == V16SFmode)
43508 if (recip)
43509 /* res = rsqrt28(a) estimate */
43510 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43511 UNSPEC_RSQRT28)));
43512 else
43514 /* x0 = rsqrt28(a) estimate */
43515 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43516 UNSPEC_RSQRT28)));
43517 /* res = rcp28(x0) estimate */
43518 emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
43519 UNSPEC_RCP28)));
43521 return;
43524 real_from_integer (&r, VOIDmode, -3, SIGNED);
43525 mthree = const_double_from_real_value (r, SFmode);
43527 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
43528 mhalf = const_double_from_real_value (r, SFmode);
43529 unspec = UNSPEC_RSQRT;
43531 if (VECTOR_MODE_P (mode))
43533 mthree = ix86_build_const_vector (mode, true, mthree);
43534 mhalf = ix86_build_const_vector (mode, true, mhalf);
43535 /* There is no 512-bit rsqrt. There is however rsqrt14. */
43536 if (GET_MODE_SIZE (mode) == 64)
43537 unspec = UNSPEC_RSQRT14;
43540 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
43541 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
43543 a = force_reg (mode, a);
43545 /* x0 = rsqrt(a) estimate */
43546 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
43547 unspec)));
43549 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
43550 if (!recip)
43552 rtx zero = force_reg (mode, CONST0_RTX(mode));
43553 rtx mask;
43555 /* Handle masked compare. */
43556 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
43558 mask = gen_reg_rtx (HImode);
43559 /* Imm value 0x4 corresponds to not-equal comparison. */
43560 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
43561 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
43563 else
43565 mask = gen_reg_rtx (mode);
43566 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
43567 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
43571 /* e0 = x0 * a */
43572 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
43573 /* e1 = e0 * x0 */
43574 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
43576 /* e2 = e1 - 3. */
43577 mthree = force_reg (mode, mthree);
43578 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
43580 mhalf = force_reg (mode, mhalf);
43581 if (recip)
43582 /* e3 = -.5 * x0 */
43583 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
43584 else
43585 /* e3 = -.5 * e0 */
43586 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
43587 /* ret = e2 * e3 */
43588 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
43591 #ifdef TARGET_SOLARIS
43592 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
43594 static void
43595 i386_solaris_elf_named_section (const char *name, unsigned int flags,
43596 tree decl)
43598 /* With Binutils 2.15, the "@unwind" marker must be specified on
43599 every occurrence of the ".eh_frame" section, not just the first
43600 one. */
43601 if (TARGET_64BIT
43602 && strcmp (name, ".eh_frame") == 0)
43604 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
43605 flags & SECTION_WRITE ? "aw" : "a");
43606 return;
43609 #ifndef USE_GAS
43610 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
43612 solaris_elf_asm_comdat_section (name, flags, decl);
43613 return;
43615 #endif
43617 default_elf_asm_named_section (name, flags, decl);
43619 #endif /* TARGET_SOLARIS */
43621 /* Return the mangling of TYPE if it is an extended fundamental type. */
43623 static const char *
43624 ix86_mangle_type (const_tree type)
43626 type = TYPE_MAIN_VARIANT (type);
43628 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
43629 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
43630 return NULL;
43632 switch (TYPE_MODE (type))
43634 case E_TFmode:
43635 /* __float128 is "g". */
43636 return "g";
43637 case E_XFmode:
43638 /* "long double" or __float80 is "e". */
43639 return "e";
43640 default:
43641 return NULL;
43645 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
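/* Implement TARGET_STACK_PROTECT_GUARD.  For TLS-based guards this returns
   a MEM_REF in the configured segment address space, either at a fixed
   offset or through a named TLS symbol; otherwise it falls back to the
   generic __stack_chk_guard variable.  */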
43647 static tree
43648 ix86_stack_protect_guard (void)
43650 if (TARGET_SSP_TLS_GUARD)
43652 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
43653 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
43654 tree type = build_qualified_type (type_node, qual);
43655 tree t;
43657 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
43659 t = ix86_tls_stack_chk_guard_decl;
43661 if (t == NULL)
43663 rtx x;
43665 t = build_decl
43666 (UNKNOWN_LOCATION, VAR_DECL,
43667 get_identifier (ix86_stack_protector_guard_symbol_str),
43668 type);
43669 TREE_STATIC (t) = 1;
43670 TREE_PUBLIC (t) = 1;
43671 DECL_EXTERNAL (t) = 1;
43672 TREE_USED (t) = 1;
43673 TREE_THIS_VOLATILE (t) = 1;
43674 DECL_ARTIFICIAL (t) = 1;
43675 DECL_IGNORED_P (t) = 1;
43677 /* Do not share RTL as the declaration is visible outside of
43678 the current function. */
43679 x = DECL_RTL (t);
43680 RTX_FLAG (x, used) = 1;
43682 ix86_tls_stack_chk_guard_decl = t;
43685 else
43687 tree asptrtype = build_pointer_type (type);
43689 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
43690 t = build2 (MEM_REF, asptrtype, t,
43691 build_int_cst (asptrtype, 0));
43694 return t;
43697 return default_stack_protect_guard ();
43700 /* For 32-bit code we can save PIC register setup by using
43701 __stack_chk_fail_local hidden function instead of calling
43702 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
43703 register, so it is better to call __stack_chk_fail directly. */
43705 static tree ATTRIBUTE_UNUSED
43706 ix86_stack_protect_fail (void)
43708 return TARGET_64BIT
43709 ? default_external_stack_protect_fail ()
43710 : default_hidden_stack_protect_fail ();
43713 /* Select a format to encode pointers in exception handling data. CODE
43714 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
43715 true if the symbol may be affected by dynamic relocations.
43717 ??? All x86 object file formats are capable of representing this.
43718 After all, the relocation needed is the same as for the call insn.
43719 Whether or not a particular assembler allows us to enter such, I
43720 guess we'll have to see. */
43722 asm_preferred_eh_data_format (int code, int global)
43724 if (flag_pic)
43726 int type = DW_EH_PE_sdata8;
43727 if (!TARGET_64BIT
43728 || ix86_cmodel == CM_SMALL_PIC
43729 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
43730 type = DW_EH_PE_sdata4;
43731 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
43733 if (ix86_cmodel == CM_SMALL
43734 || (ix86_cmodel == CM_MEDIUM && code))
43735 return DW_EH_PE_udata4;
43736 return DW_EH_PE_absptr;
43739 /* Expand copysign from SIGN to the positive value ABS_VALUE
43740 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
43741 the sign-bit. */
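/* In scalar terms (illustrative): RESULT = ABS_VALUE | (SIGN & signbit),
   where "signbit" is the sign-bit mask taken from MASK or built here.  */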
43742 static void
43743 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
43745 machine_mode mode = GET_MODE (sign);
43746 rtx sgn = gen_reg_rtx (mode);
43747 if (mask == NULL_RTX)
43749 machine_mode vmode;
43751 if (mode == SFmode)
43752 vmode = V4SFmode;
43753 else if (mode == DFmode)
43754 vmode = V2DFmode;
43755 else
43756 vmode = mode;
43758 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
43759 if (!VECTOR_MODE_P (mode))
43761 /* We need to generate a scalar mode mask in this case. */
43762 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43763 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43764 mask = gen_reg_rtx (mode);
43765 emit_insn (gen_rtx_SET (mask, tmp));
43768 else
43769 mask = gen_rtx_NOT (mode, mask);
43770 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
43771 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
43774 /* Expand fabs (OP0) and return a new rtx that holds the result. The
43775 mask for masking out the sign-bit is stored in *SMASK, if that is
43776 non-null. */
43777 static rtx
43778 ix86_expand_sse_fabs (rtx op0, rtx *smask)
43780 machine_mode vmode, mode = GET_MODE (op0);
43781 rtx xa, mask;
43783 xa = gen_reg_rtx (mode);
43784 if (mode == SFmode)
43785 vmode = V4SFmode;
43786 else if (mode == DFmode)
43787 vmode = V2DFmode;
43788 else
43789 vmode = mode;
43790 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
43791 if (!VECTOR_MODE_P (mode))
43793 /* We need to generate a scalar mode mask in this case. */
43794 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
43795 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
43796 mask = gen_reg_rtx (mode);
43797 emit_insn (gen_rtx_SET (mask, tmp));
43799 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
43801 if (smask)
43802 *smask = mask;
43804 return xa;
43807 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
43808 swapping the operands if SWAP_OPERANDS is true. The expanded
43809 code is a forward jump to a newly created label in case the
43810 comparison is true. The generated label rtx is returned. */
43811 static rtx_code_label *
43812 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
43813 bool swap_operands)
43815 bool unordered_compare = ix86_unordered_fp_compare (code);
43816 rtx_code_label *label;
43817 rtx tmp, reg;
43819 if (swap_operands)
43820 std::swap (op0, op1);
43822 label = gen_label_rtx ();
43823 tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
43824 if (unordered_compare)
43825 tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
43826 reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
43827 emit_insn (gen_rtx_SET (reg, tmp));
43828 tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
43829 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
43830 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
43831 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
43832 JUMP_LABEL (tmp) = label;
43834 return label;
43837 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
43838 using comparison code CODE. Operands are swapped for the comparison if
43839 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
43840 static rtx
43841 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
43842 bool swap_operands)
43844 rtx (*insn)(rtx, rtx, rtx, rtx);
43845 machine_mode mode = GET_MODE (op0);
43846 rtx mask = gen_reg_rtx (mode);
43848 if (swap_operands)
43849 std::swap (op0, op1);
43851 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
43853 emit_insn (insn (mask, op0, op1,
43854 gen_rtx_fmt_ee (code, mode, op0, op1)));
43855 return mask;
43858 /* Generate and return a rtx of mode MODE for 2**n where n is the number
43859 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
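/* Rationale (illustrative): for DFmode values with |x| < 2**52 the sum
   x + 2**52 leaves no fraction bits in the mantissa, so

     (x + 2**52) - 2**52

   is x rounded to an integer under the current rounding mode; SFmode
   uses 2**23 in the same way.  */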
43860 static rtx
43861 ix86_gen_TWO52 (machine_mode mode)
43863 REAL_VALUE_TYPE TWO52r;
43864 rtx TWO52;
43866 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
43867 TWO52 = const_double_from_real_value (TWO52r, mode);
43868 TWO52 = force_reg (mode, TWO52);
43870 return TWO52;
43873 /* Expand SSE sequence for computing lround from OP1 storing
43874 into OP0. */
43875 void
43876 ix86_expand_lround (rtx op0, rtx op1)
43878 /* C code for the stuff we're doing below:
43879 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
43880 return (long)tmp;
43882 machine_mode mode = GET_MODE (op1);
43883 const struct real_format *fmt;
43884 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
43885 rtx adj;
43887 /* load nextafter (0.5, 0.0) */
43888 fmt = REAL_MODE_FORMAT (mode);
43889 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
43890 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
43892 /* adj = copysign (0.5, op1) */
43893 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
43894 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
43896 /* adj = op1 + adj */
43897 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
43899 /* op0 = (imode)adj */
43900 expand_fix (op0, adj, 0);
43903 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
43904 storing the result into OPERAND0. */
43905 void
43906 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
43908 /* C code for the stuff we're doing below (for do_floor):
43909 xi = (long)op1;
43910 xi -= (double)xi > op1 ? 1 : 0;
43911 return xi;
43913 machine_mode fmode = GET_MODE (op1);
43914 machine_mode imode = GET_MODE (op0);
43915 rtx ireg, freg, tmp;
43916 rtx_code_label *label;
43918 /* reg = (long)op1 */
43919 ireg = gen_reg_rtx (imode);
43920 expand_fix (ireg, op1, 0);
43922 /* freg = (double)reg */
43923 freg = gen_reg_rtx (fmode);
43924 expand_float (freg, ireg, 0);
43926 /* ireg = (freg > op1) ? ireg - 1 : ireg */
43927 label = ix86_expand_sse_compare_and_jump (UNLE,
43928 freg, op1, !do_floor);
43929 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
43930 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
43931 emit_move_insn (ireg, tmp);
43933 emit_label (label);
43934 LABEL_NUSES (label) = 1;
43936 emit_move_insn (op0, ireg);
43939 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
43940 result in OPERAND0. */
43941 void
43942 ix86_expand_rint (rtx operand0, rtx operand1)
43944 /* C code for the stuff we're doing below:
43945 xa = fabs (operand1);
43946 if (!isless (xa, 2**52))
43947 return operand1;
43948 xa = xa + 2**52 - 2**52;
43949 return copysign (xa, operand1);
43951 machine_mode mode = GET_MODE (operand0);
43952 rtx res, xa, TWO52, mask;
43953 rtx_code_label *label;
43955 res = gen_reg_rtx (mode);
43956 emit_move_insn (res, operand1);
43958 /* xa = abs (operand1) */
43959 xa = ix86_expand_sse_fabs (res, &mask);
43961 /* if (!isless (xa, TWO52)) goto label; */
43962 TWO52 = ix86_gen_TWO52 (mode);
43963 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
43965 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
43966 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
43968 ix86_sse_copysign_to_positive (res, xa, res, mask);
43970 emit_label (label);
43971 LABEL_NUSES (label) = 1;
43973 emit_move_insn (operand0, res);
43976 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
43977 into OPERAND0. */
43978 void
43979 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
43981 /* C code for the stuff we expand below.
43982 double xa = fabs (x), x2;
43983 if (!isless (xa, TWO52))
43984 return x;
43985 xa = xa + TWO52 - TWO52;
43986 x2 = copysign (xa, x);
43987 Compensate. Floor:
43988 if (x2 > x)
43989 x2 -= 1;
43990 Compensate. Ceil:
43991 if (x2 < x)
43992 x2 -= -1;
43993 return x2;
43995 machine_mode mode = GET_MODE (operand0);
43996 rtx xa, TWO52, tmp, one, res, mask;
43997 rtx_code_label *label;
43999 TWO52 = ix86_gen_TWO52 (mode);
44001 /* Temporary for holding the result, initialized to the input
44002 operand to ease control flow. */
44003 res = gen_reg_rtx (mode);
44004 emit_move_insn (res, operand1);
44006 /* xa = abs (operand1) */
44007 xa = ix86_expand_sse_fabs (res, &mask);
44009 /* if (!isless (xa, TWO52)) goto label; */
44010 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44012 /* xa = xa + TWO52 - TWO52; */
44013 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44014 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44016 /* xa = copysign (xa, operand1) */
44017 ix86_sse_copysign_to_positive (xa, xa, res, mask);
44019 /* generate 1.0 or -1.0 */
44020 one = force_reg (mode,
44021 const_double_from_real_value (do_floor
44022 ? dconst1 : dconstm1, mode));
44024 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44025 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44026 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44027 /* We always need to subtract here to preserve signed zero. */
44028 tmp = expand_simple_binop (mode, MINUS,
44029 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44030 emit_move_insn (res, tmp);
44032 emit_label (label);
44033 LABEL_NUSES (label) = 1;
44035 emit_move_insn (operand0, res);
44038 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44039 into OPERAND0. */
44040 void
44041 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
44043 /* C code for the stuff we expand below.
44044 double xa = fabs (x), x2;
44045 if (!isless (xa, TWO52))
44046 return x;
44047 x2 = (double)(long)x;
44048 Compensate. Floor:
44049 if (x2 > x)
44050 x2 -= 1;
44051 Compensate. Ceil:
44052 if (x2 < x)
44053 x2 += 1;
44054 if (HONOR_SIGNED_ZEROS (mode))
44055 return copysign (x2, x);
44056 return x2;
44058 machine_mode mode = GET_MODE (operand0);
44059 rtx xa, xi, TWO52, tmp, one, res, mask;
44060 rtx_code_label *label;
44062 TWO52 = ix86_gen_TWO52 (mode);
44064 /* Temporary for holding the result, initialized to the input
44065 operand to ease control flow. */
44066 res = gen_reg_rtx (mode);
44067 emit_move_insn (res, operand1);
44069 /* xa = abs (operand1) */
44070 xa = ix86_expand_sse_fabs (res, &mask);
44072 /* if (!isless (xa, TWO52)) goto label; */
44073 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44075 /* xa = (double)(long)x */
44076 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44077 expand_fix (xi, res, 0);
44078 expand_float (xa, xi, 0);
44080 /* generate 1.0 */
44081 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44083 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44084 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44085 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44086 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
44087 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44088 emit_move_insn (res, tmp);
44090 if (HONOR_SIGNED_ZEROS (mode))
44091 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44093 emit_label (label);
44094 LABEL_NUSES (label) = 1;
44096 emit_move_insn (operand0, res);
44099 /* Expand SSE sequence for computing round from OPERAND1 storing
44100 into OPERAND0. Sequence that works without relying on DImode truncation
44101 via cvttsd2siq, which is only available on 64-bit targets. */
44102 void
44103 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
44105 /* C code for the stuff we expand below.
44106 double xa = fabs (x), xa2, x2;
44107 if (!isless (xa, TWO52))
44108 return x;
44109 Using the absolute value and copying back sign makes
44110 -0.0 -> -0.0 correct.
44111 xa2 = xa + TWO52 - TWO52;
44112 Compensate.
44113 dxa = xa2 - xa;
44114 if (dxa <= -0.5)
44115 xa2 += 1;
44116 else if (dxa > 0.5)
44117 xa2 -= 1;
44118 x2 = copysign (xa2, x);
44119 return x2;
44121 machine_mode mode = GET_MODE (operand0);
44122 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
44123 rtx_code_label *label;
44125 TWO52 = ix86_gen_TWO52 (mode);
44127 /* Temporary for holding the result, initialized to the input
44128 operand to ease control flow. */
44129 res = gen_reg_rtx (mode);
44130 emit_move_insn (res, operand1);
44132 /* xa = abs (operand1) */
44133 xa = ix86_expand_sse_fabs (res, &mask);
44135 /* if (!isless (xa, TWO52)) goto label; */
44136 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44138 /* xa2 = xa + TWO52 - TWO52; */
44139 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44140 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
44142 /* dxa = xa2 - xa; */
44143 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
44145 /* generate 0.5, 1.0 and -0.5 */
44146 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
44147 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
44148 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
44149 0, OPTAB_DIRECT);
44151 /* Compensate. */
44152 tmp = gen_reg_rtx (mode);
44153 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
44154 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
44155 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44156 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44157 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
44158 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
44159 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
44160 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44162 /* res = copysign (xa2, operand1) */
44163 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
44165 emit_label (label);
44166 LABEL_NUSES (label) = 1;
44168 emit_move_insn (operand0, res);
44171 /* Expand SSE sequence for computing trunc from OPERAND1 storing
44172 into OPERAND0. */
44173 void
44174 ix86_expand_trunc (rtx operand0, rtx operand1)
44176 /* C code for SSE variant we expand below.
44177 double xa = fabs (x), x2;
44178 if (!isless (xa, TWO52))
44179 return x;
44180 x2 = (double)(long)x;
44181 if (HONOR_SIGNED_ZEROS (mode))
44182 return copysign (x2, x);
44183 return x2;
44185 machine_mode mode = GET_MODE (operand0);
44186 rtx xa, xi, TWO52, res, mask;
44187 rtx_code_label *label;
44189 TWO52 = ix86_gen_TWO52 (mode);
44191 /* Temporary for holding the result, initialized to the input
44192 operand to ease control flow. */
44193 res = gen_reg_rtx (mode);
44194 emit_move_insn (res, operand1);
44196 /* xa = abs (operand1) */
44197 xa = ix86_expand_sse_fabs (res, &mask);
44199 /* if (!isless (xa, TWO52)) goto label; */
44200 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44202 /* x = (double)(long)x */
44203 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44204 expand_fix (xi, res, 0);
44205 expand_float (res, xi, 0);
44207 if (HONOR_SIGNED_ZEROS (mode))
44208 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44210 emit_label (label);
44211 LABEL_NUSES (label) = 1;
44213 emit_move_insn (operand0, res);
44216 /* Expand SSE sequence for computing trunc from OPERAND1 storing
44217 into OPERAND0. */
44218 void
44219 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
44221 machine_mode mode = GET_MODE (operand0);
44222 rtx xa, mask, TWO52, one, res, smask, tmp;
44223 rtx_code_label *label;
44225 /* C code for SSE variant we expand below.
44226 double xa = fabs (x), x2;
44227 if (!isless (xa, TWO52))
44228 return x;
44229 xa2 = xa + TWO52 - TWO52;
44230 Compensate:
44231 if (xa2 > xa)
44232 xa2 -= 1.0;
44233 x2 = copysign (xa2, x);
44234 return x2;
44237 TWO52 = ix86_gen_TWO52 (mode);
44239 /* Temporary for holding the result, initialized to the input
44240 operand to ease control flow. */
44241 res = gen_reg_rtx (mode);
44242 emit_move_insn (res, operand1);
44244 /* xa = abs (operand1) */
44245 xa = ix86_expand_sse_fabs (res, &smask);
44247 /* if (!isless (xa, TWO52)) goto label; */
44248 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44250 /* res = xa + TWO52 - TWO52; */
44251 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44252 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
44253 emit_move_insn (res, tmp);
44255 /* generate 1.0 */
44256 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44258 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
44259 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
44260 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
44261 tmp = expand_simple_binop (mode, MINUS,
44262 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
44263 emit_move_insn (res, tmp);
44265 /* res = copysign (res, operand1) */
44266 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
44268 emit_label (label);
44269 LABEL_NUSES (label) = 1;
44271 emit_move_insn (operand0, res);
44274 /* Expand SSE sequence for computing round from OPERAND1 storing
44275 into OPERAND0. */
44276 void
44277 ix86_expand_round (rtx operand0, rtx operand1)
44279 /* C code for the stuff we're doing below:
44280 double xa = fabs (x);
44281 if (!isless (xa, TWO52))
44282 return x;
44283 xa = (double)(long)(xa + nextafter (0.5, 0.0));
44284 return copysign (xa, x);
44286 machine_mode mode = GET_MODE (operand0);
44287 rtx res, TWO52, xa, xi, half, mask;
44288 rtx_code_label *label;
44289 const struct real_format *fmt;
44290 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44292 /* Temporary for holding the result, initialized to the input
44293 operand to ease control flow. */
44294 res = gen_reg_rtx (mode);
44295 emit_move_insn (res, operand1);
44297 TWO52 = ix86_gen_TWO52 (mode);
44298 xa = ix86_expand_sse_fabs (res, &mask);
44299 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44301 /* load nextafter (0.5, 0.0) */
44302 fmt = REAL_MODE_FORMAT (mode);
44303 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44304 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
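/* Illustration: adding exactly 0.5 could round an input just below 0.5 up to
   1.0 -- e.g. for DFmode, (0.5 - 2**-54) + 0.5 rounds-to-even to 1.0 -- so the
   predecessor of 0.5, i.e. 0.5 - 2**(-p-1), is used instead; the sum then stays
   below 1.0 and the truncation below yields 0.  */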
44306 /* xa = xa + 0.5 */
44307 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
44308 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
44310 /* xa = (double)(int64_t)xa */
44311 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44312 expand_fix (xi, xa, 0);
44313 expand_float (xa, xi, 0);
44315 /* res = copysign (xa, operand1) */
44316 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
44318 emit_label (label);
44319 LABEL_NUSES (label) = 1;
44321 emit_move_insn (operand0, res);
44324 /* Expand SSE sequence for computing round
44325 from OP1 storing into OP0 using sse4 round insn. */
44326 void
44327 ix86_expand_round_sse4 (rtx op0, rtx op1)
44329 machine_mode mode = GET_MODE (op0);
44330 rtx e1, e2, res, half;
44331 const struct real_format *fmt;
44332 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44333 rtx (*gen_copysign) (rtx, rtx, rtx);
44334 rtx (*gen_round) (rtx, rtx, rtx);
44336 switch (mode)
44338 case E_SFmode:
44339 gen_copysign = gen_copysignsf3;
44340 gen_round = gen_sse4_1_roundsf2;
44341 break;
44342 case E_DFmode:
44343 gen_copysign = gen_copysigndf3;
44344 gen_round = gen_sse4_1_rounddf2;
44345 break;
44346 default:
44347 gcc_unreachable ();
44350 /* round (a) = trunc (a + copysign (0.5, a)) */
44352 /* load nextafter (0.5, 0.0) */
44353 fmt = REAL_MODE_FORMAT (mode);
44354 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44355 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
44356 half = const_double_from_real_value (pred_half, mode);
44358 /* e1 = copysign (0.5, op1) */
44359 e1 = gen_reg_rtx (mode);
44360 emit_insn (gen_copysign (e1, half, op1));
44362 /* e2 = op1 + e1 */
44363 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
44365 /* res = trunc (e2) */
44366 res = gen_reg_rtx (mode);
44367 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
44369 emit_move_insn (op0, res);
44373 /* Table of valid machine attributes. */
44374 static const struct attribute_spec ix86_attribute_table[] =
44376 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
44377 affects_type_identity } */
44378 /* Stdcall attribute says callee is responsible for popping arguments
44379 if they are not variable. */
44380 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44381 true },
44382 /* Fastcall attribute says callee is responsible for popping arguments
44383 if they are not variable. */
44384 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44385 true },
44386 /* Thiscall attribute says callee is responsible for popping arguments
44387 if they are not variable. */
44388 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44389 true },
44390 /* Cdecl attribute says the callee is a normal C declaration */
44391 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44392 true },
44393 /* Regparm attribute specifies how many integer arguments are to be
44394 passed in registers. */
44395 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
44396 true },
44397 /* Sseregparm attribute says we are using x86_64 calling conventions
44398 for FP arguments. */
44399 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
44400 true },
44401 /* The transactional memory builtins are implicitly regparm or fastcall
44402 depending on the ABI. Override the generic do-nothing attribute that
44403 these builtins were declared with. */
44404 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
44405 true },
44406 /* force_align_arg_pointer says this function realigns the stack at entry. */
44407 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
44408 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
44409 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44410 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
44411 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
44412 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
44413 false },
44414 #endif
44415 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44416 false },
44417 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
44418 false },
44419 #ifdef SUBTARGET_ATTRIBUTE_TABLE
44420 SUBTARGET_ATTRIBUTE_TABLE,
44421 #endif
44422 /* ms_abi and sysv_abi calling convention function attributes. */
44423 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44424 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
44425 { "ms_abi va_list", 0, 0, false, false, false, NULL, false },
44426 { "sysv_abi va_list", 0, 0, false, false, false, NULL, false },
44427 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
44428 false },
44429 { "callee_pop_aggregate_return", 1, 1, false, true, true,
44430 ix86_handle_callee_pop_aggregate_return, true },
44431 { "interrupt", 0, 0, false, true, true,
44432 ix86_handle_interrupt_attribute, false },
44433 { "no_caller_saved_registers", 0, 0, false, true, true,
44434 ix86_handle_no_caller_saved_registers_attribute, false },
44435 { "naked", 0, 0, true, false, false,
44436 ix86_handle_fndecl_attribute, false },
44438 /* End element. */
44439 { NULL, 0, 0, false, false, false, NULL, false }
44442 /* Implement targetm.vectorize.builtin_vectorization_cost. */
44443 static int
44444 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
44445 tree vectype, int)
44447 bool fp = false;
44448 machine_mode mode = TImode;
44449 int index;
44450 if (vectype != NULL)
44452 fp = FLOAT_TYPE_P (vectype);
44453 mode = TYPE_MODE (vectype);
44456 switch (type_of_cost)
44458 case scalar_stmt:
44459 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
44461 case scalar_load:
44462 /* Load/store costs are relative to a register move, which costs 2.
44463 Recompute them via COSTS_N_INSNS so everything has the same base. */
44464 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
44465 : ix86_cost->int_load [2]) / 2;
44467 case scalar_store:
44468 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
44469 : ix86_cost->int_store [2]) / 2;
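/* Worked example with a hypothetical cost table: if int_store[2] were 6
   (three register-move units), scalar_store would return
   COSTS_N_INSNS (6) / 2, i.e. the same value as COSTS_N_INSNS (3).  */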
44471 case vector_stmt:
44472 return ix86_vec_cost (mode,
44473 fp ? ix86_cost->addss : ix86_cost->sse_op,
44474 true);
44476 case vector_load:
44477 index = sse_store_index (mode);
44478 gcc_assert (index >= 0);
44479 return ix86_vec_cost (mode,
44480 COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
44481 true);
44483 case vector_store:
44484 index = sse_store_index (mode);
44485 return ix86_vec_cost (mode,
44486 COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
44487 true);
44489 case vec_to_scalar:
44490 case scalar_to_vec:
44491 return ix86_vec_cost (mode, ix86_cost->sse_op, true);
44493 /* We should have separate costs for unaligned loads and gather/scatter.
44494 Do that incrementally. */
44495 case unaligned_load:
44496 index = sse_store_index (mode);
44497 return ix86_vec_cost (mode,
44498 COSTS_N_INSNS
44499 (ix86_cost->sse_unaligned_load[index]) / 2,
44500 true);
44502 case unaligned_store:
44503 index = sse_store_index (mode);
44504 return ix86_vec_cost (mode,
44505 COSTS_N_INSNS
44506 (ix86_cost->sse_unaligned_store[index]) / 2,
44507 true);
44509 case vector_gather_load:
44510 return ix86_vec_cost (mode,
44511 COSTS_N_INSNS
44512 (ix86_cost->gather_static
44513 + ix86_cost->gather_per_elt
44514 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44515 true);
44517 case vector_scatter_store:
44518 return ix86_vec_cost (mode,
44519 COSTS_N_INSNS
44520 (ix86_cost->scatter_static
44521 + ix86_cost->scatter_per_elt
44522 * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
44523 true);
44525 case cond_branch_taken:
44526 return ix86_cost->cond_taken_branch_cost;
44528 case cond_branch_not_taken:
44529 return ix86_cost->cond_not_taken_branch_cost;
44531 case vec_perm:
44532 case vec_promote_demote:
44533 return ix86_vec_cost (mode,
44534 ix86_cost->sse_op, true);
44536 case vec_construct:
44537 return ix86_vec_cost (mode, ix86_cost->sse_op, false);
44539 default:
44540 gcc_unreachable ();
44544 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
44545 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
44546 insn every time. */
44548 static GTY(()) rtx_insn *vselect_insn;
44550 /* Initialize vselect_insn. */
44552 static void
44553 init_vselect_insn (void)
44555 unsigned i;
44556 rtx x;
44558 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
44559 for (i = 0; i < MAX_VECT_LEN; ++i)
44560 XVECEXP (x, 0, i) = const0_rtx;
44561 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
44562 const0_rtx), x);
44563 x = gen_rtx_SET (const0_rtx, x);
44564 start_sequence ();
44565 vselect_insn = emit_insn (x);
44566 end_sequence ();
44569 /* Construct (set target (vec_select op0 (parallel perm))) and
44570 return true if that's a valid instruction in the active ISA. */
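/* For example, PERM = { 1, 0 } with a V2DFmode OP0 builds
   (set target (vec_select:V2DF op0 (parallel [(const_int 1) (const_int 0)]))),
   i.e. a swap of the two elements; whether it is emitted depends on
   recog_memoized finding a matching pattern in the active ISA.  */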
44572 static bool
44573 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
44574 unsigned nelt, bool testing_p)
44576 unsigned int i;
44577 rtx x, save_vconcat;
44578 int icode;
44580 if (vselect_insn == NULL_RTX)
44581 init_vselect_insn ();
44583 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
44584 PUT_NUM_ELEM (XVEC (x, 0), nelt);
44585 for (i = 0; i < nelt; ++i)
44586 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
44587 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44588 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
44589 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
44590 SET_DEST (PATTERN (vselect_insn)) = target;
44591 icode = recog_memoized (vselect_insn);
44593 if (icode >= 0 && !testing_p)
44594 emit_insn (copy_rtx (PATTERN (vselect_insn)));
44596 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
44597 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
44598 INSN_CODE (vselect_insn) = -1;
44600 return icode >= 0;
44603 /* Similar, but generate a vec_concat from op0 and op1 as well. */
44605 static bool
44606 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
44607 const unsigned char *perm, unsigned nelt,
44608 bool testing_p)
44610 machine_mode v2mode;
44611 rtx x;
44612 bool ok;
44614 if (vselect_insn == NULL_RTX)
44615 init_vselect_insn ();
44617 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
44618 return false;
44619 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
44620 PUT_MODE (x, v2mode);
44621 XEXP (x, 0) = op0;
44622 XEXP (x, 1) = op1;
44623 ok = expand_vselect (target, x, perm, nelt, testing_p);
44624 XEXP (x, 0) = const0_rtx;
44625 XEXP (x, 1) = const0_rtx;
44626 return ok;
44629 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44630 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
44632 static bool
44633 expand_vec_perm_blend (struct expand_vec_perm_d *d)
44635 machine_mode mmode, vmode = d->vmode;
44636 unsigned i, mask, nelt = d->nelt;
44637 rtx target, op0, op1, maskop, x;
44638 rtx rperm[32], vperm;
44640 if (d->one_operand_p)
44641 return false;
44642 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
44643 && (TARGET_AVX512BW
44644 || GET_MODE_UNIT_SIZE (vmode) >= 4))
44646 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
44648 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
44650 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
44652 else
44653 return false;
44655 /* This is a blend, not a permute. Elements must stay in their
44656 respective lanes. */
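/* For instance, for V4SFmode the permutation { 0, 5, 2, 7 } is a valid
   blend (lanes 1 and 3 come from op1, so the mask computed below is
   0b1010), whereas { 1, 5, 2, 7 } moves op0's element 1 into lane 0 and
   is rejected here.  */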
44657 for (i = 0; i < nelt; ++i)
44659 unsigned e = d->perm[i];
44660 if (!(e == i || e == i + nelt))
44661 return false;
44664 if (d->testing_p)
44665 return true;
44667 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
44668 decision should be extracted elsewhere, so that we only try that
44669 sequence once all budget==3 options have been tried. */
44670 target = d->target;
44671 op0 = d->op0;
44672 op1 = d->op1;
44673 mask = 0;
44675 switch (vmode)
44677 case E_V8DFmode:
44678 case E_V16SFmode:
44679 case E_V4DFmode:
44680 case E_V8SFmode:
44681 case E_V2DFmode:
44682 case E_V4SFmode:
44683 case E_V8HImode:
44684 case E_V8SImode:
44685 case E_V32HImode:
44686 case E_V64QImode:
44687 case E_V16SImode:
44688 case E_V8DImode:
44689 for (i = 0; i < nelt; ++i)
44690 mask |= (d->perm[i] >= nelt) << i;
44691 break;
44693 case E_V2DImode:
44694 for (i = 0; i < 2; ++i)
44695 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
44696 vmode = V8HImode;
44697 goto do_subreg;
44699 case E_V4SImode:
44700 for (i = 0; i < 4; ++i)
44701 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
44702 vmode = V8HImode;
44703 goto do_subreg;
44705 case E_V16QImode:
44706 /* See if bytes move in pairs so we can use pblendw with
44707 an immediate argument, rather than pblendvb with a vector
44708 argument. */
44709 for (i = 0; i < 16; i += 2)
44710 if (d->perm[i] + 1 != d->perm[i + 1])
44712 use_pblendvb:
44713 for (i = 0; i < nelt; ++i)
44714 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
44716 finish_pblendvb:
44717 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
44718 vperm = force_reg (vmode, vperm);
44720 if (GET_MODE_SIZE (vmode) == 16)
44721 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
44722 else
44723 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
44724 if (target != d->target)
44725 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44726 return true;
44729 for (i = 0; i < 8; ++i)
44730 mask |= (d->perm[i * 2] >= 16) << i;
44731 vmode = V8HImode;
44732 /* FALLTHRU */
44734 do_subreg:
44735 target = gen_reg_rtx (vmode);
44736 op0 = gen_lowpart (vmode, op0);
44737 op1 = gen_lowpart (vmode, op1);
44738 break;
44740 case E_V32QImode:
44741 /* See if bytes move in pairs. If not, vpblendvb must be used. */
44742 for (i = 0; i < 32; i += 2)
44743 if (d->perm[i] + 1 != d->perm[i + 1])
44744 goto use_pblendvb;
44745 /* See if bytes move in quadruplets. If yes, vpblendd
44746 with immediate can be used. */
44747 for (i = 0; i < 32; i += 4)
44748 if (d->perm[i] + 2 != d->perm[i + 2])
44749 break;
44750 if (i < 32)
44752 /* See if bytes move the same in both lanes. If yes,
44753 vpblendw with immediate can be used. */
44754 for (i = 0; i < 16; i += 2)
44755 if (d->perm[i] + 16 != d->perm[i + 16])
44756 goto use_pblendvb;
44758 /* Use vpblendw. */
44759 for (i = 0; i < 16; ++i)
44760 mask |= (d->perm[i * 2] >= 32) << i;
44761 vmode = V16HImode;
44762 goto do_subreg;
44765 /* Use vpblendd. */
44766 for (i = 0; i < 8; ++i)
44767 mask |= (d->perm[i * 4] >= 32) << i;
44768 vmode = V8SImode;
44769 goto do_subreg;
44771 case E_V16HImode:
44772 /* See if words move in pairs. If yes, vpblendd can be used. */
44773 for (i = 0; i < 16; i += 2)
44774 if (d->perm[i] + 1 != d->perm[i + 1])
44775 break;
44776 if (i < 16)
44778 /* See if words move the same in both lanes. If not,
44779 vpblendvb must be used. */
44780 for (i = 0; i < 8; i++)
44781 if (d->perm[i] + 8 != d->perm[i + 8])
44783 /* Use vpblendvb. */
44784 for (i = 0; i < 32; ++i)
44785 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
44787 vmode = V32QImode;
44788 nelt = 32;
44789 target = gen_reg_rtx (vmode);
44790 op0 = gen_lowpart (vmode, op0);
44791 op1 = gen_lowpart (vmode, op1);
44792 goto finish_pblendvb;
44795 /* Use vpblendw. */
44796 for (i = 0; i < 16; ++i)
44797 mask |= (d->perm[i] >= 16) << i;
44798 break;
44801 /* Use vpblendd. */
44802 for (i = 0; i < 8; ++i)
44803 mask |= (d->perm[i * 2] >= 16) << i;
44804 vmode = V8SImode;
44805 goto do_subreg;
44807 case E_V4DImode:
44808 /* Use vpblendd. */
44809 for (i = 0; i < 4; ++i)
44810 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
44811 vmode = V8SImode;
44812 goto do_subreg;
44814 default:
44815 gcc_unreachable ();
44818 switch (vmode)
44820 case E_V8DFmode:
44821 case E_V8DImode:
44822 mmode = QImode;
44823 break;
44824 case E_V16SFmode:
44825 case E_V16SImode:
44826 mmode = HImode;
44827 break;
44828 case E_V32HImode:
44829 mmode = SImode;
44830 break;
44831 case E_V64QImode:
44832 mmode = DImode;
44833 break;
44834 default:
44835 mmode = VOIDmode;
44838 if (mmode != VOIDmode)
44839 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
44840 else
44841 maskop = GEN_INT (mask);
44843 /* This matches five different patterns, depending on the mode. */
44844 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
44845 x = gen_rtx_SET (target, x);
44846 emit_insn (x);
44847 if (target != d->target)
44848 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44850 return true;
44853 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44854 in terms of the variable form of vpermilps.
44856 Note that we will have already failed the immediate input vpermilps,
44857 which requires that the high and low part shuffle be identical; the
44858 variable form doesn't require that. */
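/* For example, the V8SFmode permutation { 3, 2, 1, 0, 5, 4, 7, 6 } keeps
   every element within its 128-bit lane but shuffles the two lanes
   differently, so only the variable form handled here applies.  */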
44860 static bool
44861 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
44863 rtx rperm[8], vperm;
44864 unsigned i;
44866 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
44867 return false;
44869 /* We can only permute within each 128-bit lane. */
44870 for (i = 0; i < 8; ++i)
44872 unsigned e = d->perm[i];
44873 if (i < 4 ? e >= 4 : e < 4)
44874 return false;
44877 if (d->testing_p)
44878 return true;
44880 for (i = 0; i < 8; ++i)
44882 unsigned e = d->perm[i];
44884 /* Within each 128-bit lane, the elements of op0 are numbered
44885 from 0 and the elements of op1 are numbered from 4. */
44886 if (e >= 8 + 4)
44887 e -= 8;
44888 else if (e >= 4)
44889 e -= 4;
44891 rperm[i] = GEN_INT (e);
44894 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
44895 vperm = force_reg (V8SImode, vperm);
44896 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
44898 return true;
44901 /* Return true if permutation D can be performed as a VMODE permutation
44902 instead. */
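/* E.g. a V16QImode permutation starting { 2, 3, 0, 1, 6, 7, 4, 5, ... }
   moves bytes in aligned pairs, so it is also expressible as the V8HImode
   permutation { 1, 0, 3, 2, ... }.  */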
44904 static bool
44905 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
44907 unsigned int i, j, chunk;
44909 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
44910 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
44911 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
44912 return false;
44914 if (GET_MODE_NUNITS (vmode) >= d->nelt)
44915 return true;
44917 chunk = d->nelt / GET_MODE_NUNITS (vmode);
44918 for (i = 0; i < d->nelt; i += chunk)
44919 if (d->perm[i] & (chunk - 1))
44920 return false;
44921 else
44922 for (j = 1; j < chunk; ++j)
44923 if (d->perm[i] + j != d->perm[i + j])
44924 return false;
44926 return true;
44929 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
44930 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
44932 static bool
44933 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
44935 unsigned i, nelt, eltsz, mask;
44936 unsigned char perm[64];
44937 machine_mode vmode = V16QImode;
44938 rtx rperm[64], vperm, target, op0, op1;
44940 nelt = d->nelt;
44942 if (!d->one_operand_p)
44944 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
44946 if (TARGET_AVX2
44947 && valid_perm_using_mode_p (V2TImode, d))
44949 if (d->testing_p)
44950 return true;
44952 /* Use vperm2i128 insn. The pattern uses
44953 V4DImode instead of V2TImode. */
44954 target = d->target;
44955 if (d->vmode != V4DImode)
44956 target = gen_reg_rtx (V4DImode);
44957 op0 = gen_lowpart (V4DImode, d->op0);
44958 op1 = gen_lowpart (V4DImode, d->op1);
44959 rperm[0]
44960 = GEN_INT ((d->perm[0] / (nelt / 2))
44961 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
44962 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
44963 if (target != d->target)
44964 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
44965 return true;
44967 return false;
44970 else
44972 if (GET_MODE_SIZE (d->vmode) == 16)
44974 if (!TARGET_SSSE3)
44975 return false;
44977 else if (GET_MODE_SIZE (d->vmode) == 32)
44979 if (!TARGET_AVX2)
44980 return false;
44982 /* V4DImode should already have been handled through
44983 expand_vselect by the vpermq instruction. */
44984 gcc_assert (d->vmode != V4DImode);
44986 vmode = V32QImode;
44987 if (d->vmode == V8SImode
44988 || d->vmode == V16HImode
44989 || d->vmode == V32QImode)
44991 /* First see if vpermq can be used for
44992 V8SImode/V16HImode/V32QImode. */
44993 if (valid_perm_using_mode_p (V4DImode, d))
44995 for (i = 0; i < 4; i++)
44996 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
44997 if (d->testing_p)
44998 return true;
44999 target = gen_reg_rtx (V4DImode);
45000 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
45001 perm, 4, false))
45003 emit_move_insn (d->target,
45004 gen_lowpart (d->vmode, target));
45005 return true;
45007 return false;
45010 /* Next see if vpermd can be used. */
45011 if (valid_perm_using_mode_p (V8SImode, d))
45012 vmode = V8SImode;
45014 /* Or if vpermps can be used. */
45015 else if (d->vmode == V8SFmode)
45016 vmode = V8SImode;
45018 if (vmode == V32QImode)
45020 /* vpshufb only works within 128-bit lanes; it cannot
45021 shuffle bytes across lanes. */
45022 for (i = 0; i < nelt; ++i)
45023 if ((d->perm[i] ^ i) & (nelt / 2))
45024 return false;
45027 else if (GET_MODE_SIZE (d->vmode) == 64)
45029 if (!TARGET_AVX512BW)
45030 return false;
45032 /* If vpermq didn't work, vpshufb won't work either. */
45033 if (d->vmode == V8DFmode || d->vmode == V8DImode)
45034 return false;
45036 vmode = V64QImode;
45037 if (d->vmode == V16SImode
45038 || d->vmode == V32HImode
45039 || d->vmode == V64QImode)
45041 /* First see if vpermq can be used for
45042 V16SImode/V32HImode/V64QImode. */
45043 if (valid_perm_using_mode_p (V8DImode, d))
45045 for (i = 0; i < 8; i++)
45046 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
45047 if (d->testing_p)
45048 return true;
45049 target = gen_reg_rtx (V8DImode);
45050 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
45051 perm, 8, false))
45053 emit_move_insn (d->target,
45054 gen_lowpart (d->vmode, target));
45055 return true;
45057 return false;
45060 /* Next see if vpermd can be used. */
45061 if (valid_perm_using_mode_p (V16SImode, d))
45062 vmode = V16SImode;
45064 /* Or if vpermps can be used. */
45065 else if (d->vmode == V16SFmode)
45066 vmode = V16SImode;
45067 if (vmode == V64QImode)
45069 /* vpshufb only works within 128-bit lanes; it cannot
45070 shuffle bytes across lanes. */
45071 for (i = 0; i < nelt; ++i)
45072 if ((d->perm[i] ^ i) & (nelt / 4))
45073 return false;
45076 else
45077 return false;
45080 if (d->testing_p)
45081 return true;
45083 if (vmode == V8SImode)
45084 for (i = 0; i < 8; ++i)
45085 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
45086 else if (vmode == V16SImode)
45087 for (i = 0; i < 16; ++i)
45088 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
45089 else
45091 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
45092 if (!d->one_operand_p)
45093 mask = 2 * nelt - 1;
45094 else if (vmode == V16QImode)
45095 mask = nelt - 1;
45096 else if (vmode == V64QImode)
45097 mask = nelt / 4 - 1;
45098 else
45099 mask = nelt / 2 - 1;
45101 for (i = 0; i < nelt; ++i)
45103 unsigned j, e = d->perm[i] & mask;
45104 for (j = 0; j < eltsz; ++j)
45105 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
45109 vperm = gen_rtx_CONST_VECTOR (vmode,
45110 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
45111 vperm = force_reg (vmode, vperm);
45113 target = d->target;
45114 if (d->vmode != vmode)
45115 target = gen_reg_rtx (vmode);
45116 op0 = gen_lowpart (vmode, d->op0);
45117 if (d->one_operand_p)
45119 if (vmode == V16QImode)
45120 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
45121 else if (vmode == V32QImode)
45122 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
45123 else if (vmode == V64QImode)
45124 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
45125 else if (vmode == V8SFmode)
45126 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
45127 else if (vmode == V8SImode)
45128 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
45129 else if (vmode == V16SFmode)
45130 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
45131 else if (vmode == V16SImode)
45132 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
45133 else
45134 gcc_unreachable ();
45136 else
45138 op1 = gen_lowpart (vmode, d->op1);
45139 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
45141 if (target != d->target)
45142 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45144 return true;
45147 /* For V*[QHS]Imode permutations, check whether the same permutation
45148 can be performed in a 2x, 4x or 8x wider inner mode; if so, describe it in *ND. */
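/* For example, the V16QImode permutation
   { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 } narrows to the
   V8HImode permutation { 6, 7, 4, 5, 2, 3, 0, 1 } and then, recursively,
   to the V4SImode permutation { 3, 2, 1, 0 }.  */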
45150 static bool
45151 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
45152 struct expand_vec_perm_d *nd)
45154 int i;
45155 machine_mode mode = VOIDmode;
45157 switch (d->vmode)
45159 case E_V16QImode: mode = V8HImode; break;
45160 case E_V32QImode: mode = V16HImode; break;
45161 case E_V64QImode: mode = V32HImode; break;
45162 case E_V8HImode: mode = V4SImode; break;
45163 case E_V16HImode: mode = V8SImode; break;
45164 case E_V32HImode: mode = V16SImode; break;
45165 case E_V4SImode: mode = V2DImode; break;
45166 case E_V8SImode: mode = V4DImode; break;
45167 case E_V16SImode: mode = V8DImode; break;
45168 default: return false;
45170 for (i = 0; i < d->nelt; i += 2)
45171 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
45172 return false;
45173 nd->vmode = mode;
45174 nd->nelt = d->nelt / 2;
45175 for (i = 0; i < nd->nelt; i++)
45176 nd->perm[i] = d->perm[2 * i] / 2;
45177 if (GET_MODE_INNER (mode) != DImode)
45178 canonicalize_vector_int_perm (nd, nd);
45179 if (nd != d)
45181 nd->one_operand_p = d->one_operand_p;
45182 nd->testing_p = d->testing_p;
45183 if (d->op0 == d->op1)
45184 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
45185 else
45187 nd->op0 = gen_lowpart (nd->vmode, d->op0);
45188 nd->op1 = gen_lowpart (nd->vmode, d->op1);
45190 if (d->testing_p)
45191 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
45192 else
45193 nd->target = gen_reg_rtx (nd->vmode);
45195 return true;
45198 /* Try to expand one-operand permutation with constant mask. */
45200 static bool
45201 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
45203 machine_mode mode = GET_MODE (d->op0);
45204 machine_mode maskmode = mode;
45205 rtx (*gen) (rtx, rtx, rtx) = NULL;
45206 rtx target, op0, mask;
45207 rtx vec[64];
45209 if (!rtx_equal_p (d->op0, d->op1))
45210 return false;
45212 if (!TARGET_AVX512F)
45213 return false;
45215 switch (mode)
45217 case E_V16SImode:
45218 gen = gen_avx512f_permvarv16si;
45219 break;
45220 case E_V16SFmode:
45221 gen = gen_avx512f_permvarv16sf;
45222 maskmode = V16SImode;
45223 break;
45224 case E_V8DImode:
45225 gen = gen_avx512f_permvarv8di;
45226 break;
45227 case E_V8DFmode:
45228 gen = gen_avx512f_permvarv8df;
45229 maskmode = V8DImode;
45230 break;
45231 default:
45232 return false;
45235 target = d->target;
45236 op0 = d->op0;
45237 for (int i = 0; i < d->nelt; ++i)
45238 vec[i] = GEN_INT (d->perm[i]);
45239 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
45240 emit_insn (gen (target, op0, force_reg (maskmode, mask)));
45241 return true;
45244 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
45245 in a single instruction. */
45247 static bool
45248 expand_vec_perm_1 (struct expand_vec_perm_d *d)
45250 unsigned i, nelt = d->nelt;
45251 struct expand_vec_perm_d nd;
45253 /* Check plain VEC_SELECT first, because AVX has instructions that could
45254 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
45255 input where SEL+CONCAT may not. */
45256 if (d->one_operand_p)
45258 int mask = nelt - 1;
45259 bool identity_perm = true;
45260 bool broadcast_perm = true;
45262 for (i = 0; i < nelt; i++)
45264 nd.perm[i] = d->perm[i] & mask;
45265 if (nd.perm[i] != i)
45266 identity_perm = false;
45267 if (nd.perm[i])
45268 broadcast_perm = false;
45271 if (identity_perm)
45273 if (!d->testing_p)
45274 emit_move_insn (d->target, d->op0);
45275 return true;
45277 else if (broadcast_perm && TARGET_AVX2)
45279 /* Use vpbroadcast{b,w,d}. */
45280 rtx (*gen) (rtx, rtx) = NULL;
45281 switch (d->vmode)
45283 case E_V64QImode:
45284 if (TARGET_AVX512BW)
45285 gen = gen_avx512bw_vec_dupv64qi_1;
45286 break;
45287 case E_V32QImode:
45288 gen = gen_avx2_pbroadcastv32qi_1;
45289 break;
45290 case E_V32HImode:
45291 if (TARGET_AVX512BW)
45292 gen = gen_avx512bw_vec_dupv32hi_1;
45293 break;
45294 case E_V16HImode:
45295 gen = gen_avx2_pbroadcastv16hi_1;
45296 break;
45297 case E_V16SImode:
45298 if (TARGET_AVX512F)
45299 gen = gen_avx512f_vec_dupv16si_1;
45300 break;
45301 case E_V8SImode:
45302 gen = gen_avx2_pbroadcastv8si_1;
45303 break;
45304 case E_V16QImode:
45305 gen = gen_avx2_pbroadcastv16qi;
45306 break;
45307 case E_V8HImode:
45308 gen = gen_avx2_pbroadcastv8hi;
45309 break;
45310 case E_V16SFmode:
45311 if (TARGET_AVX512F)
45312 gen = gen_avx512f_vec_dupv16sf_1;
45313 break;
45314 case E_V8SFmode:
45315 gen = gen_avx2_vec_dupv8sf_1;
45316 break;
45317 case E_V8DFmode:
45318 if (TARGET_AVX512F)
45319 gen = gen_avx512f_vec_dupv8df_1;
45320 break;
45321 case E_V8DImode:
45322 if (TARGET_AVX512F)
45323 gen = gen_avx512f_vec_dupv8di_1;
45324 break;
45325 /* For other modes prefer other shuffles this function creates. */
45326 default: break;
45328 if (gen != NULL)
45330 if (!d->testing_p)
45331 emit_insn (gen (d->target, d->op0));
45332 return true;
45336 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
45337 return true;
45339 /* There are plenty of patterns in sse.md that are written for
45340 SEL+CONCAT and are not replicated for a single op. Perhaps
45341 that should be changed, to avoid the nastiness here. */
45343 /* Recognize interleave style patterns, which means incrementing
45344 every other permutation operand. */
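/* E.g. for a one-operand V4SImode permutation { 0, 0, 1, 1 }, nd.perm
   becomes { 0, 4, 1, 5 }, the interleave-low (punpckldq) of op0 with
   itself.  */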
45345 for (i = 0; i < nelt; i += 2)
45347 nd.perm[i] = d->perm[i] & mask;
45348 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
45350 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45351 d->testing_p))
45352 return true;
45354 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
45355 if (nelt >= 4)
45357 for (i = 0; i < nelt; i += 4)
45359 nd.perm[i + 0] = d->perm[i + 0] & mask;
45360 nd.perm[i + 1] = d->perm[i + 1] & mask;
45361 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
45362 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
45365 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
45366 d->testing_p))
45367 return true;
45371 /* Finally, try the fully general two operand permute. */
45372 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
45373 d->testing_p))
45374 return true;
45376 /* Recognize interleave style patterns with reversed operands. */
45377 if (!d->one_operand_p)
45379 for (i = 0; i < nelt; ++i)
45381 unsigned e = d->perm[i];
45382 if (e >= nelt)
45383 e -= nelt;
45384 else
45385 e += nelt;
45386 nd.perm[i] = e;
45389 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
45390 d->testing_p))
45391 return true;
45394 /* Try the SSE4.1 blend variable merge instructions. */
45395 if (expand_vec_perm_blend (d))
45396 return true;
45398 /* Try one of the AVX vpermil variable permutations. */
45399 if (expand_vec_perm_vpermil (d))
45400 return true;
45402 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
45403 vpshufb, vpermd, vpermps or vpermq variable permutation. */
45404 if (expand_vec_perm_pshufb (d))
45405 return true;
45407 /* Try the AVX2 vpalignr instruction. */
45408 if (expand_vec_perm_palignr (d, true))
45409 return true;
45411 /* Try the AVX512F vperm{s,d} instructions. */
45412 if (ix86_expand_vec_one_operand_perm_avx512 (d))
45413 return true;
45415 /* Try the AVX512F vpermt2/vpermi2 instructions. */
45416 if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
45417 return true;
45419 /* See if we can get the same permutation in a different vector integer
45420 mode. */
45421 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
45423 if (!d->testing_p)
45424 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
45425 return true;
45427 return false;
45430 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45431 in terms of a pair of pshuflw + pshufhw instructions. */
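/* E.g. the V8HImode permutation { 3, 1, 2, 0, 5, 7, 4, 6 } keeps elements
   0-3 in the low quadword and 4-7 in the high one, so pshuflw reorders the
   low words and pshufhw the high words.  */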
45433 static bool
45434 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
45436 unsigned char perm2[MAX_VECT_LEN];
45437 unsigned i;
45438 bool ok;
45440 if (d->vmode != V8HImode || !d->one_operand_p)
45441 return false;
45443 /* The two permutations only operate in 64-bit lanes. */
45444 for (i = 0; i < 4; ++i)
45445 if (d->perm[i] >= 4)
45446 return false;
45447 for (i = 4; i < 8; ++i)
45448 if (d->perm[i] < 4)
45449 return false;
45451 if (d->testing_p)
45452 return true;
45454 /* Emit the pshuflw. */
45455 memcpy (perm2, d->perm, 4);
45456 for (i = 4; i < 8; ++i)
45457 perm2[i] = i;
45458 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
45459 gcc_assert (ok);
45461 /* Emit the pshufhw. */
45462 memcpy (perm2 + 4, d->perm + 4, 4);
45463 for (i = 0; i < 4; ++i)
45464 perm2[i] = i;
45465 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
45466 gcc_assert (ok);
45468 return true;
45471 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45472 the permutation using the SSSE3 palignr instruction. This succeeds
45473 when all of the elements in PERM fit within one vector and we merely
45474 need to shift them down so that a single vector permutation has a
45475 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
45476 the vpalignr instruction itself can perform the requested permutation. */
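/* For instance, a V16QImode permutation selecting bytes 5 .. 20 of the
   concatenated operands (perm[i] == i + 5) has min == 5 and max == 20,
   so everything fits in one 16-byte window; the palignr by 5 bytes then
   leaves the elements already in order and no further shuffle is needed.  */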
45478 static bool
45479 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
45481 unsigned i, nelt = d->nelt;
45482 unsigned min, max, minswap, maxswap;
45483 bool in_order, ok, swap = false;
45484 rtx shift, target;
45485 struct expand_vec_perm_d dcopy;
45487 /* Even with AVX, palignr only operates on 128-bit vectors; with AVX2,
45488 vpalignr operates on each of the two 128-bit lanes separately. */
45489 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
45490 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
45491 return false;
45493 min = 2 * nelt;
45494 max = 0;
45495 minswap = 2 * nelt;
45496 maxswap = 0;
45497 for (i = 0; i < nelt; ++i)
45499 unsigned e = d->perm[i];
45500 unsigned eswap = d->perm[i] ^ nelt;
45501 if (GET_MODE_SIZE (d->vmode) == 32)
45503 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
45504 eswap = e ^ (nelt / 2);
45506 if (e < min)
45507 min = e;
45508 if (e > max)
45509 max = e;
45510 if (eswap < minswap)
45511 minswap = eswap;
45512 if (eswap > maxswap)
45513 maxswap = eswap;
45515 if (min == 0
45516 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
45518 if (d->one_operand_p
45519 || minswap == 0
45520 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
45521 ? nelt / 2 : nelt))
45522 return false;
45523 swap = true;
45524 min = minswap;
45525 max = maxswap;
45528 /* Given that we have SSSE3, we know we'll be able to implement the
45529 single operand permutation after the palignr with pshufb for
45530 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
45531 first. */
45532 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
45533 return true;
45535 dcopy = *d;
45536 if (swap)
45538 dcopy.op0 = d->op1;
45539 dcopy.op1 = d->op0;
45540 for (i = 0; i < nelt; ++i)
45541 dcopy.perm[i] ^= nelt;
45544 in_order = true;
45545 for (i = 0; i < nelt; ++i)
45547 unsigned e = dcopy.perm[i];
45548 if (GET_MODE_SIZE (d->vmode) == 32
45549 && e >= nelt
45550 && (e & (nelt / 2 - 1)) < min)
45551 e = e - min - (nelt / 2);
45552 else
45553 e = e - min;
45554 if (e != i)
45555 in_order = false;
45556 dcopy.perm[i] = e;
45558 dcopy.one_operand_p = true;
45560 if (single_insn_only_p && !in_order)
45561 return false;
45563 /* For AVX2, test whether we can permute the result in one instruction. */
45564 if (d->testing_p)
45566 if (in_order)
45567 return true;
45568 dcopy.op1 = dcopy.op0;
45569 return expand_vec_perm_1 (&dcopy);
45572 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
45573 if (GET_MODE_SIZE (d->vmode) == 16)
45575 target = gen_reg_rtx (TImode);
45576 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
45577 gen_lowpart (TImode, dcopy.op0), shift));
45579 else
45581 target = gen_reg_rtx (V2TImode);
45582 emit_insn (gen_avx2_palignrv2ti (target,
45583 gen_lowpart (V2TImode, dcopy.op1),
45584 gen_lowpart (V2TImode, dcopy.op0),
45585 shift));
45588 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
45590 /* Test for the degenerate case where the alignment by itself
45591 produces the desired permutation. */
45592 if (in_order)
45594 emit_move_insn (d->target, dcopy.op0);
45595 return true;
45598 ok = expand_vec_perm_1 (&dcopy);
45599 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
45601 return ok;
45604 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
45605 the permutation using the SSE4_1 pblendv instruction. Potentially
45606 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
45608 static bool
45609 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
45611 unsigned i, which, nelt = d->nelt;
45612 struct expand_vec_perm_d dcopy, dcopy1;
45613 machine_mode vmode = d->vmode;
45614 bool ok;
45616 /* Use the same checks as in expand_vec_perm_blend. */
45617 if (d->one_operand_p)
45618 return false;
45619 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45621 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45623 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45625 else
45626 return false;
45628 /* Figure out which permutation elements do not stay in their
45629 respective lanes. */
45630 for (i = 0, which = 0; i < nelt; ++i)
45632 unsigned e = d->perm[i];
45633 if (e != i)
45634 which |= (e < nelt ? 1 : 2);
45636 /* We can pblend the part whose elements are not in their
45637 respective lanes only when those elements all come from the
45638 same operand.
45639 {0 1 8 3 4 5 9 7} is ok: the misplaced elements 8 and 9 both
45640 come from op1 (both >= 8).
45641 {0 1 8 3 4 5 2 7} is not ok: the misplaced elements are 8
45642 (from op1, >= 8) and 2 (from op0, < 8). */
45643 if (which != 1 && which != 2)
45644 return false;
45645 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
45646 return true;
45648 /* First apply a one-operand permutation to the operand that supplies
45649 the out-of-place elements, so that each of them lands in its destination lane. */
45650 dcopy = *d;
45651 if (which == 2)
45652 dcopy.op0 = dcopy.op1 = d->op1;
45653 else
45654 dcopy.op0 = dcopy.op1 = d->op0;
45655 if (!d->testing_p)
45656 dcopy.target = gen_reg_rtx (vmode);
45657 dcopy.one_operand_p = true;
45659 for (i = 0; i < nelt; ++i)
45660 dcopy.perm[i] = d->perm[i] & (nelt - 1);
45662 ok = expand_vec_perm_1 (&dcopy);
45663 if (GET_MODE_SIZE (vmode) != 16 && !ok)
45664 return false;
45665 else
45666 gcc_assert (ok);
45667 if (d->testing_p)
45668 return true;
45670 /* Next we put permuted elements into their positions. */
45671 dcopy1 = *d;
45672 if (which == 2)
45673 dcopy1.op1 = dcopy.target;
45674 else
45675 dcopy1.op0 = dcopy.target;
45677 for (i = 0; i < nelt; ++i)
45678 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
45680 ok = expand_vec_perm_blend (&dcopy1);
45681 gcc_assert (ok);
45683 return true;
45686 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
45688 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45689 a two vector permutation into a single vector permutation by using
45690 an interleave operation to merge the vectors. */
45692 static bool
45693 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
45695 struct expand_vec_perm_d dremap, dfinal;
45696 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
45697 unsigned HOST_WIDE_INT contents;
45698 unsigned char remap[2 * MAX_VECT_LEN];
45699 rtx_insn *seq;
45700 bool ok, same_halves = false;
45702 if (GET_MODE_SIZE (d->vmode) == 16)
45704 if (d->one_operand_p)
45705 return false;
45707 else if (GET_MODE_SIZE (d->vmode) == 32)
45709 if (!TARGET_AVX)
45710 return false;
45711 /* For 32-byte modes allow even d->one_operand_p.
45712 The lack of cross-lane shuffling in some instructions
45713 might prevent a single insn shuffle. */
45714 dfinal = *d;
45715 dfinal.testing_p = true;
45716 /* If expand_vec_perm_interleave3 can expand this into
45717 a 3 insn sequence, give up and let it be expanded that
45718 way instead. While that is one insn longer, it doesn't
45719 need a memory operand, and in the common case where both
45720 the interleave-low and interleave-high permutations of the
45721 same operands are adjacent, the pair needs only 4 insns
45722 for both after CSE. */
45723 if (expand_vec_perm_interleave3 (&dfinal))
45724 return false;
45726 else
45727 return false;
45729 /* Examine from whence the elements come. */
45730 contents = 0;
45731 for (i = 0; i < nelt; ++i)
45732 contents |= HOST_WIDE_INT_1U << d->perm[i];
45734 memset (remap, 0xff, sizeof (remap));
45735 dremap = *d;
45737 if (GET_MODE_SIZE (d->vmode) == 16)
45739 unsigned HOST_WIDE_INT h1, h2, h3, h4;
45741 /* Split the two input vectors into 4 halves. */
45742 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
45743 h2 = h1 << nelt2;
45744 h3 = h2 << nelt2;
45745 h4 = h3 << nelt2;
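/* E.g. for V4SImode (nelt == 4, nelt2 == 2): h1 == 0x03 covers op0's low
   half, h2 == 0x0c op0's high half, h3 == 0x30 op1's low half and
   h4 == 0xc0 op1's high half of the CONTENTS bitmask.  */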
45747 /* If all elements come from the two low halves, use interleave low;
45748 similarly for interleave high. If the elements come from mis-matched
45749 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
45750 if ((contents & (h1 | h3)) == contents)
45752 /* punpckl* */
45753 for (i = 0; i < nelt2; ++i)
45755 remap[i] = i * 2;
45756 remap[i + nelt] = i * 2 + 1;
45757 dremap.perm[i * 2] = i;
45758 dremap.perm[i * 2 + 1] = i + nelt;
45760 if (!TARGET_SSE2 && d->vmode == V4SImode)
45761 dremap.vmode = V4SFmode;
45763 else if ((contents & (h2 | h4)) == contents)
45765 /* punpckh* */
45766 for (i = 0; i < nelt2; ++i)
45768 remap[i + nelt2] = i * 2;
45769 remap[i + nelt + nelt2] = i * 2 + 1;
45770 dremap.perm[i * 2] = i + nelt2;
45771 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
45773 if (!TARGET_SSE2 && d->vmode == V4SImode)
45774 dremap.vmode = V4SFmode;
45776 else if ((contents & (h1 | h4)) == contents)
45778 /* shufps */
45779 for (i = 0; i < nelt2; ++i)
45781 remap[i] = i;
45782 remap[i + nelt + nelt2] = i + nelt2;
45783 dremap.perm[i] = i;
45784 dremap.perm[i + nelt2] = i + nelt + nelt2;
45786 if (nelt != 4)
45788 /* shufpd */
45789 dremap.vmode = V2DImode;
45790 dremap.nelt = 2;
45791 dremap.perm[0] = 0;
45792 dremap.perm[1] = 3;
45795 else if ((contents & (h2 | h3)) == contents)
45797 /* shufps */
45798 for (i = 0; i < nelt2; ++i)
45800 remap[i + nelt2] = i;
45801 remap[i + nelt] = i + nelt2;
45802 dremap.perm[i] = i + nelt2;
45803 dremap.perm[i + nelt2] = i + nelt;
45805 if (nelt != 4)
45807 /* shufpd */
45808 dremap.vmode = V2DImode;
45809 dremap.nelt = 2;
45810 dremap.perm[0] = 1;
45811 dremap.perm[1] = 2;
45814 else
45815 return false;
45817 else
45819 unsigned int nelt4 = nelt / 4, nzcnt = 0;
45820 unsigned HOST_WIDE_INT q[8];
45821 unsigned int nonzero_halves[4];
45823 /* Split the two input vectors into 8 quarters. */
45824 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
45825 for (i = 1; i < 8; ++i)
45826 q[i] = q[0] << (nelt4 * i);
45827 for (i = 0; i < 4; ++i)
45828 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
45830 nonzero_halves[nzcnt] = i;
45831 ++nzcnt;
45834 if (nzcnt == 1)
45836 gcc_assert (d->one_operand_p);
45837 nonzero_halves[1] = nonzero_halves[0];
45838 same_halves = true;
45840 else if (d->one_operand_p)
45842 gcc_assert (nonzero_halves[0] == 0);
45843 gcc_assert (nonzero_halves[1] == 1);
45846 if (nzcnt <= 2)
45848 if (d->perm[0] / nelt2 == nonzero_halves[1])
45850 /* Attempt to increase the likelihood that dfinal
45851 shuffle will be intra-lane. */
45852 std::swap (nonzero_halves[0], nonzero_halves[1]);
45855 /* vperm2f128 or vperm2i128. */
45856 for (i = 0; i < nelt2; ++i)
45858 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
45859 remap[i + nonzero_halves[0] * nelt2] = i;
45860 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
45861 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
45864 if (d->vmode != V8SFmode
45865 && d->vmode != V4DFmode
45866 && d->vmode != V8SImode)
45868 dremap.vmode = V8SImode;
45869 dremap.nelt = 8;
45870 for (i = 0; i < 4; ++i)
45872 dremap.perm[i] = i + nonzero_halves[0] * 4;
45873 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
45877 else if (d->one_operand_p)
45878 return false;
45879 else if (TARGET_AVX2
45880 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
45882 /* vpunpckl* */
45883 for (i = 0; i < nelt4; ++i)
45885 remap[i] = i * 2;
45886 remap[i + nelt] = i * 2 + 1;
45887 remap[i + nelt2] = i * 2 + nelt2;
45888 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
45889 dremap.perm[i * 2] = i;
45890 dremap.perm[i * 2 + 1] = i + nelt;
45891 dremap.perm[i * 2 + nelt2] = i + nelt2;
45892 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
45895 else if (TARGET_AVX2
45896 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
45898 /* vpunpckh* */
45899 for (i = 0; i < nelt4; ++i)
45901 remap[i + nelt4] = i * 2;
45902 remap[i + nelt + nelt4] = i * 2 + 1;
45903 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
45904 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
45905 dremap.perm[i * 2] = i + nelt4;
45906 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
45907 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
45908 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
45911 else
45912 return false;
45915 /* Use the remapping array set up above to move the elements from their
45916 swizzled locations into their final destinations. */
45917 dfinal = *d;
45918 for (i = 0; i < nelt; ++i)
45920 unsigned e = remap[d->perm[i]];
45921 gcc_assert (e < nelt);
45922 /* If same_halves is true, both halves of the remapped vector are the
45923 same. Avoid cross-lane accesses if possible. */
45924 if (same_halves && i >= nelt2)
45926 gcc_assert (e < nelt2);
45927 dfinal.perm[i] = e + nelt2;
45929 else
45930 dfinal.perm[i] = e;
45932 if (!d->testing_p)
45934 dremap.target = gen_reg_rtx (dremap.vmode);
45935 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
45937 dfinal.op1 = dfinal.op0;
45938 dfinal.one_operand_p = true;
45940 /* Test if the final remap can be done with a single insn. For V4SFmode or
45941 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
45942 start_sequence ();
45943 ok = expand_vec_perm_1 (&dfinal);
45944 seq = get_insns ();
45945 end_sequence ();
45947 if (!ok)
45948 return false;
45950 if (d->testing_p)
45951 return true;
45953 if (dremap.vmode != dfinal.vmode)
45955 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
45956 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
45959 ok = expand_vec_perm_1 (&dremap);
45960 gcc_assert (ok);
45962 emit_insn (seq);
45963 return true;
45966 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
45967 a single vector cross-lane permutation into vpermq followed
45968 by any of the single insn permutations. */
45970 static bool
45971 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
45973 struct expand_vec_perm_d dremap, dfinal;
45974 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
45975 unsigned contents[2];
45976 bool ok;
45978 if (!(TARGET_AVX2
45979 && (d->vmode == V32QImode || d->vmode == V16HImode)
45980 && d->one_operand_p))
45981 return false;
45983 contents[0] = 0;
45984 contents[1] = 0;
45985 for (i = 0; i < nelt2; ++i)
45987 contents[0] |= 1u << (d->perm[i] / nelt4);
45988 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
45991 for (i = 0; i < 2; ++i)
45993 unsigned int cnt = 0;
45994 for (j = 0; j < 4; ++j)
45995 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
45996 return false;
45999 if (d->testing_p)
46000 return true;
46002 dremap = *d;
46003 dremap.vmode = V4DImode;
46004 dremap.nelt = 4;
46005 dremap.target = gen_reg_rtx (V4DImode);
46006 dremap.op0 = gen_lowpart (V4DImode, d->op0);
46007 dremap.op1 = dremap.op0;
46008 dremap.one_operand_p = true;
46009 for (i = 0; i < 2; ++i)
46011 unsigned int cnt = 0;
46012 for (j = 0; j < 4; ++j)
46013 if ((contents[i] & (1u << j)) != 0)
46014 dremap.perm[2 * i + cnt++] = j;
46015 for (; cnt < 2; ++cnt)
46016 dremap.perm[2 * i + cnt] = 0;
46019 dfinal = *d;
46020 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46021 dfinal.op1 = dfinal.op0;
46022 dfinal.one_operand_p = true;
46023 for (i = 0, j = 0; i < nelt; ++i)
46025 if (i == nelt2)
46026 j = 2;
46027 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
46028 if ((d->perm[i] / nelt4) == dremap.perm[j])
46030 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
46031 dfinal.perm[i] |= nelt4;
46032 else
46033 gcc_unreachable ();
46036 ok = expand_vec_perm_1 (&dremap);
46037 gcc_assert (ok);
46039 ok = expand_vec_perm_1 (&dfinal);
46040 gcc_assert (ok);
46042 return true;
46045 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
46046 a vector permutation using two instructions, vperm2f128 resp.
46047 vperm2i128 followed by any single in-lane permutation. */
46049 static bool
46050 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
46052 struct expand_vec_perm_d dfirst, dsecond;
46053 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
46054 bool ok;
46056 if (!TARGET_AVX
46057 || GET_MODE_SIZE (d->vmode) != 32
46058 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
46059 return false;
46061 dsecond = *d;
46062 dsecond.one_operand_p = false;
46063 dsecond.testing_p = true;
46065 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46066 immediate. For perm < 16 the second permutation uses
46067 d->op0 as first operand, for perm >= 16 it uses d->op1
46068 as first operand. The second operand is the result of
46069 vperm2[fi]128. */
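/* E.g. perm == 6 selects op1's low lane (selector 2) for the low result
   lane and op0's high lane (selector 1) for the high result lane;
   ((6 << 2) | 6) & 0x33 == 0x12 is the corresponding immediate.  */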
46070 for (perm = 0; perm < 32; perm++)
46072 /* Ignore permutations which do not move anything cross-lane. */
46073 if (perm < 16)
46075 /* The second shuffle for e.g. V4DFmode has
46076 0123 and ABCD operands.
46077 Ignore AB23, as 23 is already in the second lane
46078 of the first operand. */
46079 if ((perm & 0xc) == (1 << 2)) continue;
46080 /* And 01CD, as 01 is in the first lane of the first
46081 operand. */
46082 if ((perm & 3) == 0) continue;
46083 /* And 4567, as then the vperm2[fi]128 doesn't change
46084 anything on the original 4567 second operand. */
46085 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
46087 else
46089 /* The second shuffle for e.g. V4DFmode has
46090 4567 and ABCD operands.
46091 Ignore AB67, as 67 is already in the second lane
46092 of the first operand. */
46093 if ((perm & 0xc) == (3 << 2)) continue;
46094 /* And 45CD, as 45 is in the first lane of the first
46095 operand. */
46096 if ((perm & 3) == 2) continue;
46097 /* And 0123, as then the vperm2[fi]128 doesn't change
46098 anything on the original 0123 first operand. */
46099 if ((perm & 0xf) == (1 << 2)) continue;
46102 for (i = 0; i < nelt; i++)
46104 j = d->perm[i] / nelt2;
46105 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
46106 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
46107 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
46108 dsecond.perm[i] = d->perm[i] & (nelt - 1);
46109 else
46110 break;
46113 if (i == nelt)
46115 start_sequence ();
46116 ok = expand_vec_perm_1 (&dsecond);
46117 end_sequence ();
46119 else
46120 ok = false;
46122 if (ok)
46124 if (d->testing_p)
46125 return true;
46127 /* Found a usable second shuffle. dfirst will be
46128 vperm2f128 on d->op0 and d->op1. */
46129 dsecond.testing_p = false;
46130 dfirst = *d;
46131 dfirst.target = gen_reg_rtx (d->vmode);
46132 for (i = 0; i < nelt; i++)
46133 dfirst.perm[i] = (i & (nelt2 - 1))
46134 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
46136 canonicalize_perm (&dfirst);
46137 ok = expand_vec_perm_1 (&dfirst);
46138 gcc_assert (ok);
46140 /* And dsecond is some single insn shuffle, taking
46141 d->op0 and result of vperm2f128 (if perm < 16) or
46142 d->op1 and result of vperm2f128 (otherwise). */
46143 if (perm >= 16)
46144 dsecond.op0 = dsecond.op1;
46145 dsecond.op1 = dfirst.target;
46147 ok = expand_vec_perm_1 (&dsecond);
46148 gcc_assert (ok);
46150 return true;
46153 /* For one operand, the only useful vperm2f128 permutation is 0x01,
46154 i.e. swapping the two 128-bit lanes. */
46155 if (d->one_operand_p)
46156 return false;
46159 return false;
46162 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46163 a two vector permutation using 2 intra-lane interleave insns
46164 and cross-lane shuffle for 32-byte vectors. */
46166 static bool
46167 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
46169 unsigned i, nelt;
46170 rtx (*gen) (rtx, rtx, rtx);
46172 if (d->one_operand_p)
46173 return false;
46174 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
46176 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
46178 else
46179 return false;
46181 nelt = d->nelt;
46182 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
46183 return false;
46184 for (i = 0; i < nelt; i += 2)
46185 if (d->perm[i] != d->perm[0] + i / 2
46186 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
46187 return false;
46189 if (d->testing_p)
46190 return true;
46192 switch (d->vmode)
46194 case E_V32QImode:
46195 if (d->perm[0])
46196 gen = gen_vec_interleave_highv32qi;
46197 else
46198 gen = gen_vec_interleave_lowv32qi;
46199 break;
46200 case E_V16HImode:
46201 if (d->perm[0])
46202 gen = gen_vec_interleave_highv16hi;
46203 else
46204 gen = gen_vec_interleave_lowv16hi;
46205 break;
46206 case E_V8SImode:
46207 if (d->perm[0])
46208 gen = gen_vec_interleave_highv8si;
46209 else
46210 gen = gen_vec_interleave_lowv8si;
46211 break;
46212 case E_V4DImode:
46213 if (d->perm[0])
46214 gen = gen_vec_interleave_highv4di;
46215 else
46216 gen = gen_vec_interleave_lowv4di;
46217 break;
46218 case E_V8SFmode:
46219 if (d->perm[0])
46220 gen = gen_vec_interleave_highv8sf;
46221 else
46222 gen = gen_vec_interleave_lowv8sf;
46223 break;
46224 case E_V4DFmode:
46225 if (d->perm[0])
46226 gen = gen_vec_interleave_highv4df;
46227 else
46228 gen = gen_vec_interleave_lowv4df;
46229 break;
46230 default:
46231 gcc_unreachable ();
46234 emit_insn (gen (d->target, d->op0, d->op1));
46235 return true;
46238 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
46239 a single vector permutation using a single intra-lane vector
46240 permutation, vperm2f128 swapping the lanes and vblend* insn blending
46241 the non-swapped and swapped vectors together. */
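/* For example, the one-operand V4DFmode permutation { 2 1 0 3 } decomposes
here into an in-lane permutation (the identity in this case), a vperm2f128
swapping the two 128-bit lanes, and a vblendpd with mask 0b0101 that takes
elements 0 and 2 from the swapped copy. */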
46243 static bool
46244 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
46246 struct expand_vec_perm_d dfirst, dsecond;
46247 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
46248 rtx_insn *seq;
46249 bool ok;
46250 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
46252 if (!TARGET_AVX
46253 || TARGET_AVX2
46254 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
46255 || !d->one_operand_p)
46256 return false;
46258 dfirst = *d;
46259 for (i = 0; i < nelt; i++)
46260 dfirst.perm[i] = 0xff;
46261 for (i = 0, msk = 0; i < nelt; i++)
46263 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
46264 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
46265 return false;
46266 dfirst.perm[j] = d->perm[i];
46267 if (j != i)
46268 msk |= (1 << i);
46270 for (i = 0; i < nelt; i++)
46271 if (dfirst.perm[i] == 0xff)
46272 dfirst.perm[i] = i;
46274 if (!d->testing_p)
46275 dfirst.target = gen_reg_rtx (dfirst.vmode);
46277 start_sequence ();
46278 ok = expand_vec_perm_1 (&dfirst);
46279 seq = get_insns ();
46280 end_sequence ();
46282 if (!ok)
46283 return false;
46285 if (d->testing_p)
46286 return true;
46288 emit_insn (seq);
46290 dsecond = *d;
46291 dsecond.op0 = dfirst.target;
46292 dsecond.op1 = dfirst.target;
46293 dsecond.one_operand_p = true;
46294 dsecond.target = gen_reg_rtx (dsecond.vmode);
46295 for (i = 0; i < nelt; i++)
46296 dsecond.perm[i] = i ^ nelt2;
46298 ok = expand_vec_perm_1 (&dsecond);
46299 gcc_assert (ok);
46301 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
46302 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
46303 return true;
46306 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
46307 permutation using two vperm2f128, followed by a vshufpd insn blending
46308 the two vectors together. */
46310 static bool
46311 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
46313 struct expand_vec_perm_d dfirst, dsecond, dthird;
46314 bool ok;
46316 if (!TARGET_AVX || (d->vmode != V4DFmode))
46317 return false;
46319 if (d->testing_p)
46320 return true;
46322 dfirst = *d;
46323 dsecond = *d;
46324 dthird = *d;
46326 dfirst.perm[0] = (d->perm[0] & ~1);
46327 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
46328 dfirst.perm[2] = (d->perm[2] & ~1);
46329 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
46330 dsecond.perm[0] = (d->perm[1] & ~1);
46331 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
46332 dsecond.perm[2] = (d->perm[3] & ~1);
46333 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
46334 dthird.perm[0] = (d->perm[0] % 2);
46335 dthird.perm[1] = (d->perm[1] % 2) + 4;
46336 dthird.perm[2] = (d->perm[2] % 2) + 2;
46337 dthird.perm[3] = (d->perm[3] % 2) + 6;
46339 dfirst.target = gen_reg_rtx (dfirst.vmode);
46340 dsecond.target = gen_reg_rtx (dsecond.vmode);
46341 dthird.op0 = dfirst.target;
46342 dthird.op1 = dsecond.target;
46343 dthird.one_operand_p = false;
46345 canonicalize_perm (&dfirst);
46346 canonicalize_perm (&dsecond);
46348 ok = expand_vec_perm_1 (&dfirst)
46349 && expand_vec_perm_1 (&dsecond)
46350 && expand_vec_perm_1 (&dthird);
46352 gcc_assert (ok);
46354 return true;
46357 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
46358 permutation with two pshufb insns and an ior. We should have already
46359 failed all two instruction sequences. */
46361 static bool
46362 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
46364 rtx rperm[2][16], vperm, l, h, op, m128;
46365 unsigned int i, nelt, eltsz;
46367 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46368 return false;
46369 gcc_assert (!d->one_operand_p);
46371 if (d->testing_p)
46372 return true;
46374 nelt = d->nelt;
46375 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46377 /* Generate two permutation masks. If the required element is within
46378 the given vector it is shuffled into the proper lane. If the required
46379 element is in the other vector, force a zero into the lane by setting
46380 bit 7 in the permutation mask. */
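/* For example, if element i of a V8HImode permutation must come from
element 9 (element 1 of op1), bytes 2*i and 2*i+1 of the second mask are
2 and 3 while the first mask holds -128 there, so the first pshufb zeroes
that position, the second fetches it from op1 and the ior merges them. */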
46381 m128 = GEN_INT (-128);
46382 for (i = 0; i < nelt; ++i)
46384 unsigned j, e = d->perm[i];
46385 unsigned which = (e >= nelt);
46386 if (e >= nelt)
46387 e -= nelt;
46389 for (j = 0; j < eltsz; ++j)
46391 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
46392 rperm[1-which][i*eltsz + j] = m128;
46396 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
46397 vperm = force_reg (V16QImode, vperm);
46399 l = gen_reg_rtx (V16QImode);
46400 op = gen_lowpart (V16QImode, d->op0);
46401 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
46403 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
46404 vperm = force_reg (V16QImode, vperm);
46406 h = gen_reg_rtx (V16QImode);
46407 op = gen_lowpart (V16QImode, d->op1);
46408 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
46410 op = d->target;
46411 if (d->vmode != V16QImode)
46412 op = gen_reg_rtx (V16QImode);
46413 emit_insn (gen_iorv16qi3 (op, l, h));
46414 if (op != d->target)
46415 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46417 return true;
46420 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
46421 with two vpshufb insns, vpermq and vpor. We should have already failed
46422 all two or three instruction sequences. */
46424 static bool
46425 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
46427 rtx rperm[2][32], vperm, l, h, hp, op, m128;
46428 unsigned int i, nelt, eltsz;
46430 if (!TARGET_AVX2
46431 || !d->one_operand_p
46432 || (d->vmode != V32QImode && d->vmode != V16HImode))
46433 return false;
46435 if (d->testing_p)
46436 return true;
46438 nelt = d->nelt;
46439 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46441 /* Generate two permutation masks. If the required element is within
46442 the same lane, it is shuffled in. If the required element is from the
46443 other lane, force a zero by setting bit 7 in the permutation mask.
46444 The other mask has non-negative elements if the element is
46445 requested from the other lane, but it is also moved to the other lane,
46446 so that the result of vpshufb can have the two V2TImode halves
46447 swapped. */
46448 m128 = GEN_INT (-128);
46449 for (i = 0; i < nelt; ++i)
46451 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46452 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46454 for (j = 0; j < eltsz; ++j)
46456 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
46457 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
46461 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46462 vperm = force_reg (V32QImode, vperm);
46464 h = gen_reg_rtx (V32QImode);
46465 op = gen_lowpart (V32QImode, d->op0);
46466 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46468 /* Swap the 128-bit lanes of h into hp. */
46469 hp = gen_reg_rtx (V4DImode);
46470 op = gen_lowpart (V4DImode, h);
46471 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
46472 const1_rtx));
46474 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46475 vperm = force_reg (V32QImode, vperm);
46477 l = gen_reg_rtx (V32QImode);
46478 op = gen_lowpart (V32QImode, d->op0);
46479 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46481 op = d->target;
46482 if (d->vmode != V32QImode)
46483 op = gen_reg_rtx (V32QImode);
46484 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
46485 if (op != d->target)
46486 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46488 return true;
46491 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46492 and extract-odd permutations of two V32QImode or V16HImode operands
46493 with two vpshufb insns, vpor and vpermq. We should have already
46494 failed all two or three instruction sequences. */
46496 static bool
46497 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
46499 rtx rperm[2][32], vperm, l, h, ior, op, m128;
46500 unsigned int i, nelt, eltsz;
46502 if (!TARGET_AVX2
46503 || d->one_operand_p
46504 || (d->vmode != V32QImode && d->vmode != V16HImode))
46505 return false;
46507 for (i = 0; i < d->nelt; ++i)
46508 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
46509 return false;
46511 if (d->testing_p)
46512 return true;
46514 nelt = d->nelt;
46515 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46517 /* Generate two permutation masks. In the first permutation mask
46518 the first quarter will contain indexes for the first half
46519 of the op0, the second quarter will contain bit 7 set, third quarter
46520 will contain indexes for the second half of the op0 and the
46521 last quarter bit 7 set. In the second permutation mask
46522 the first quarter will contain bit 7 set, the second quarter
46523 indexes for the first half of the op1, the third quarter bit 7 set
46524 and last quarter indexes for the second half of the op1.
46525 I.e. the first mask e.g. for V32QImode extract even will be:
46526 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
46527 (all values masked with 0xf except for -128) and second mask
46528 for extract even will be
46529 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
46530 m128 = GEN_INT (-128);
46531 for (i = 0; i < nelt; ++i)
46533 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46534 unsigned which = d->perm[i] >= nelt;
46535 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
46537 for (j = 0; j < eltsz; ++j)
46539 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
46540 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
46544 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
46545 vperm = force_reg (V32QImode, vperm);
46547 l = gen_reg_rtx (V32QImode);
46548 op = gen_lowpart (V32QImode, d->op0);
46549 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
46551 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46552 vperm = force_reg (V32QImode, vperm);
46554 h = gen_reg_rtx (V32QImode);
46555 op = gen_lowpart (V32QImode, d->op1);
46556 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46558 ior = gen_reg_rtx (V32QImode);
46559 emit_insn (gen_iorv32qi3 (ior, l, h));
46561 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
46562 op = gen_reg_rtx (V4DImode);
46563 ior = gen_lowpart (V4DImode, ior);
46564 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
46565 const1_rtx, GEN_INT (3)));
46566 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46568 return true;
46571 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46572 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
46573 with two "and" and "pack" or two "shift" and "pack" insns. We should
46574 have already failed all two instruction sequences. */
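/* For example, a V16QImode extract-even masks every 16-bit word of both
operands with 0x00ff and combines the two results with packuswb; an
extract-odd shifts every word right by 8 instead. The 256-bit variants
also need the vpermq emitted below, because vpackus* packs within
128-bit lanes. */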
46576 static bool
46577 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
46579 rtx op, dop0, dop1, t;
46580 unsigned i, odd, c, s, nelt = d->nelt;
46581 bool end_perm = false;
46582 machine_mode half_mode;
46583 rtx (*gen_and) (rtx, rtx, rtx);
46584 rtx (*gen_pack) (rtx, rtx, rtx);
46585 rtx (*gen_shift) (rtx, rtx, rtx);
46587 if (d->one_operand_p)
46588 return false;
46590 switch (d->vmode)
46592 case E_V8HImode:
46593 /* Required for "pack". */
46594 if (!TARGET_SSE4_1)
46595 return false;
46596 c = 0xffff;
46597 s = 16;
46598 half_mode = V4SImode;
46599 gen_and = gen_andv4si3;
46600 gen_pack = gen_sse4_1_packusdw;
46601 gen_shift = gen_lshrv4si3;
46602 break;
46603 case E_V16QImode:
46604 /* No check as all instructions are SSE2. */
46605 c = 0xff;
46606 s = 8;
46607 half_mode = V8HImode;
46608 gen_and = gen_andv8hi3;
46609 gen_pack = gen_sse2_packuswb;
46610 gen_shift = gen_lshrv8hi3;
46611 break;
46612 case E_V16HImode:
46613 if (!TARGET_AVX2)
46614 return false;
46615 c = 0xffff;
46616 s = 16;
46617 half_mode = V8SImode;
46618 gen_and = gen_andv8si3;
46619 gen_pack = gen_avx2_packusdw;
46620 gen_shift = gen_lshrv8si3;
46621 end_perm = true;
46622 break;
46623 case E_V32QImode:
46624 if (!TARGET_AVX2)
46625 return false;
46626 c = 0xff;
46627 s = 8;
46628 half_mode = V16HImode;
46629 gen_and = gen_andv16hi3;
46630 gen_pack = gen_avx2_packuswb;
46631 gen_shift = gen_lshrv16hi3;
46632 end_perm = true;
46633 break;
46634 default:
46635 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
46636 general shuffles. */
46637 return false;
46640 /* Check that permutation is even or odd. */
46641 odd = d->perm[0];
46642 if (odd > 1)
46643 return false;
46645 for (i = 1; i < nelt; ++i)
46646 if (d->perm[i] != 2 * i + odd)
46647 return false;
46649 if (d->testing_p)
46650 return true;
46652 dop0 = gen_reg_rtx (half_mode);
46653 dop1 = gen_reg_rtx (half_mode);
46654 if (odd == 0)
46656 t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
46657 t = force_reg (half_mode, t);
46658 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
46659 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
46661 else
46663 emit_insn (gen_shift (dop0,
46664 gen_lowpart (half_mode, d->op0),
46665 GEN_INT (s)));
46666 emit_insn (gen_shift (dop1,
46667 gen_lowpart (half_mode, d->op1),
46668 GEN_INT (s)));
46670 /* In the AVX2 256-bit case we need to permute the pack result. */
46671 if (TARGET_AVX2 && end_perm)
46673 op = gen_reg_rtx (d->vmode);
46674 t = gen_reg_rtx (V4DImode);
46675 emit_insn (gen_pack (op, dop0, dop1));
46676 emit_insn (gen_avx2_permv4di_1 (t,
46677 gen_lowpart (V4DImode, op),
46678 const0_rtx,
46679 const2_rtx,
46680 const1_rtx,
46681 GEN_INT (3)));
46682 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
46684 else
46685 emit_insn (gen_pack (d->target, dop0, dop1));
46687 return true;
46690 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
46691 and extract-odd permutations of two V64QI operands
46692 with two "shifts", two "truncs" and one "concat" insn for "odd"
46693 and two "truncs" and one "concat" insn for "even".
46694 We should have already failed all two instruction sequences. */
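/* For example, an extract-even uses vpmovwb on the V32HImode view of each
operand, which keeps the low (even-numbered) byte of every word, and then
concatenates the two 32-byte results; an extract-odd first shifts every
word right by 8 so that the odd bytes move into the low position. */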
46696 static bool
46697 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
46699 rtx t1, t2, t3, t4;
46700 unsigned i, odd, nelt = d->nelt;
46702 if (!TARGET_AVX512BW
46703 || d->one_operand_p
46704 || d->vmode != V64QImode)
46705 return false;
46707 /* Check that permutation is even or odd. */
46708 odd = d->perm[0];
46709 if (odd > 1)
46710 return false;
46712 for (i = 1; i < nelt; ++i)
46713 if (d->perm[i] != 2 * i + odd)
46714 return false;
46716 if (d->testing_p)
46717 return true;
46720 if (odd)
46722 t1 = gen_reg_rtx (V32HImode);
46723 t2 = gen_reg_rtx (V32HImode);
46724 emit_insn (gen_lshrv32hi3 (t1,
46725 gen_lowpart (V32HImode, d->op0),
46726 GEN_INT (8)));
46727 emit_insn (gen_lshrv32hi3 (t2,
46728 gen_lowpart (V32HImode, d->op1),
46729 GEN_INT (8)));
46731 else
46733 t1 = gen_lowpart (V32HImode, d->op0);
46734 t2 = gen_lowpart (V32HImode, d->op1);
46737 t3 = gen_reg_rtx (V32QImode);
46738 t4 = gen_reg_rtx (V32QImode);
46739 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
46740 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
46741 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
46743 return true;
46746 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
46747 and extract-odd permutations. */
46749 static bool
46750 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
46752 rtx t1, t2, t3, t4, t5;
46754 switch (d->vmode)
46756 case E_V4DFmode:
46757 if (d->testing_p)
46758 break;
46759 t1 = gen_reg_rtx (V4DFmode);
46760 t2 = gen_reg_rtx (V4DFmode);
46762 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46763 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
46764 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
46766 /* Now an unpck[lh]pd will produce the result required. */
46767 if (odd)
46768 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
46769 else
46770 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
46771 emit_insn (t3);
46772 break;
46774 case E_V8SFmode:
46776 int mask = odd ? 0xdd : 0x88;
46778 if (d->testing_p)
46779 break;
46780 t1 = gen_reg_rtx (V8SFmode);
46781 t2 = gen_reg_rtx (V8SFmode);
46782 t3 = gen_reg_rtx (V8SFmode);
46784 /* Shuffle within the 128-bit lanes to produce:
46785 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
46786 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
46787 GEN_INT (mask)));
46789 /* Shuffle the lanes around to produce:
46790 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
46791 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
46792 GEN_INT (0x3)));
46794 /* Shuffle within the 128-bit lanes to produce:
46795 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
46796 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
46798 /* Shuffle within the 128-bit lanes to produce:
46799 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
46800 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
46802 /* Shuffle the lanes around to produce:
46803 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
46804 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
46805 GEN_INT (0x20)));
46807 break;
46809 case E_V2DFmode:
46810 case E_V4SFmode:
46811 case E_V2DImode:
46812 case E_V4SImode:
46813 /* These are always directly implementable by expand_vec_perm_1. */
46814 gcc_unreachable ();
46816 case E_V8HImode:
46817 if (TARGET_SSE4_1)
46818 return expand_vec_perm_even_odd_pack (d);
46819 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
46820 return expand_vec_perm_pshufb2 (d);
46821 else
46823 if (d->testing_p)
46824 break;
46825 /* We need 2*log2(N)-1 operations to achieve odd/even
46826 with interleave. */
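/* E.g. for V8HImode operands { a0 ... a7 } and { b0 ... b7 } the three
rounds of interleaving below end with { a0 a2 a4 a6 b0 b2 b4 b6 } for
the even case and { a1 a3 a5 a7 b1 b3 b5 b7 } for the odd case. */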
46827 t1 = gen_reg_rtx (V8HImode);
46828 t2 = gen_reg_rtx (V8HImode);
46829 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
46830 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
46831 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
46832 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
46833 if (odd)
46834 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
46835 else
46836 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
46837 emit_insn (t3);
46839 break;
46841 case E_V16QImode:
46842 return expand_vec_perm_even_odd_pack (d);
46844 case E_V16HImode:
46845 case E_V32QImode:
46846 return expand_vec_perm_even_odd_pack (d);
46848 case E_V64QImode:
46849 return expand_vec_perm_even_odd_trunc (d);
46851 case E_V4DImode:
46852 if (!TARGET_AVX2)
46854 struct expand_vec_perm_d d_copy = *d;
46855 d_copy.vmode = V4DFmode;
46856 if (d->testing_p)
46857 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
46858 else
46859 d_copy.target = gen_reg_rtx (V4DFmode);
46860 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
46861 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
46862 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46864 if (!d->testing_p)
46865 emit_move_insn (d->target,
46866 gen_lowpart (V4DImode, d_copy.target));
46867 return true;
46869 return false;
46872 if (d->testing_p)
46873 break;
46875 t1 = gen_reg_rtx (V4DImode);
46876 t2 = gen_reg_rtx (V4DImode);
46878 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
46879 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
46880 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
46882 /* Now an vpunpck[lh]qdq will produce the result required. */
46883 if (odd)
46884 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
46885 else
46886 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
46887 emit_insn (t3);
46888 break;
46890 case E_V8SImode:
46891 if (!TARGET_AVX2)
46893 struct expand_vec_perm_d d_copy = *d;
46894 d_copy.vmode = V8SFmode;
46895 if (d->testing_p)
46896 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
46897 else
46898 d_copy.target = gen_reg_rtx (V8SFmode);
46899 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
46900 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
46901 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
46903 if (!d->testing_p)
46904 emit_move_insn (d->target,
46905 gen_lowpart (V8SImode, d_copy.target));
46906 return true;
46908 return false;
46911 if (d->testing_p)
46912 break;
46914 t1 = gen_reg_rtx (V8SImode);
46915 t2 = gen_reg_rtx (V8SImode);
46916 t3 = gen_reg_rtx (V4DImode);
46917 t4 = gen_reg_rtx (V4DImode);
46918 t5 = gen_reg_rtx (V4DImode);
46920 /* Shuffle the lanes around into
46921 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
46922 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
46923 gen_lowpart (V4DImode, d->op1),
46924 GEN_INT (0x20)));
46925 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
46926 gen_lowpart (V4DImode, d->op1),
46927 GEN_INT (0x31)));
46929 /* Swap the 2nd and 3rd position in each lane into
46930 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
46931 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
46932 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46933 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
46934 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
46936 /* Now an vpunpck[lh]qdq will produce
46937 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
46938 if (odd)
46939 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
46940 gen_lowpart (V4DImode, t2));
46941 else
46942 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
46943 gen_lowpart (V4DImode, t2));
46944 emit_insn (t3);
46945 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
46946 break;
46948 default:
46949 gcc_unreachable ();
46952 return true;
46955 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
46956 extract-even and extract-odd permutations. */
46958 static bool
46959 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
46961 unsigned i, odd, nelt = d->nelt;
46963 odd = d->perm[0];
46964 if (odd != 0 && odd != 1)
46965 return false;
46967 for (i = 1; i < nelt; ++i)
46968 if (d->perm[i] != 2 * i + odd)
46969 return false;
46971 return expand_vec_perm_even_odd_1 (d, odd);
46974 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
46975 permutations. We assume that expand_vec_perm_1 has already failed. */
46977 static bool
46978 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
46980 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
46981 machine_mode vmode = d->vmode;
46982 unsigned char perm2[4];
46983 rtx op0 = d->op0, dest;
46984 bool ok;
46986 switch (vmode)
46988 case E_V4DFmode:
46989 case E_V8SFmode:
46990 /* These are special-cased in sse.md so that we can optionally
46991 use the vbroadcast instruction. They expand to two insns
46992 if the input happens to be in a register. */
46993 gcc_unreachable ();
46995 case E_V2DFmode:
46996 case E_V2DImode:
46997 case E_V4SFmode:
46998 case E_V4SImode:
46999 /* These are always implementable using standard shuffle patterns. */
47000 gcc_unreachable ();
47002 case E_V8HImode:
47003 case E_V16QImode:
47004 /* These can be implemented via interleave. We save one insn by
47005 stopping once we have promoted to V4SImode and then use pshufd. */
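/* E.g. broadcasting element 5 of a V8HImode vector: one punpckhwd pairs
it up ({ 4 4 5 5 6 6 7 7 }), after which V4SImode element 1 holds two
copies of it and a pshufd with { 1 1 1 1 } completes the broadcast. */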
47006 if (d->testing_p)
47007 return true;
47010 rtx dest;
47011 rtx (*gen) (rtx, rtx, rtx)
47012 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
47013 : gen_vec_interleave_lowv8hi;
47015 if (elt >= nelt2)
47017 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
47018 : gen_vec_interleave_highv8hi;
47019 elt -= nelt2;
47021 nelt2 /= 2;
47023 dest = gen_reg_rtx (vmode);
47024 emit_insn (gen (dest, op0, op0));
47025 vmode = get_mode_wider_vector (vmode);
47026 op0 = gen_lowpart (vmode, dest);
47028 while (vmode != V4SImode);
47030 memset (perm2, elt, 4);
47031 dest = gen_reg_rtx (V4SImode);
47032 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
47033 gcc_assert (ok);
47034 if (!d->testing_p)
47035 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
47036 return true;
47038 case E_V64QImode:
47039 case E_V32QImode:
47040 case E_V16HImode:
47041 case E_V8SImode:
47042 case E_V4DImode:
47043 /* For AVX2 broadcasts of the first element vpbroadcast* or
47044 vpermq should be used by expand_vec_perm_1. */
47045 gcc_assert (!TARGET_AVX2 || d->perm[0]);
47046 return false;
47048 default:
47049 gcc_unreachable ();
47053 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
47054 broadcast permutations. */
47056 static bool
47057 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
47059 unsigned i, elt, nelt = d->nelt;
47061 if (!d->one_operand_p)
47062 return false;
47064 elt = d->perm[0];
47065 for (i = 1; i < nelt; ++i)
47066 if (d->perm[i] != elt)
47067 return false;
47069 return expand_vec_perm_broadcast_1 (d);
47072 /* Implement arbitrary permutations of two V64QImode operands
47073 with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
47074 static bool
47075 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
47077 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
47078 return false;
47080 if (d->testing_p)
47081 return true;
47083 struct expand_vec_perm_d ds[2];
47084 rtx rperm[128], vperm, target0, target1;
47085 unsigned int i, nelt;
47086 machine_mode vmode;
47088 nelt = d->nelt;
47089 vmode = V64QImode;
47091 for (i = 0; i < 2; i++)
47093 ds[i] = *d;
47094 ds[i].vmode = V32HImode;
47095 ds[i].nelt = 32;
47096 ds[i].target = gen_reg_rtx (V32HImode);
47097 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
47098 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
47101 /* Prepare permutations such that the first one takes care of
47102 putting the even bytes into the right positions or one position
47103 higher (ds[0]) and the second one takes care of
47104 putting the odd bytes into the right positions or one position
47105 lower (ds[1]). */
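/* For example, if result byte 5 must come from source byte 11, ds[1]
moves source word 5 (bytes 10 and 11) to word position 2 (bytes 4 and 5),
the second vpshufb mask then selects byte (5 & 14) + (11 & 1) = 5 within
the lane, and the first mask holds -1 there so the vpor keeps only the
byte produced by the second shuffle. */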
47107 for (i = 0; i < nelt; i++)
47109 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
47110 if (i & 1)
47112 rperm[i] = constm1_rtx;
47113 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47115 else
47117 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
47118 rperm[i + 64] = constm1_rtx;
47122 bool ok = expand_vec_perm_1 (&ds[0]);
47123 gcc_assert (ok);
47124 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
47126 ok = expand_vec_perm_1 (&ds[1]);
47127 gcc_assert (ok);
47128 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
47130 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
47131 vperm = force_reg (vmode, vperm);
47132 target0 = gen_reg_rtx (V64QImode);
47133 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
47135 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
47136 vperm = force_reg (vmode, vperm);
47137 target1 = gen_reg_rtx (V64QImode);
47138 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
47140 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
47141 return true;
47144 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
47145 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
47146 all the shorter instruction sequences. */
47148 static bool
47149 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
47151 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
47152 unsigned int i, nelt, eltsz;
47153 bool used[4];
47155 if (!TARGET_AVX2
47156 || d->one_operand_p
47157 || (d->vmode != V32QImode && d->vmode != V16HImode))
47158 return false;
47160 if (d->testing_p)
47161 return true;
47163 nelt = d->nelt;
47164 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47166 /* Generate 4 permutation masks. If the required element is within
47167 the same lane, it is shuffled in. If the required element is from the
47168 other lane, force a zero by setting bit 7 in the permutation mask.
47169 The other mask has non-negative elements if the element is
47170 requested from the other lane, but it is also moved to the other lane,
47171 so that the result of vpshufb can have the two V2TImode halves
47172 swapped. */
47173 m128 = GEN_INT (-128);
47174 for (i = 0; i < 32; ++i)
47176 rperm[0][i] = m128;
47177 rperm[1][i] = m128;
47178 rperm[2][i] = m128;
47179 rperm[3][i] = m128;
47181 used[0] = false;
47182 used[1] = false;
47183 used[2] = false;
47184 used[3] = false;
47185 for (i = 0; i < nelt; ++i)
47187 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47188 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47189 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
47191 for (j = 0; j < eltsz; ++j)
47192 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
47193 used[which] = true;
47196 for (i = 0; i < 2; ++i)
47198 if (!used[2 * i + 1])
47200 h[i] = NULL_RTX;
47201 continue;
47203 vperm = gen_rtx_CONST_VECTOR (V32QImode,
47204 gen_rtvec_v (32, rperm[2 * i + 1]));
47205 vperm = force_reg (V32QImode, vperm);
47206 h[i] = gen_reg_rtx (V32QImode);
47207 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47208 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
47211 /* Swap the 128-bit lanes of h[X]. */
47212 for (i = 0; i < 2; ++i)
47214 if (h[i] == NULL_RTX)
47215 continue;
47216 op = gen_reg_rtx (V4DImode);
47217 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
47218 const2_rtx, GEN_INT (3), const0_rtx,
47219 const1_rtx));
47220 h[i] = gen_lowpart (V32QImode, op);
47223 for (i = 0; i < 2; ++i)
47225 if (!used[2 * i])
47227 l[i] = NULL_RTX;
47228 continue;
47230 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
47231 vperm = force_reg (V32QImode, vperm);
47232 l[i] = gen_reg_rtx (V32QImode);
47233 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47234 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
47237 for (i = 0; i < 2; ++i)
47239 if (h[i] && l[i])
47241 op = gen_reg_rtx (V32QImode);
47242 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
47243 l[i] = op;
47245 else if (h[i])
47246 l[i] = h[i];
47249 gcc_assert (l[0] && l[1]);
47250 op = d->target;
47251 if (d->vmode != V32QImode)
47252 op = gen_reg_rtx (V32QImode);
47253 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
47254 if (op != d->target)
47255 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47256 return true;
47259 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
47260 With all of the interface bits taken care of, perform the expansion
47261 in D and return true on success. */
47263 static bool
47264 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
47266 /* Try a single instruction expansion. */
47267 if (expand_vec_perm_1 (d))
47268 return true;
47270 /* Try sequences of two instructions. */
47272 if (expand_vec_perm_pshuflw_pshufhw (d))
47273 return true;
47275 if (expand_vec_perm_palignr (d, false))
47276 return true;
47278 if (expand_vec_perm_interleave2 (d))
47279 return true;
47281 if (expand_vec_perm_broadcast (d))
47282 return true;
47284 if (expand_vec_perm_vpermq_perm_1 (d))
47285 return true;
47287 if (expand_vec_perm_vperm2f128 (d))
47288 return true;
47290 if (expand_vec_perm_pblendv (d))
47291 return true;
47293 /* Try sequences of three instructions. */
47295 if (expand_vec_perm_even_odd_pack (d))
47296 return true;
47298 if (expand_vec_perm_2vperm2f128_vshuf (d))
47299 return true;
47301 if (expand_vec_perm_pshufb2 (d))
47302 return true;
47304 if (expand_vec_perm_interleave3 (d))
47305 return true;
47307 if (expand_vec_perm_vperm2f128_vblend (d))
47308 return true;
47310 /* Try sequences of four instructions. */
47312 if (expand_vec_perm_even_odd_trunc (d))
47313 return true;
47314 if (expand_vec_perm_vpshufb2_vpermq (d))
47315 return true;
47317 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
47318 return true;
47320 if (expand_vec_perm_vpermt2_vpshub2 (d))
47321 return true;
47323 /* ??? Look for narrow permutations whose element orderings would
47324 allow the promotion to a wider mode. */
47326 /* ??? Look for sequences of interleave or a wider permute that place
47327 the data into the correct lanes for a half-vector shuffle like
47328 pshuf[lh]w or vpermilps. */
47330 /* ??? Look for sequences of interleave that produce the desired results.
47331 The combinatorics of punpck[lh] get pretty ugly... */
47333 if (expand_vec_perm_even_odd (d))
47334 return true;
47336 /* Even longer sequences. */
47337 if (expand_vec_perm_vpshufb4_vpermq2 (d))
47338 return true;
47340 /* See if we can get the same permutation in different vector integer
47341 mode. */
47342 struct expand_vec_perm_d nd;
47343 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
47345 if (!d->testing_p)
47346 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
47347 return true;
47350 return false;
47353 /* If a permutation only uses one operand, make it clear. Returns true
47354 if the permutation references both operands. */
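/* For example, with nelt == 4 the selector { 4 5 6 7 } references only the
second operand (which == 2), so it is folded to { 0 1 2 3 } with op0 set
to op1 and one_operand_p left true; true is returned only when the
selector references elements of both halves. */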
47356 static bool
47357 canonicalize_perm (struct expand_vec_perm_d *d)
47359 int i, which, nelt = d->nelt;
47361 for (i = which = 0; i < nelt; ++i)
47362 which |= (d->perm[i] < nelt ? 1 : 2);
47364 d->one_operand_p = true;
47365 switch (which)
47367 default:
47368 gcc_unreachable();
47370 case 3:
47371 if (!rtx_equal_p (d->op0, d->op1))
47373 d->one_operand_p = false;
47374 break;
47376 /* The elements of PERM do not suggest that only the first operand
47377 is used, but both operands are identical. Allow easier matching
47378 of the permutation by folding the permutation into the single
47379 input vector. */
47380 /* FALLTHRU */
47382 case 2:
47383 for (i = 0; i < nelt; ++i)
47384 d->perm[i] &= nelt - 1;
47385 d->op0 = d->op1;
47386 break;
47388 case 1:
47389 d->op1 = d->op0;
47390 break;
47393 return (which == 3);
47396 bool
47397 ix86_expand_vec_perm_const (rtx operands[4])
47399 struct expand_vec_perm_d d;
47400 unsigned char perm[MAX_VECT_LEN];
47401 int i, nelt;
47402 bool two_args;
47403 rtx sel;
47405 d.target = operands[0];
47406 d.op0 = operands[1];
47407 d.op1 = operands[2];
47408 sel = operands[3];
47410 d.vmode = GET_MODE (d.target);
47411 gcc_assert (VECTOR_MODE_P (d.vmode));
47412 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47413 d.testing_p = false;
47415 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47416 gcc_assert (XVECLEN (sel, 0) == nelt);
47417 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47419 for (i = 0; i < nelt; ++i)
47421 rtx e = XVECEXP (sel, 0, i);
47422 int ei = INTVAL (e) & (2 * nelt - 1);
47423 d.perm[i] = ei;
47424 perm[i] = ei;
47427 two_args = canonicalize_perm (&d);
47429 if (ix86_expand_vec_perm_const_1 (&d))
47430 return true;
47432 /* If the selector says both arguments are needed, but the operands are the
47433 same, the above tried to expand with one_operand_p and a flattened selector.
47434 If that didn't work, retry without one_operand_p; we succeeded with that
47435 during testing. */
47436 if (two_args && d.one_operand_p)
47438 d.one_operand_p = false;
47439 memcpy (d.perm, perm, sizeof (perm));
47440 return ix86_expand_vec_perm_const_1 (&d);
47443 return false;
47446 /* Implement targetm.vectorize.vec_perm_const_ok. */
47448 static bool
47449 ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
47451 struct expand_vec_perm_d d;
47452 unsigned int i, nelt, which;
47453 bool ret;
47455 d.vmode = vmode;
47456 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47457 d.testing_p = true;
47459 /* Given sufficient ISA support we can just return true here
47460 for selected vector modes. */
47461 switch (d.vmode)
47463 case E_V16SFmode:
47464 case E_V16SImode:
47465 case E_V8DImode:
47466 case E_V8DFmode:
47467 if (TARGET_AVX512F)
47468 /* All implementable with a single vperm[it]2 insn. */
47469 return true;
47470 break;
47471 case E_V32HImode:
47472 if (TARGET_AVX512BW)
47473 /* All implementable with a single vperm[it]2 insn. */
47474 return true;
47475 break;
47476 case E_V64QImode:
47477 if (TARGET_AVX512BW)
47478 /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
47479 return true;
47480 break;
47481 case E_V8SImode:
47482 case E_V8SFmode:
47483 case E_V4DFmode:
47484 case E_V4DImode:
47485 if (TARGET_AVX512VL)
47486 /* All implementable with a single vperm[it]2 insn. */
47487 return true;
47488 break;
47489 case E_V16HImode:
47490 if (TARGET_AVX2)
47491 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47492 return true;
47493 break;
47494 case E_V32QImode:
47495 if (TARGET_AVX2)
47496 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
47497 return true;
47498 break;
47499 case E_V4SImode:
47500 case E_V4SFmode:
47501 case E_V8HImode:
47502 case E_V16QImode:
47503 /* All implementable with a single vpperm insn. */
47504 if (TARGET_XOP)
47505 return true;
47506 /* All implementable with 2 pshufb + 1 ior. */
47507 if (TARGET_SSSE3)
47508 return true;
47509 break;
47510 case E_V2DImode:
47511 case E_V2DFmode:
47512 /* All implementable with shufpd or unpck[lh]pd. */
47513 return true;
47514 default:
47515 return false;
47518 /* Extract the values from the vector CST into the permutation
47519 array in D. */
47520 for (i = which = 0; i < nelt; ++i)
47522 unsigned char e = sel[i];
47523 gcc_assert (e < 2 * nelt);
47524 d.perm[i] = e;
47525 which |= (e < nelt ? 1 : 2);
47528 /* For all elements from second vector, fold the elements to first. */
47529 if (which == 2)
47530 for (i = 0; i < nelt; ++i)
47531 d.perm[i] -= nelt;
47533 /* Check whether the mask can be applied to the vector type. */
47534 d.one_operand_p = (which != 3);
47536 /* Implementable with shufps or pshufd. */
47537 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47538 return true;
47540 /* Otherwise we have to go through the motions and see if we can
47541 figure out how to generate the requested permutation. */
47542 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47543 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
47544 if (!d.one_operand_p)
47545 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
47547 start_sequence ();
47548 ret = ix86_expand_vec_perm_const_1 (&d);
47549 end_sequence ();
47551 return ret;
47554 void
47555 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
47557 struct expand_vec_perm_d d;
47558 unsigned i, nelt;
47560 d.target = targ;
47561 d.op0 = op0;
47562 d.op1 = op1;
47563 d.vmode = GET_MODE (targ);
47564 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47565 d.one_operand_p = false;
47566 d.testing_p = false;
47568 for (i = 0; i < nelt; ++i)
47569 d.perm[i] = i * 2 + odd;
47571 /* We'll either be able to implement the permutation directly... */
47572 if (expand_vec_perm_1 (&d))
47573 return;
47575 /* ... or we use the special-case patterns. */
47576 expand_vec_perm_even_odd_1 (&d, odd);
47579 static void
47580 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
47582 struct expand_vec_perm_d d;
47583 unsigned i, nelt, base;
47584 bool ok;
47586 d.target = targ;
47587 d.op0 = op0;
47588 d.op1 = op1;
47589 d.vmode = GET_MODE (targ);
47590 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47591 d.one_operand_p = false;
47592 d.testing_p = false;
47594 base = high_p ? nelt / 2 : 0;
47595 for (i = 0; i < nelt / 2; ++i)
47597 d.perm[i * 2] = i + base;
47598 d.perm[i * 2 + 1] = i + base + nelt;
47601 /* Note that for AVX this isn't one instruction. */
47602 ok = ix86_expand_vec_perm_const_1 (&d);
47603 gcc_assert (ok);
47607 /* Expand a vector operation CODE for a V*QImode in terms of the
47608 same operation on V*HImode. */
47610 void
47611 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
47613 machine_mode qimode = GET_MODE (dest);
47614 machine_mode himode;
47615 rtx (*gen_il) (rtx, rtx, rtx);
47616 rtx (*gen_ih) (rtx, rtx, rtx);
47617 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
47618 struct expand_vec_perm_d d;
47619 bool ok, full_interleave;
47620 bool uns_p = false;
47621 int i;
47623 switch (qimode)
47625 case E_V16QImode:
47626 himode = V8HImode;
47627 gen_il = gen_vec_interleave_lowv16qi;
47628 gen_ih = gen_vec_interleave_highv16qi;
47629 break;
47630 case E_V32QImode:
47631 himode = V16HImode;
47632 gen_il = gen_avx2_interleave_lowv32qi;
47633 gen_ih = gen_avx2_interleave_highv32qi;
47634 break;
47635 case E_V64QImode:
47636 himode = V32HImode;
47637 gen_il = gen_avx512bw_interleave_lowv64qi;
47638 gen_ih = gen_avx512bw_interleave_highv64qi;
47639 break;
47640 default:
47641 gcc_unreachable ();
47644 op2_l = op2_h = op2;
47645 switch (code)
47647 case MULT:
47648 /* Unpack data such that we've got a source byte in each low byte of
47649 each word. We don't care what goes into the high byte of each word.
47650 Rather than trying to get zero in there, most convenient is to let
47651 it be a copy of the low byte. */
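/* Only the low byte of each 16-bit product is used, and that byte depends
only on the low bytes of the two factors, so the copy sitting in the high
byte cannot disturb the result. */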
47652 op2_l = gen_reg_rtx (qimode);
47653 op2_h = gen_reg_rtx (qimode);
47654 emit_insn (gen_il (op2_l, op2, op2));
47655 emit_insn (gen_ih (op2_h, op2, op2));
47657 op1_l = gen_reg_rtx (qimode);
47658 op1_h = gen_reg_rtx (qimode);
47659 emit_insn (gen_il (op1_l, op1, op1));
47660 emit_insn (gen_ih (op1_h, op1, op1));
47661 full_interleave = qimode == V16QImode;
47662 break;
47664 case ASHIFT:
47665 case LSHIFTRT:
47666 uns_p = true;
47667 /* FALLTHRU */
47668 case ASHIFTRT:
47669 op1_l = gen_reg_rtx (himode);
47670 op1_h = gen_reg_rtx (himode);
47671 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
47672 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
47673 full_interleave = true;
47674 break;
47675 default:
47676 gcc_unreachable ();
47679 /* Perform the operation. */
47680 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
47681 1, OPTAB_DIRECT);
47682 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
47683 1, OPTAB_DIRECT);
47684 gcc_assert (res_l && res_h);
47686 /* Merge the data back into the right place. */
47687 d.target = dest;
47688 d.op0 = gen_lowpart (qimode, res_l);
47689 d.op1 = gen_lowpart (qimode, res_h);
47690 d.vmode = qimode;
47691 d.nelt = GET_MODE_NUNITS (qimode);
47692 d.one_operand_p = false;
47693 d.testing_p = false;
47695 if (full_interleave)
47697 /* For SSE2, we used a full interleave, so the desired
47698 results are in the even elements. */
47699 for (i = 0; i < d.nelt; ++i)
47700 d.perm[i] = i * 2;
47702 else
47704 /* For AVX, the interleave used above was not cross-lane. So the
47705 extraction is evens but with the second and third quarter swapped.
47706 Happily, that is even one insn shorter than even extraction.
47707 For AVX512BW we have 4 lanes. We extract evens from within a lane,
47708 always first from the first and then from the second source operand;
47709 the index bits above the low 4 bits remain the same.
47710 Thus, for d.nelt == 32 we want permutation
47711 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
47712 and for d.nelt == 64 we want permutation
47713 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
47714 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
47715 for (i = 0; i < d.nelt; ++i)
47716 d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
47719 ok = ix86_expand_vec_perm_const_1 (&d);
47720 gcc_assert (ok);
47722 set_unique_reg_note (get_last_insn (), REG_EQUAL,
47723 gen_rtx_fmt_ee (code, qimode, op1, op2));
47726 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
47727 if op is CONST_VECTOR with all odd elements equal to their
47728 preceding element. */
47730 static bool
47731 const_vector_equal_evenodd_p (rtx op)
47733 machine_mode mode = GET_MODE (op);
47734 int i, nunits = GET_MODE_NUNITS (mode);
47735 if (GET_CODE (op) != CONST_VECTOR
47736 || nunits != CONST_VECTOR_NUNITS (op))
47737 return false;
47738 for (i = 0; i < nunits; i += 2)
47739 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
47740 return false;
47741 return true;
47744 void
47745 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
47746 bool uns_p, bool odd_p)
47748 machine_mode mode = GET_MODE (op1);
47749 machine_mode wmode = GET_MODE (dest);
47750 rtx x;
47751 rtx orig_op1 = op1, orig_op2 = op2;
47753 if (!nonimmediate_operand (op1, mode))
47754 op1 = force_reg (mode, op1);
47755 if (!nonimmediate_operand (op2, mode))
47756 op2 = force_reg (mode, op2);
47758 /* We only play even/odd games with vectors of SImode. */
47759 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
47761 /* If we're looking for the odd results, shift those members down to
47762 the even slots. For some cpus this is faster than a PSHUFD. */
47763 if (odd_p)
47765 /* For XOP use vpmacsdqh, but only for smult, as it is only
47766 signed. */
47767 if (TARGET_XOP && mode == V4SImode && !uns_p)
47769 x = force_reg (wmode, CONST0_RTX (wmode));
47770 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
47771 return;
47774 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
47775 if (!const_vector_equal_evenodd_p (orig_op1))
47776 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
47777 x, NULL, 1, OPTAB_DIRECT);
47778 if (!const_vector_equal_evenodd_p (orig_op2))
47779 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
47780 x, NULL, 1, OPTAB_DIRECT);
47781 op1 = gen_lowpart (mode, op1);
47782 op2 = gen_lowpart (mode, op2);
47785 if (mode == V16SImode)
47787 if (uns_p)
47788 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
47789 else
47790 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
47792 else if (mode == V8SImode)
47794 if (uns_p)
47795 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
47796 else
47797 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
47799 else if (uns_p)
47800 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
47801 else if (TARGET_SSE4_1)
47802 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
47803 else
47805 rtx s1, s2, t0, t1, t2;
47807 /* The easiest way to implement this without PMULDQ is to go through
47808 the motions as if we are performing a full 64-bit multiply, with
47809 the exception that we need to do less shuffling of the elements. */
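/* For 32-bit values a and b with unsigned images ua and ub, the signed
product is ua * ub - ((a < 0 ? ub : 0) + (b < 0 ? ua : 0)) << 32 modulo
2^64; the all-ones compare masks computed below, multiplied by the other
operand and shifted left by 32, perform exactly those subtractions
modulo 2^64. */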
47811 /* Compute the sign-extension, aka highparts, of the two operands. */
47812 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47813 op1, pc_rtx, pc_rtx);
47814 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
47815 op2, pc_rtx, pc_rtx);
47817 /* Multiply LO(A) * HI(B), and vice-versa. */
47818 t1 = gen_reg_rtx (wmode);
47819 t2 = gen_reg_rtx (wmode);
47820 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
47821 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
47823 /* Multiply LO(A) * LO(B). */
47824 t0 = gen_reg_rtx (wmode);
47825 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
47827 /* Combine and shift the highparts into place. */
47828 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
47829 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
47830 1, OPTAB_DIRECT);
47832 /* Combine high and low parts. */
47833 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
47834 return;
47836 emit_insn (x);
47839 void
47840 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
47841 bool uns_p, bool high_p)
47843 machine_mode wmode = GET_MODE (dest);
47844 machine_mode mode = GET_MODE (op1);
47845 rtx t1, t2, t3, t4, mask;
47847 switch (mode)
47849 case E_V4SImode:
47850 t1 = gen_reg_rtx (mode);
47851 t2 = gen_reg_rtx (mode);
47852 if (TARGET_XOP && !uns_p)
47854 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
47855 shuffle the elements once so that all elements are in the right
47856 place for immediate use: { A C B D }. */
47857 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
47858 const1_rtx, GEN_INT (3)));
47859 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
47860 const1_rtx, GEN_INT (3)));
47862 else
47864 /* Put the elements into place for the multiply. */
47865 ix86_expand_vec_interleave (t1, op1, op1, high_p);
47866 ix86_expand_vec_interleave (t2, op2, op2, high_p);
47867 high_p = false;
47869 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
47870 break;
47872 case E_V8SImode:
47873 /* Shuffle the elements between the lanes. After this we
47874 have { A B E F | C D G H } for each operand. */
47875 t1 = gen_reg_rtx (V4DImode);
47876 t2 = gen_reg_rtx (V4DImode);
47877 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
47878 const0_rtx, const2_rtx,
47879 const1_rtx, GEN_INT (3)));
47880 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
47881 const0_rtx, const2_rtx,
47882 const1_rtx, GEN_INT (3)));
47884 /* Shuffle the elements within the lanes. After this we
47885 have { A A B B | C C D D } or { E E F F | G G H H }. */
47886 t3 = gen_reg_rtx (V8SImode);
47887 t4 = gen_reg_rtx (V8SImode);
47888 mask = GEN_INT (high_p
47889 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
47890 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
47891 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
47892 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
47894 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
47895 break;
47897 case E_V8HImode:
47898 case E_V16HImode:
47899 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
47900 uns_p, OPTAB_DIRECT);
47901 t2 = expand_binop (mode,
47902 uns_p ? umul_highpart_optab : smul_highpart_optab,
47903 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
47904 gcc_assert (t1 && t2);
47906 t3 = gen_reg_rtx (mode);
47907 ix86_expand_vec_interleave (t3, t1, t2, high_p);
47908 emit_move_insn (dest, gen_lowpart (wmode, t3));
47909 break;
47911 case E_V16QImode:
47912 case E_V32QImode:
47913 case E_V32HImode:
47914 case E_V16SImode:
47915 case E_V64QImode:
47916 t1 = gen_reg_rtx (wmode);
47917 t2 = gen_reg_rtx (wmode);
47918 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
47919 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
47921 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
47922 break;
47924 default:
47925 gcc_unreachable ();
47929 void
47930 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
47932 rtx res_1, res_2, res_3, res_4;
47934 res_1 = gen_reg_rtx (V4SImode);
47935 res_2 = gen_reg_rtx (V4SImode);
47936 res_3 = gen_reg_rtx (V2DImode);
47937 res_4 = gen_reg_rtx (V2DImode);
47938 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
47939 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
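/* res_3 now holds the 64-bit products op1[0]*op2[0] and op1[2]*op2[2],
res_4 holds op1[1]*op2[1] and op1[3]*op2[3]; only the low 32 bits of
each product are needed for the SImode multiply. */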
47941 /* Move the results in element 2 down to element 1; we don't care
47942 what goes in elements 2 and 3. Then we can merge the parts
47943 back together with an interleave.
47945 Note that two other sequences were tried:
47946 (1) Use interleaves at the start instead of psrldq, which allows
47947 us to use a single shufps to merge things back at the end.
47948 (2) Use shufps here to combine the two vectors, then pshufd to
47949 put the elements in the correct order.
47950 In both cases the cost of the reformatting stall was too high
47951 and the overall sequence slower. */
47953 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
47954 const0_rtx, const2_rtx,
47955 const0_rtx, const0_rtx));
47956 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
47957 const0_rtx, const2_rtx,
47958 const0_rtx, const0_rtx));
47959 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
47961 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
47964 void
47965 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
47967 machine_mode mode = GET_MODE (op0);
47968 rtx t1, t2, t3, t4, t5, t6;
47970 if (TARGET_AVX512DQ && mode == V8DImode)
47971 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
47972 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
47973 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
47974 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
47975 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
47976 else if (TARGET_XOP && mode == V2DImode)
47978 /* op1: A,B,C,D, op2: E,F,G,H */
47979 op1 = gen_lowpart (V4SImode, op1);
47980 op2 = gen_lowpart (V4SImode, op2);
47982 t1 = gen_reg_rtx (V4SImode);
47983 t2 = gen_reg_rtx (V4SImode);
47984 t3 = gen_reg_rtx (V2DImode);
47985 t4 = gen_reg_rtx (V2DImode);
47987 /* t1: B,A,D,C */
47988 emit_insn (gen_sse2_pshufd_1 (t1, op1,
47989 GEN_INT (1),
47990 GEN_INT (0),
47991 GEN_INT (3),
47992 GEN_INT (2)));
47994 /* t2: (B*E),(A*F),(D*G),(C*H) */
47995 emit_insn (gen_mulv4si3 (t2, t1, op2));
47997 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
47998 emit_insn (gen_xop_phadddq (t3, t2));
48000 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
48001 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
48003 /* Multiply lower parts and add all. */
48004 t5 = gen_reg_rtx (V2DImode);
48005 emit_insn (gen_vec_widen_umult_even_v4si (t5,
48006 gen_lowpart (V4SImode, op1),
48007 gen_lowpart (V4SImode, op2)));
48008 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
48011 else
48013 machine_mode nmode;
48014 rtx (*umul) (rtx, rtx, rtx);
48016 if (mode == V2DImode)
48018 umul = gen_vec_widen_umult_even_v4si;
48019 nmode = V4SImode;
48021 else if (mode == V4DImode)
48023 umul = gen_vec_widen_umult_even_v8si;
48024 nmode = V8SImode;
48026 else if (mode == V8DImode)
48028 umul = gen_vec_widen_umult_even_v16si;
48029 nmode = V16SImode;
48031 else
48032 gcc_unreachable ();
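/* Writing a = hi(a) * 2^32 + lo(a) and likewise for b, the product modulo
2^64 is lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32); the
hi(a)*hi(b) term lies entirely above bit 63 and drops out. */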
48035 /* Multiply low parts. */
48036 t1 = gen_reg_rtx (mode);
48037 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
48039 /* Shift input vectors right 32 bits so we can multiply high parts. */
48040 t6 = GEN_INT (32);
48041 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
48042 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
48044 /* Multiply high parts by low parts. */
48045 t4 = gen_reg_rtx (mode);
48046 t5 = gen_reg_rtx (mode);
48047 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
48048 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
48050 /* Combine and shift the highparts back. */
48051 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
48052 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
48054 /* Combine high and low parts. */
48055 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
48058 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48059 gen_rtx_MULT (mode, op1, op2));
48062 /* Return 1 if control transfer instruction INSN
48063 should be encoded with bnd prefix.
48064 If insn is NULL then return 1 when control
48065 transfer instructions should be prefixed with
48066 bnd by default for current function. */
48068 bool
48069 ix86_bnd_prefixed_insn_p (rtx insn)
48071 /* For call insns check special flag. */
48072 if (insn && CALL_P (insn))
48074 rtx call = get_call_rtx_from (insn);
48075 if (call)
48076 return CALL_EXPR_WITH_BOUNDS_P (call);
48079 /* All other insns are prefixed only if function is instrumented. */
48080 return chkp_function_instrumented_p (current_function_decl);
48083 /* Return 1 if control transfer instruction INSN
48084 should be encoded with notrack prefix. */
48086 static bool
48087 ix86_notrack_prefixed_insn_p (rtx insn)
48089 if (!insn || !((flag_cf_protection & CF_BRANCH) && TARGET_IBT))
48090 return false;
48092 if (CALL_P (insn))
48094 rtx call = get_call_rtx_from (insn);
48095 gcc_assert (call != NULL_RTX);
48096 rtx addr = XEXP (call, 0);
48098 /* Do not emit 'notrack' if it's not an indirect call. */
48099 if (MEM_P (addr)
48100 && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
48101 return false;
48102 else
48103 return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
48106 if (JUMP_P (insn) && !flag_cet_switch)
48108 rtx target = JUMP_LABEL (insn);
48109 if (target == NULL_RTX || ANY_RETURN_P (target))
48110 return false;
48112 /* Check whether the jump is to a switch table. */
48113 rtx_insn *label = as_a<rtx_insn *> (target);
48114 rtx_insn *table = next_insn (label);
48115 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
48116 return false;
48117 else
48118 return true;
48120 return false;
48123 /* Calculate integer abs() using only SSE2 instructions. */
48125 void
48126 ix86_expand_sse2_abs (rtx target, rtx input)
48128 machine_mode mode = GET_MODE (target);
48129 rtx tmp0, tmp1, x;
48131 switch (mode)
48133 /* For 32-bit signed integer X, the best way to calculate the absolute
48134 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
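/* E.g. for X = -5: X >> 31 = -1, -1 ^ -5 = 4 and 4 - -1 = 5. */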
48135 case E_V4SImode:
48136 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
48137 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
48138 NULL, 0, OPTAB_DIRECT);
48139 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
48140 NULL, 0, OPTAB_DIRECT);
48141 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
48142 target, 0, OPTAB_DIRECT);
48143 break;
48145 /* For 16-bit signed integer X, the best way to calculate the absolute
48146 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
48147 case E_V8HImode:
48148 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48150 x = expand_simple_binop (mode, SMAX, tmp0, input,
48151 target, 0, OPTAB_DIRECT);
48152 break;
48154 /* For 8-bit signed integer X, the best way to calculate the absolute
48155 value of X is min ((unsigned char) X, (unsigned char) (-X)),
48156 as SSE2 provides the PMINUB insn. */
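     /* Worked example (illustrative only): for X = -3 the two unsigned
	operands are 253 and 3, so the minimum is 3 == abs (-3); for
	X = 3 they are 3 and 253, and the minimum is again 3.  */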
48157 case E_V16QImode:
48158 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48160 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
48161 target, 0, OPTAB_DIRECT);
48162 break;
48164 default:
48165 gcc_unreachable ();
48168 if (x != target)
48169 emit_move_insn (target, x);
48172 /* Expand an extract from a vector register through pextr insn.
48173 Return true if successful. */
48175 bool
48176 ix86_expand_pextr (rtx *operands)
48178 rtx dst = operands[0];
48179 rtx src = operands[1];
48181 unsigned int size = INTVAL (operands[2]);
48182 unsigned int pos = INTVAL (operands[3]);
48184 if (SUBREG_P (dst))
48186 /* Reject non-lowpart subregs. */
48187 if (SUBREG_BYTE (dst) > 0)
48188 return false;
48189 dst = SUBREG_REG (dst);
48192 if (SUBREG_P (src))
48194 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
48195 src = SUBREG_REG (src);
48198 switch (GET_MODE (src))
48200 case E_V16QImode:
48201 case E_V8HImode:
48202 case E_V4SImode:
48203 case E_V2DImode:
48204 case E_V1TImode:
48205 case E_TImode:
48207 machine_mode srcmode, dstmode;
48208 rtx d, pat;
48210 if (!int_mode_for_size (size, 0).exists (&dstmode))
48211 return false;
48213 switch (dstmode)
48215 case E_QImode:
48216 if (!TARGET_SSE4_1)
48217 return false;
48218 srcmode = V16QImode;
48219 break;
48221 case E_HImode:
48222 if (!TARGET_SSE2)
48223 return false;
48224 srcmode = V8HImode;
48225 break;
48227 case E_SImode:
48228 if (!TARGET_SSE4_1)
48229 return false;
48230 srcmode = V4SImode;
48231 break;
48233 case E_DImode:
48234 gcc_assert (TARGET_64BIT);
48235 if (!TARGET_SSE4_1)
48236 return false;
48237 srcmode = V2DImode;
48238 break;
48240 default:
48241 return false;
48244 /* Reject extractions from misaligned positions. */
48245 if (pos & (size-1))
48246 return false;
48248 if (GET_MODE (dst) == dstmode)
48249 d = dst;
48250 else
48251 d = gen_reg_rtx (dstmode);
48253 /* Construct insn pattern. */
48254 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
48255 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
48257 /* Let the rtl optimizers know about the zero extension performed. */
48258 if (dstmode == QImode || dstmode == HImode)
48260 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
48261 d = gen_lowpart (SImode, d);
48264 emit_insn (gen_rtx_SET (d, pat));
48266 if (d != dst)
48267 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48268 return true;
48271 default:
48272 return false;
48276 /* Expand an insert into a vector register through pinsr insn.
48277 Return true if successful. */
48279 bool
48280 ix86_expand_pinsr (rtx *operands)
48282 rtx dst = operands[0];
48283 rtx src = operands[3];
48285 unsigned int size = INTVAL (operands[1]);
48286 unsigned int pos = INTVAL (operands[2]);
48288 if (SUBREG_P (dst))
48290 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
48291 dst = SUBREG_REG (dst);
48294 switch (GET_MODE (dst))
48296 case E_V16QImode:
48297 case E_V8HImode:
48298 case E_V4SImode:
48299 case E_V2DImode:
48300 case E_V1TImode:
48301 case E_TImode:
48303 machine_mode srcmode, dstmode;
48304 rtx (*pinsr)(rtx, rtx, rtx, rtx);
48305 rtx d;
48307 if (!int_mode_for_size (size, 0).exists (&srcmode))
48308 return false;
48310 switch (srcmode)
48312 case E_QImode:
48313 if (!TARGET_SSE4_1)
48314 return false;
48315 dstmode = V16QImode;
48316 pinsr = gen_sse4_1_pinsrb;
48317 break;
48319 case E_HImode:
48320 if (!TARGET_SSE2)
48321 return false;
48322 dstmode = V8HImode;
48323 pinsr = gen_sse2_pinsrw;
48324 break;
48326 case E_SImode:
48327 if (!TARGET_SSE4_1)
48328 return false;
48329 dstmode = V4SImode;
48330 pinsr = gen_sse4_1_pinsrd;
48331 break;
48333 case E_DImode:
48334 gcc_assert (TARGET_64BIT);
48335 if (!TARGET_SSE4_1)
48336 return false;
48337 dstmode = V2DImode;
48338 pinsr = gen_sse4_1_pinsrq;
48339 break;
48341 default:
48342 return false;
48345 /* Reject insertions to misaligned positions. */
48346 if (pos & (size-1))
48347 return false;
48349 if (SUBREG_P (src))
48351 unsigned int srcpos = SUBREG_BYTE (src);
48353 if (srcpos > 0)
48355 rtx extr_ops[4];
48357 extr_ops[0] = gen_reg_rtx (srcmode);
48358 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
48359 extr_ops[2] = GEN_INT (size);
48360 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
48362 if (!ix86_expand_pextr (extr_ops))
48363 return false;
48365 src = extr_ops[0];
48367 else
48368 src = gen_lowpart (srcmode, SUBREG_REG (src));
48371 if (GET_MODE (dst) == dstmode)
48372 d = dst;
48373 else
48374 d = gen_reg_rtx (dstmode);
48376 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
48377 gen_lowpart (srcmode, src),
48378 GEN_INT (1 << (pos / size))));
48379 if (d != dst)
48380 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48381 return true;
48384 default:
48385 return false;
48389 /* This function returns the calling abi specific va_list type node.
48390 It returns the FNDECL specific va_list type. */
48392 static tree
48393 ix86_fn_abi_va_list (tree fndecl)
48395 if (!TARGET_64BIT)
48396 return va_list_type_node;
48397 gcc_assert (fndecl != NULL_TREE);
48399 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
48400 return ms_va_list_type_node;
48401 else
48402 return sysv_va_list_type_node;
48405 /* Returns the canonical va_list type specified by TYPE. If there
48406 is no valid TYPE provided, it returns NULL_TREE. */
48408 static tree
48409 ix86_canonical_va_list_type (tree type)
48411 if (TARGET_64BIT)
48413 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
48414 return ms_va_list_type_node;
48416 if ((TREE_CODE (type) == ARRAY_TYPE
48417 && integer_zerop (array_type_nelts (type)))
48418 || POINTER_TYPE_P (type))
48420 tree elem_type = TREE_TYPE (type);
48421 if (TREE_CODE (elem_type) == RECORD_TYPE
48422 && lookup_attribute ("sysv_abi va_list",
48423 TYPE_ATTRIBUTES (elem_type)))
48424 return sysv_va_list_type_node;
48427 return NULL_TREE;
48430 return std_canonical_va_list_type (type);
48433 /* Iterate through the target-specific builtin types for va_list.
48434 IDX denotes the iterator, *PTREE is set to the result type of
48435 the va_list builtin, and *PNAME to its internal type.
48436 Returns zero if there is no element for this index, otherwise
48437 IDX should be increased upon the next call.
48438 Note, do not iterate a base builtin's name like __builtin_va_list.
48439 Used from c_common_nodes_and_builtins. */
48441 static int
48442 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
48444 if (TARGET_64BIT)
48446 switch (idx)
48448 default:
48449 break;
48451 case 0:
48452 *ptree = ms_va_list_type_node;
48453 *pname = "__builtin_ms_va_list";
48454 return 1;
48456 case 1:
48457 *ptree = sysv_va_list_type_node;
48458 *pname = "__builtin_sysv_va_list";
48459 return 1;
48463 return 0;
48466 #undef TARGET_SCHED_DISPATCH
48467 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
48468 #undef TARGET_SCHED_DISPATCH_DO
48469 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
48470 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48471 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48472 #undef TARGET_SCHED_REORDER
48473 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
48474 #undef TARGET_SCHED_ADJUST_PRIORITY
48475 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48476 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48477 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48478 ix86_dependencies_evaluation_hook
48481 /* Implementation of reassociation_width target hook used by
48482 reassoc phase to identify parallelism level in reassociated
48483 tree. The statement's tree_code is passed in OPC. The type of the
48484 arguments is passed in MODE. */
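/* Illustrative example (the actual numbers depend on the active cost
   table): if reassoc_vec_int is 4 and the operation is a V8SImode PLUS
   on a TARGET_AVX128_OPTIMAL tuning, the 256-bit vector is split into
   two 128-bit halves, so div == 2 and the returned width is
   (4 + 2 - 1) / 2 == 2.  */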
48486 static int
48487 ix86_reassociation_width (unsigned int op, machine_mode mode)
48489 int width = 1;
48490 /* Vector part. */
48491 if (VECTOR_MODE_P (mode))
48493 int div = 1;
48494 if (INTEGRAL_MODE_P (mode))
48495 width = ix86_cost->reassoc_vec_int;
48496 else if (FLOAT_MODE_P (mode))
48497 width = ix86_cost->reassoc_vec_fp;
48499 if (width == 1)
48500 return 1;
48502 /* Integer vector instructions execute in the FP unit
48503 and can execute 3 additions and one multiplication per cycle. */
48504 if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
48505 && op != PLUS && op != MINUS)
48506 return 1;
48508 /* Account for targets that split wide vectors into multiple parts. */
48509 if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
48510 div = GET_MODE_BITSIZE (mode) / 128;
48511 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
48512 div = GET_MODE_BITSIZE (mode) / 64;
48513 width = (width + div - 1) / div;
48515 /* Scalar part. */
48516 else if (INTEGRAL_MODE_P (mode))
48517 width = ix86_cost->reassoc_int;
48518 else if (FLOAT_MODE_P (mode))
48519 width = ix86_cost->reassoc_fp;
48521 /* Avoid using too many registers in 32bit mode. */
48522 if (!TARGET_64BIT && width > 2)
48523 width = 2;
48524 return width;
48527 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
48528 place emms and femms instructions. */
48530 static machine_mode
48531 ix86_preferred_simd_mode (scalar_mode mode)
48533 if (!TARGET_SSE)
48534 return word_mode;
48536 switch (mode)
48538 case E_QImode:
48539 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48540 return V64QImode;
48541 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48542 return V32QImode;
48543 else
48544 return V16QImode;
48546 case E_HImode:
48547 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
48548 return V32HImode;
48549 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48550 return V16HImode;
48551 else
48552 return V8HImode;
48554 case E_SImode:
48555 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48556 return V16SImode;
48557 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48558 return V8SImode;
48559 else
48560 return V4SImode;
48562 case E_DImode:
48563 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48564 return V8DImode;
48565 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48566 return V4DImode;
48567 else
48568 return V2DImode;
48570 case E_SFmode:
48571 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48572 return V16SFmode;
48573 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48574 return V8SFmode;
48575 else
48576 return V4SFmode;
48578 case E_DFmode:
48579 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48580 return V8DFmode;
48581 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48582 return V4DFmode;
48583 else if (TARGET_SSE2)
48584 return V2DFmode;
48585 /* FALLTHRU */
48587 default:
48588 return word_mode;
48592 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
48593 vectors. If AVX512F is enabled then try vectorizing with 512bit,
48594 256bit and 128bit vectors. */
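/* Illustrative example: with TARGET_AVX512F and no prefer-256 tuning the
   returned mask is 64 | 32 | 16 == 0x70, i.e. the vectorizer may try
   64-, 32- and 16-byte vectors in turn.  */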
48596 static unsigned int
48597 ix86_autovectorize_vector_sizes (void)
48599 unsigned int bytesizes = 0;
48601 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
48602 bytesizes |= (64 | 32 | 16);
48603 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
48604 bytesizes |= (32 | 16);
48606 return bytesizes;
48609 /* Implementation of targetm.vectorize.get_mask_mode. */
48611 static opt_machine_mode
48612 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
48614 unsigned elem_size = vector_size / nunits;
48616 /* Scalar mask case. */
48617 if ((TARGET_AVX512F && vector_size == 64)
48618 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
48620 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
48621 return smallest_int_mode_for_size (nunits);
48624 scalar_int_mode elem_mode
48625 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
48627 gcc_assert (elem_size * nunits == vector_size);
48629 return mode_for_vector (elem_mode, nunits);
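/* Illustrative examples: with TARGET_AVX512F, vector_size == 64 and
   nunits == 16, elem_size is 4, so the scalar-mask path returns the
   smallest integer mode with 16 bits, i.e. HImode.  Without AVX512VL,
   vector_size == 32 with nunits == 8 gives SImode elements and a
   V8SImode vector mask.  */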
48634 /* Return class of registers which could be used for pseudo of MODE
48635 and of class RCLASS for spilling instead of memory. Return NO_REGS
48636 if it is not possible or non-profitable. */
48638 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
48640 static reg_class_t
48641 ix86_spill_class (reg_class_t rclass, machine_mode mode)
48643 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
48644 && TARGET_SSE2
48645 && TARGET_INTER_UNIT_MOVES_TO_VEC
48646 && TARGET_INTER_UNIT_MOVES_FROM_VEC
48647 && (mode == SImode || (TARGET_64BIT && mode == DImode))
48648 && INTEGER_CLASS_P (rclass))
48649 return ALL_SSE_REGS;
48650 return NO_REGS;
48653 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
48654 but returns a lower bound. */
48656 static unsigned int
48657 ix86_max_noce_ifcvt_seq_cost (edge e)
48659 bool predictable_p = predictable_edge_p (e);
48661 enum compiler_param param
48662 = (predictable_p
48663 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
48664 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
48666 /* If we have a parameter set, use that, otherwise take a guess using
48667 BRANCH_COST. */
48668 if (global_options_set.x_param_values[param])
48669 return PARAM_VALUE (param);
48670 else
48671 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
48674 /* Return true if SEQ is a good candidate as a replacement for the
48675 if-convertible sequence described in IF_INFO. */
48677 static bool
48678 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
48680 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
48682 int cmov_cnt = 0;
48683 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
48684 Maybe we should allow even more conditional moves as long as they
48685 are used far enough not to stall the CPU, or also consider
48686 IF_INFO->TEST_BB succ edge probabilities. */
48687 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
48689 rtx set = single_set (insn);
48690 if (!set)
48691 continue;
48692 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
48693 continue;
48694 rtx src = SET_SRC (set);
48695 machine_mode mode = GET_MODE (src);
48696 if (GET_MODE_CLASS (mode) != MODE_INT
48697 && GET_MODE_CLASS (mode) != MODE_FLOAT)
48698 continue;
48699 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
48700 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
48701 continue;
48702 /* insn is CMOV or FCMOV. */
48703 if (++cmov_cnt > 1)
48704 return false;
48707 return default_noce_conversion_profitable_p (seq, if_info);
48710 /* Implement targetm.vectorize.init_cost. */
48712 static void *
48713 ix86_init_cost (struct loop *)
48715 unsigned *cost = XNEWVEC (unsigned, 3);
48716 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
48717 return cost;
48720 /* Implement targetm.vectorize.add_stmt_cost. */
48722 static unsigned
48723 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
48724 struct _stmt_vec_info *stmt_info, int misalign,
48725 enum vect_cost_model_location where)
48727 unsigned *cost = (unsigned *) data;
48728 unsigned retval = 0;
48730 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
48731 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
48733 /* Penalize DFmode vector operations for Bonnell. */
48734 if (TARGET_BONNELL && kind == vector_stmt
48735 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
48736 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
48738 /* Statements in an inner loop relative to the loop being
48739 vectorized are weighted more heavily. The value here is
48740 arbitrary and could potentially be improved with analysis. */
48741 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
48742 count *= 50; /* FIXME. */
48744 retval = (unsigned) (count * stmt_cost);
48746 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
48747 for Silvermont, as it has an out-of-order integer pipeline and can execute
48748 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
48749 if ((TARGET_SILVERMONT || TARGET_INTEL)
48750 && stmt_info && stmt_info->stmt)
48752 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
48753 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
48754 retval = (retval * 17) / 10;
48757 cost[where] += retval;
48759 return retval;
48762 /* Implement targetm.vectorize.finish_cost. */
48764 static void
48765 ix86_finish_cost (void *data, unsigned *prologue_cost,
48766 unsigned *body_cost, unsigned *epilogue_cost)
48768 unsigned *cost = (unsigned *) data;
48769 *prologue_cost = cost[vect_prologue];
48770 *body_cost = cost[vect_body];
48771 *epilogue_cost = cost[vect_epilogue];
48774 /* Implement targetm.vectorize.destroy_cost_data. */
48776 static void
48777 ix86_destroy_cost_data (void *data)
48779 free (data);
48782 /* Validate target specific memory model bits in VAL. */
48784 static unsigned HOST_WIDE_INT
48785 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
48787 enum memmodel model = memmodel_from_int (val);
48788 bool strong;
48790 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
48791 |MEMMODEL_MASK)
48792 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
48794 warning (OPT_Winvalid_memory_model,
48795 "Unknown architecture specific memory model");
48796 return MEMMODEL_SEQ_CST;
48798 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
48799 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
48801 warning (OPT_Winvalid_memory_model,
48802 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
48803 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
48805 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
48807 warning (OPT_Winvalid_memory_model,
48808 "HLE_RELEASE not used with RELEASE or stronger memory model");
48809 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
48811 return val;
48814 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
48815 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
48816 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
48817 or number of vecsize_mangle variants that should be emitted. */
48819 static int
48820 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
48821 struct cgraph_simd_clone *clonei,
48822 tree base_type, int num)
48824 int ret = 1;
48826 if (clonei->simdlen
48827 && (clonei->simdlen < 2
48828 || clonei->simdlen > 1024
48829 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
48831 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48832 "unsupported simdlen %d", clonei->simdlen);
48833 return 0;
48836 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
48837 if (TREE_CODE (ret_type) != VOID_TYPE)
48838 switch (TYPE_MODE (ret_type))
48840 case E_QImode:
48841 case E_HImode:
48842 case E_SImode:
48843 case E_DImode:
48844 case E_SFmode:
48845 case E_DFmode:
48846 /* case E_SCmode: */
48847 /* case E_DCmode: */
48848 break;
48849 default:
48850 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48851 "unsupported return type %qT for simd\n", ret_type);
48852 return 0;
48855 tree t;
48856 int i;
48858 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
48859 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
48860 switch (TYPE_MODE (TREE_TYPE (t)))
48862 case E_QImode:
48863 case E_HImode:
48864 case E_SImode:
48865 case E_DImode:
48866 case E_SFmode:
48867 case E_DFmode:
48868 /* case E_SCmode: */
48869 /* case E_DCmode: */
48870 break;
48871 default:
48872 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48873 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
48874 return 0;
48877 if (clonei->cilk_elemental)
48879 /* Parse the processor clause here. If not present, default to 'b'. */
48880 clonei->vecsize_mangle = 'b';
48882 else if (!TREE_PUBLIC (node->decl))
48884 /* If the function isn't exported, we can pick up just one ISA
48885 for the clones. */
48886 if (TARGET_AVX512F)
48887 clonei->vecsize_mangle = 'e';
48888 else if (TARGET_AVX2)
48889 clonei->vecsize_mangle = 'd';
48890 else if (TARGET_AVX)
48891 clonei->vecsize_mangle = 'c';
48892 else
48893 clonei->vecsize_mangle = 'b';
48894 ret = 1;
48896 else
48898 clonei->vecsize_mangle = "bcde"[num];
48899 ret = 4;
48901 clonei->mask_mode = VOIDmode;
48902 switch (clonei->vecsize_mangle)
48904 case 'b':
48905 clonei->vecsize_int = 128;
48906 clonei->vecsize_float = 128;
48907 break;
48908 case 'c':
48909 clonei->vecsize_int = 128;
48910 clonei->vecsize_float = 256;
48911 break;
48912 case 'd':
48913 clonei->vecsize_int = 256;
48914 clonei->vecsize_float = 256;
48915 break;
48916 case 'e':
48917 clonei->vecsize_int = 512;
48918 clonei->vecsize_float = 512;
48919 if (TYPE_MODE (base_type) == QImode)
48920 clonei->mask_mode = DImode;
48921 else
48922 clonei->mask_mode = SImode;
48923 break;
48925 if (clonei->simdlen == 0)
48927 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
48928 clonei->simdlen = clonei->vecsize_int;
48929 else
48930 clonei->simdlen = clonei->vecsize_float;
48931 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
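  /* Illustrative example: for the 'e' (512-bit) variant with a double
     (64-bit) base type this computes 512 / 64 == 8 lanes; for 'b'
     (128-bit) with int it computes 128 / 32 == 4.  */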
48933 else if (clonei->simdlen > 16)
48935 /* For compatibility with ICC, use the same upper bounds
48936 for simdlen. In particular, for CTYPE below, use the return type,
48937 unless the function returns void, in which case use the characteristic
48938 type. If it is possible for given SIMDLEN to pass CTYPE value
48939 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
48940 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
48941 emit the corresponding clone. */
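  /* Illustrative example: a float return type with simdlen 64 under the
     'b' (128-bit) ABI gives cnt == 32 * 64 / 128 == 16, which is
     accepted for 64-bit code but exceeds the 8-register limit for
     32-bit code, so the clone is rejected there.  */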
48942 tree ctype = ret_type;
48943 if (TREE_CODE (ret_type) == VOID_TYPE)
48944 ctype = base_type;
48945 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
48946 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
48947 cnt /= clonei->vecsize_int;
48948 else
48949 cnt /= clonei->vecsize_float;
48950 if (cnt > (TARGET_64BIT ? 16 : 8))
48952 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
48953 "unsupported simdlen %d", clonei->simdlen);
48954 return 0;
48957 return ret;
48960 /* Add target attribute to SIMD clone NODE if needed. */
48962 static void
48963 ix86_simd_clone_adjust (struct cgraph_node *node)
48965 const char *str = NULL;
48966 gcc_assert (node->decl == cfun->decl);
48967 switch (node->simdclone->vecsize_mangle)
48969 case 'b':
48970 if (!TARGET_SSE2)
48971 str = "sse2";
48972 break;
48973 case 'c':
48974 if (!TARGET_AVX)
48975 str = "avx";
48976 break;
48977 case 'd':
48978 if (!TARGET_AVX2)
48979 str = "avx2";
48980 break;
48981 case 'e':
48982 if (!TARGET_AVX512F)
48983 str = "avx512f";
48984 break;
48985 default:
48986 gcc_unreachable ();
48988 if (str == NULL)
48989 return;
48990 push_cfun (NULL);
48991 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
48992 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
48993 gcc_assert (ok);
48994 pop_cfun ();
48995 ix86_reset_previous_fndecl ();
48996 ix86_set_current_function (node->decl);
48999 /* If SIMD clone NODE can't be used in a vectorized loop
49000 in current function, return -1, otherwise return a badness of using it
49001 (0 if it is most desirable from vecsize_mangle point of view, 1
49002 slightly less desirable, etc.). */
49004 static int
49005 ix86_simd_clone_usable (struct cgraph_node *node)
49007 switch (node->simdclone->vecsize_mangle)
49009 case 'b':
49010 if (!TARGET_SSE2)
49011 return -1;
49012 if (!TARGET_AVX)
49013 return 0;
49014 return TARGET_AVX2 ? 2 : 1;
49015 case 'c':
49016 if (!TARGET_AVX)
49017 return -1;
49018 return TARGET_AVX2 ? 1 : 0;
49019 case 'd':
49020 if (!TARGET_AVX2)
49021 return -1;
49022 return 0;
49023 case 'e':
49024 if (!TARGET_AVX512F)
49025 return -1;
49026 return 0;
49027 default:
49028 gcc_unreachable ();
49032 /* This function adjusts the unroll factor based on
49033 the hardware capabilities. For example, bdver3 has
49034 a loop buffer which makes unrolling of smaller
49035 loops less important. This function decides the
49036 unroll factor using the number of memory references
49037 (the value 32 is used) as a heuristic. */
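/* Illustrative example: a loop body with 8 counted memory references
   (references wider than 4 words count twice, see below) gets an unroll
   factor of 32 / 8 == 4; with no references, or more than 32 of them,
   the requested NUNROLL is returned unchanged.  */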
49039 static unsigned
49040 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
49042 basic_block *bbs;
49043 rtx_insn *insn;
49044 unsigned i;
49045 unsigned mem_count = 0;
49047 if (!TARGET_ADJUST_UNROLL)
49048 return nunroll;
49050 /* Count the number of memory references within the loop body.
49051 This value determines the unrolling factor for bdver3 and bdver4
49052 architectures. */
49053 subrtx_iterator::array_type array;
49054 bbs = get_loop_body (loop);
49055 for (i = 0; i < loop->num_nodes; i++)
49056 FOR_BB_INSNS (bbs[i], insn)
49057 if (NONDEBUG_INSN_P (insn))
49058 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
49059 if (const_rtx x = *iter)
49060 if (MEM_P (x))
49062 machine_mode mode = GET_MODE (x);
49063 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
49064 if (n_words > 4)
49065 mem_count += 2;
49066 else
49067 mem_count += 1;
49069 free (bbs);
49071 if (mem_count && mem_count <= 32)
49072 return 32 / mem_count;
49074 return nunroll;
49078 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
49080 static bool
49081 ix86_float_exceptions_rounding_supported_p (void)
49083 /* For x87 floating point with standard excess precision handling,
49084 there is no adddf3 pattern (since x87 floating point only has
49085 XFmode operations) so the default hook implementation gets this
49086 wrong. */
49087 return TARGET_80387 || TARGET_SSE_MATH;
49090 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
49092 static void
49093 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
49095 if (!TARGET_80387 && !TARGET_SSE_MATH)
49096 return;
49097 tree exceptions_var = create_tmp_var_raw (integer_type_node);
49098 if (TARGET_80387)
49100 tree fenv_index_type = build_index_type (size_int (6));
49101 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
49102 tree fenv_var = create_tmp_var_raw (fenv_type);
49103 TREE_ADDRESSABLE (fenv_var) = 1;
49104 tree fenv_ptr = build_pointer_type (fenv_type);
49105 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
49106 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
49107 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
49108 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
49109 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
49110 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
49111 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
49112 tree hold_fnclex = build_call_expr (fnclex, 0);
49113 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
49114 NULL_TREE, NULL_TREE);
49115 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
49116 hold_fnclex);
49117 *clear = build_call_expr (fnclex, 0);
49118 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
49119 tree fnstsw_call = build_call_expr (fnstsw, 0);
49120 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
49121 sw_var, fnstsw_call);
49122 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
49123 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
49124 exceptions_var, exceptions_x87);
49125 *update = build2 (COMPOUND_EXPR, integer_type_node,
49126 sw_mod, update_mod);
49127 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
49128 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
49130 if (TARGET_SSE_MATH)
49132 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
49133 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
49134 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
49135 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
49136 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
49137 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
49138 mxcsr_orig_var, stmxcsr_hold_call);
49139 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
49140 mxcsr_orig_var,
49141 build_int_cst (unsigned_type_node, 0x1f80));
49142 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
49143 build_int_cst (unsigned_type_node, 0xffffffc0));
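      /* Illustrative note: ORing in 0x1f80 sets the six MXCSR exception
	 mask bits (bits 7-12), and ANDing with 0xffffffc0 clears the six
	 exception status flags (bits 0-5), so the modified MXCSR traps
	 nothing and starts with a clean sticky state.  */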
49144 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
49145 mxcsr_mod_var, hold_mod_val);
49146 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49147 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
49148 hold_assign_orig, hold_assign_mod);
49149 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
49150 ldmxcsr_hold_call);
49151 if (*hold)
49152 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
49153 else
49154 *hold = hold_all;
49155 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49156 if (*clear)
49157 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
49158 ldmxcsr_clear_call);
49159 else
49160 *clear = ldmxcsr_clear_call;
49161 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
49162 tree exceptions_sse = fold_convert (integer_type_node,
49163 stxmcsr_update_call);
49164 if (*update)
49166 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
49167 exceptions_var, exceptions_sse);
49168 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
49169 exceptions_var, exceptions_mod);
49170 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
49171 exceptions_assign);
49173 else
49174 *update = build2 (MODIFY_EXPR, integer_type_node,
49175 exceptions_var, exceptions_sse);
49176 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
49177 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49178 ldmxcsr_update_call);
49180 tree atomic_feraiseexcept
49181 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
49182 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
49183 1, exceptions_var);
49184 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49185 atomic_feraiseexcept_call);
49188 /* Return mode to be used for bounds or VOIDmode
49189 if bounds are not supported. */
49191 static machine_mode
49192 ix86_mpx_bound_mode ()
49194 /* Do not support pointer checker if MPX
49195 is not enabled. */
49196 if (!TARGET_MPX)
49198 if (flag_check_pointer_bounds)
49199 warning (0, "Pointer Checker requires MPX support on this target."
49200 " Use -mmpx options to enable MPX.");
49201 return VOIDmode;
49204 return BNDmode;
49207 /* Return constant used to statically initialize constant bounds.
49209 This function is used to create special bound values. For now
49210 only INIT bounds and NONE bounds are expected. More special
49211 values may be added later. */
49213 static tree
49214 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
49216 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
49217 : build_zero_cst (pointer_sized_int_node);
49218 tree high = ub ? build_zero_cst (pointer_sized_int_node)
49219 : build_minus_one_cst (pointer_sized_int_node);
49221 /* This function is supposed to be used to create INIT and
49222 NONE bounds only. */
49223 gcc_assert ((lb == 0 && ub == -1)
49224 || (lb == -1 && ub == 0));
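  /* Illustrative note: the upper bound is kept in complemented form (see
     ix86_initialize_bounds below), so INIT bounds (LB 0, UB -1) become
     the pair {0, 0} and NONE bounds (LB -1, UB 0) become {-1, -1}.  */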
49226 return build_complex (NULL, low, high);
49229 /* Generate a list of statements STMTS to initialize pointer bounds
49230 variable VAR with bounds LB and UB. Return the number of generated
49231 statements. */
49233 static int
49234 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
49236 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
49237 tree lhs, modify, var_p;
49239 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
49240 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
49242 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
49243 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
49244 append_to_statement_list (modify, stmts);
49246 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
49247 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
49248 TYPE_SIZE_UNIT (pointer_sized_int_node)));
49249 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
49250 append_to_statement_list (modify, stmts);
49252 return 2;
49255 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
49256 /* For i386, a common symbol is local only for non-PIE binaries. For
49257 x86-64, a common symbol is local only for non-PIE binaries or when the
49258 linker supports copy relocations in PIE binaries. */
49260 static bool
49261 ix86_binds_local_p (const_tree exp)
49263 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
49264 (!flag_pic
49265 || (TARGET_64BIT
49266 && HAVE_LD_PIE_COPYRELOC != 0)));
49268 #endif
49270 /* If MEM is in the form of [base+offset], extract the two parts
49271 of address and set to BASE and OFFSET, otherwise return false. */
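/* Illustrative examples: (reg:DI bx) yields BASE = bx, OFFSET = 0;
   (plus:DI (reg:DI bx) (const_int 16)) yields BASE = bx, OFFSET = 16;
   addresses with an index register or other forms return false.  */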
49273 static bool
49274 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
49276 rtx addr;
49278 gcc_assert (MEM_P (mem));
49280 addr = XEXP (mem, 0);
49282 if (GET_CODE (addr) == CONST)
49283 addr = XEXP (addr, 0);
49285 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
49287 *base = addr;
49288 *offset = const0_rtx;
49289 return true;
49292 if (GET_CODE (addr) == PLUS
49293 && (REG_P (XEXP (addr, 0))
49294 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49295 && CONST_INT_P (XEXP (addr, 1)))
49297 *base = XEXP (addr, 0);
49298 *offset = XEXP (addr, 1);
49299 return true;
49302 return false;
49305 /* Given OPERANDS of consecutive load/store, check if we can merge
49306 them into move multiple. LOAD is true if they are load instructions.
49307 MODE is the mode of memory operands. */
49309 bool
49310 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
49311 machine_mode mode)
49313 HOST_WIDE_INT offval_1, offval_2, msize;
49314 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
49316 if (load)
49318 mem_1 = operands[1];
49319 mem_2 = operands[3];
49320 reg_1 = operands[0];
49321 reg_2 = operands[2];
49323 else
49325 mem_1 = operands[0];
49326 mem_2 = operands[2];
49327 reg_1 = operands[1];
49328 reg_2 = operands[3];
49331 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
49333 if (REGNO (reg_1) != REGNO (reg_2))
49334 return false;
49336 /* Check if the addresses are in the form of [base+offset]. */
49337 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
49338 return false;
49339 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
49340 return false;
49342 /* Check if the bases are the same. */
49343 if (!rtx_equal_p (base_1, base_2))
49344 return false;
49346 offval_1 = INTVAL (offset_1);
49347 offval_2 = INTVAL (offset_2);
49348 msize = GET_MODE_SIZE (mode);
49349 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
49350 if (offval_1 + msize != offval_2)
49351 return false;
49353 return true;
49356 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
49358 static bool
49359 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
49360 optimization_type opt_type)
49362 switch (op)
49364 case asin_optab:
49365 case acos_optab:
49366 case log1p_optab:
49367 case exp_optab:
49368 case exp10_optab:
49369 case exp2_optab:
49370 case expm1_optab:
49371 case ldexp_optab:
49372 case scalb_optab:
49373 case round_optab:
49374 return opt_type == OPTIMIZE_FOR_SPEED;
49376 case rint_optab:
49377 if (SSE_FLOAT_MODE_P (mode1)
49378 && TARGET_SSE_MATH
49379 && !flag_trapping_math
49380 && !TARGET_SSE4_1)
49381 return opt_type == OPTIMIZE_FOR_SPEED;
49382 return true;
49384 case floor_optab:
49385 case ceil_optab:
49386 case btrunc_optab:
49387 if (SSE_FLOAT_MODE_P (mode1)
49388 && TARGET_SSE_MATH
49389 && !flag_trapping_math
49390 && TARGET_SSE4_1)
49391 return true;
49392 return opt_type == OPTIMIZE_FOR_SPEED;
49394 case rsqrt_optab:
49395 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
49397 default:
49398 return true;
49402 /* Address space support.
49404 This is not "far pointers" in the 16-bit sense, but an easy way
49405 to use %fs and %gs segment prefixes. Therefore:
49407 (a) All address spaces have the same modes,
49408 (b) All address spaces have the same address forms,
49409 (c) While %fs and %gs are technically subsets of the generic
49410 address space, they are probably not subsets of each other.
49411 (d) Since we have no access to the segment base register values
49412 without resorting to a system call, we cannot convert a
49413 non-default address space to a default address space.
49414 Therefore we do not claim %fs or %gs are subsets of generic.
49416 Therefore we can (mostly) use the default hooks. */
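/* Illustrative usage (assuming GCC's __seg_fs / __seg_gs named address
   space keywords for x86): a pointer declared as "int __seg_gs *p" is
   dereferenced with a %gs segment prefix, while a plain "int *" uses
   the generic address space.  */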
49418 /* All use of segmentation is assumed to make address 0 valid. */
49420 static bool
49421 ix86_addr_space_zero_address_valid (addr_space_t as)
49423 return as != ADDR_SPACE_GENERIC;
49426 static void
49427 ix86_init_libfuncs (void)
49429 if (TARGET_64BIT)
49431 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
49432 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
49434 else
49436 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
49437 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
49440 #if TARGET_MACHO
49441 darwin_rename_builtins ();
49442 #endif
49445 /* Generate call to __divmoddi4. */
49447 static void
49448 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
49449 rtx op0, rtx op1,
49450 rtx *quot_p, rtx *rem_p)
49452 rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
49454 rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
49455 mode,
49456 op0, GET_MODE (op0),
49457 op1, GET_MODE (op1),
49458 XEXP (rem, 0), Pmode);
49459 *quot_p = quot;
49460 *rem_p = rem;
49463 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
49464 FPU, assume that the fpcw is set to extended precision; when using
49465 only SSE, rounding is correct; when using both SSE and the FPU,
49466 the rounding precision is indeterminate, since either may be chosen
49467 apparently at random. */
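/* Illustrative mapping (assuming the usual FLT_EVAL_METHOD numbering):
   x87-only math in a standards-compliant mode yields
   FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE (FLT_EVAL_METHOD == 2), while
   SSE2-only math yields FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
   (FLT_EVAL_METHOD == 0).  */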
49469 static enum flt_eval_method
49470 ix86_excess_precision (enum excess_precision_type type)
49472 switch (type)
49474 case EXCESS_PRECISION_TYPE_FAST:
49475 /* The fastest type to promote to will always be the native type,
49476 whether that occurs with implicit excess precision or
49477 otherwise. */
49478 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49479 case EXCESS_PRECISION_TYPE_STANDARD:
49480 case EXCESS_PRECISION_TYPE_IMPLICIT:
49481 /* Otherwise, the excess precision we want when we are
49482 in a standards compliant mode, and the implicit precision we
49483 provide would be identical were it not for the unpredictable
49484 cases. */
49485 if (!TARGET_80387)
49486 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49487 else if (!TARGET_MIX_SSE_I387)
49489 if (!TARGET_SSE_MATH)
49490 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
49491 else if (TARGET_SSE2)
49492 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
49495 /* If we are in standards compliant mode, but we know we will
49496 calculate in unpredictable precision, return
49497 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
49498 excess precision if the target can't guarantee it will honor
49499 it. */
49500 return (type == EXCESS_PRECISION_TYPE_STANDARD
49501 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
49502 : FLT_EVAL_METHOD_UNPREDICTABLE);
49503 default:
49504 gcc_unreachable ();
49507 return FLT_EVAL_METHOD_UNPREDICTABLE;
49510 /* Target-specific selftests. */
49512 #if CHECKING_P
49514 namespace selftest {
49516 /* Verify that hard regs are dumped as expected (in compact mode). */
49518 static void
49519 ix86_test_dumping_hard_regs ()
49521 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
49522 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
49525 /* Test dumping an insn with repeated references to the same SCRATCH,
49526 to verify the rtx_reuse code. */
49528 static void
49529 ix86_test_dumping_memory_blockage ()
49531 set_new_first_and_last_insn (NULL, NULL);
49533 rtx pat = gen_memory_blockage ();
49534 rtx_reuse_manager r;
49535 r.preprocess (pat);
49537 /* Verify that the repeated references to the SCRATCH show use of
49538 reuse IDs. The first should be prefixed with a reuse ID,
49539 and the second should be dumped as a "reuse_rtx" of that ID.
49540 The expected string assumes Pmode == DImode. */
49541 if (Pmode == DImode)
49542 ASSERT_RTL_DUMP_EQ_WITH_REUSE
49543 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
49544 " (unspec:BLK [\n"
49545 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
49546 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
49549 /* Verify loading an RTL dump; specifically a dump of copying
49550 a param on x86_64 from a hard reg into the frame.
49551 This test is target-specific since the dump contains target-specific
49552 hard reg names. */
49554 static void
49555 ix86_test_loading_dump_fragment_1 ()
49557 rtl_dump_test t (SELFTEST_LOCATION,
49558 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
49560 rtx_insn *insn = get_insn_by_uid (1);
49562 /* The block structure and indentation here is purely for
49563 readability; it mirrors the structure of the rtx. */
49564 tree mem_expr;
49566 rtx pat = PATTERN (insn);
49567 ASSERT_EQ (SET, GET_CODE (pat));
49569 rtx dest = SET_DEST (pat);
49570 ASSERT_EQ (MEM, GET_CODE (dest));
49571 /* Verify the "/c" was parsed. */
49572 ASSERT_TRUE (RTX_FLAG (dest, call));
49573 ASSERT_EQ (SImode, GET_MODE (dest));
49575 rtx addr = XEXP (dest, 0);
49576 ASSERT_EQ (PLUS, GET_CODE (addr));
49577 ASSERT_EQ (DImode, GET_MODE (addr));
49579 rtx lhs = XEXP (addr, 0);
49580 /* Verify that the "frame" REG was consolidated. */
49581 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
49584 rtx rhs = XEXP (addr, 1);
49585 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
49586 ASSERT_EQ (-4, INTVAL (rhs));
49589 /* Verify the "[1 i+0 S4 A32]" was parsed. */
49590 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
49591 /* "i" should have been handled by synthesizing a global int
49592 variable named "i". */
49593 mem_expr = MEM_EXPR (dest);
49594 ASSERT_NE (mem_expr, NULL);
49595 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
49596 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
49597 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
49598 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
49599 /* "+0". */
49600 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
49601 ASSERT_EQ (0, MEM_OFFSET (dest));
49602 /* "S4". */
49603 ASSERT_EQ (4, MEM_SIZE (dest));
49604 /* "A32. */
49605 ASSERT_EQ (32, MEM_ALIGN (dest));
49608 rtx src = SET_SRC (pat);
49609 ASSERT_EQ (REG, GET_CODE (src));
49610 ASSERT_EQ (SImode, GET_MODE (src));
49611 ASSERT_EQ (5, REGNO (src));
49612 tree reg_expr = REG_EXPR (src);
49613 /* "i" here should point to the same var as for the MEM_EXPR. */
49614 ASSERT_EQ (reg_expr, mem_expr);
49619 /* Verify that the RTL loader copes with a call_insn dump.
49620 This test is target-specific since the dump contains a target-specific
49621 hard reg name. */
49623 static void
49624 ix86_test_loading_call_insn ()
49626 /* The test dump includes register "xmm0", which requires TARGET_SSE
49627 to exist. */
49628 if (!TARGET_SSE)
49629 return;
49631 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
49633 rtx_insn *insn = get_insns ();
49634 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
49636 /* "/j". */
49637 ASSERT_TRUE (RTX_FLAG (insn, jump));
49639 rtx pat = PATTERN (insn);
49640 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
49642 /* Verify REG_NOTES. */
49644 /* "(expr_list:REG_CALL_DECL". */
49645 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
49646 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
49647 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
49649 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
49650 rtx_expr_list *note1 = note0->next ();
49651 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
49653 ASSERT_EQ (NULL, note1->next ());
49656 /* Verify CALL_INSN_FUNCTION_USAGE. */
49658 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
49659 rtx_expr_list *usage
49660 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
49661 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
49662 ASSERT_EQ (DFmode, GET_MODE (usage));
49663 ASSERT_EQ (USE, GET_CODE (usage->element ()));
49664 ASSERT_EQ (NULL, usage->next ());
49668 /* Verify that the RTL loader copes with a dump from print_rtx_function.
49669 This test is target-specific since the dump contains target-specific
49670 hard reg names. */
49672 static void
49673 ix86_test_loading_full_dump ()
49675 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
49677 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49679 rtx_insn *insn_1 = get_insn_by_uid (1);
49680 ASSERT_EQ (NOTE, GET_CODE (insn_1));
49682 rtx_insn *insn_7 = get_insn_by_uid (7);
49683 ASSERT_EQ (INSN, GET_CODE (insn_7));
49684 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
49686 rtx_insn *insn_15 = get_insn_by_uid (15);
49687 ASSERT_EQ (INSN, GET_CODE (insn_15));
49688 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
49690 /* Verify crtl->return_rtx. */
49691 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
49692 ASSERT_EQ (0, REGNO (crtl->return_rtx));
49693 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
49696 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
49697 In particular, verify that it correctly loads the 2nd operand.
49698 This test is target-specific since these are machine-specific
49699 operands (and enums). */
49701 static void
49702 ix86_test_loading_unspec ()
49704 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
49706 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
49708 ASSERT_TRUE (cfun);
49710 /* Test of an UNSPEC. */
49711 rtx_insn *insn = get_insns ();
49712 ASSERT_EQ (INSN, GET_CODE (insn));
49713 rtx set = single_set (insn);
49714 ASSERT_NE (NULL, set);
49715 rtx dst = SET_DEST (set);
49716 ASSERT_EQ (MEM, GET_CODE (dst));
49717 rtx src = SET_SRC (set);
49718 ASSERT_EQ (UNSPEC, GET_CODE (src));
49719 ASSERT_EQ (BLKmode, GET_MODE (src));
49720 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
49722 rtx v0 = XVECEXP (src, 0, 0);
49724 /* Verify that the two uses of the first SCRATCH have pointer
49725 equality. */
49726 rtx scratch_a = XEXP (dst, 0);
49727 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
49729 rtx scratch_b = XEXP (v0, 0);
49730 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
49732 ASSERT_EQ (scratch_a, scratch_b);
49734 /* Verify that the two mems are thus treated as equal. */
49735 ASSERT_TRUE (rtx_equal_p (dst, v0));
49737 /* Verify that the insn is recognized. */
49738 ASSERT_NE(-1, recog_memoized (insn));
49740 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
49741 insn = NEXT_INSN (insn);
49742 ASSERT_EQ (INSN, GET_CODE (insn));
49744 set = single_set (insn);
49745 ASSERT_NE (NULL, set);
49747 src = SET_SRC (set);
49748 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
49749 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
49752 /* Run all target-specific selftests. */
49754 static void
49755 ix86_run_selftests (void)
49757 ix86_test_dumping_hard_regs ();
49758 ix86_test_dumping_memory_blockage ();
49760 /* Various tests of loading RTL dumps, here because they contain
49761 ix86-isms (e.g. names of hard regs). */
49762 ix86_test_loading_dump_fragment_1 ();
49763 ix86_test_loading_call_insn ();
49764 ix86_test_loading_full_dump ();
49765 ix86_test_loading_unspec ();
49768 } // namespace selftest
49770 #endif /* CHECKING_P */
49772 /* Initialize the GCC target structure. */
49773 #undef TARGET_RETURN_IN_MEMORY
49774 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
49776 #undef TARGET_LEGITIMIZE_ADDRESS
49777 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
49779 #undef TARGET_ATTRIBUTE_TABLE
49780 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
49781 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
49782 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
49783 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49784 # undef TARGET_MERGE_DECL_ATTRIBUTES
49785 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
49786 #endif
49788 #undef TARGET_COMP_TYPE_ATTRIBUTES
49789 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
49791 #undef TARGET_INIT_BUILTINS
49792 #define TARGET_INIT_BUILTINS ix86_init_builtins
49793 #undef TARGET_BUILTIN_DECL
49794 #define TARGET_BUILTIN_DECL ix86_builtin_decl
49795 #undef TARGET_EXPAND_BUILTIN
49796 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
49798 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
49799 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
49800 ix86_builtin_vectorized_function
49802 #undef TARGET_VECTORIZE_BUILTIN_GATHER
49803 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
49805 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
49806 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
49808 #undef TARGET_BUILTIN_RECIPROCAL
49809 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
49811 #undef TARGET_ASM_FUNCTION_EPILOGUE
49812 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
49814 #undef TARGET_ENCODE_SECTION_INFO
49815 #ifndef SUBTARGET_ENCODE_SECTION_INFO
49816 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
49817 #else
49818 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
49819 #endif
49821 #undef TARGET_ASM_OPEN_PAREN
49822 #define TARGET_ASM_OPEN_PAREN ""
49823 #undef TARGET_ASM_CLOSE_PAREN
49824 #define TARGET_ASM_CLOSE_PAREN ""
49826 #undef TARGET_ASM_BYTE_OP
49827 #define TARGET_ASM_BYTE_OP ASM_BYTE
49829 #undef TARGET_ASM_ALIGNED_HI_OP
49830 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
49831 #undef TARGET_ASM_ALIGNED_SI_OP
49832 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
49833 #ifdef ASM_QUAD
49834 #undef TARGET_ASM_ALIGNED_DI_OP
49835 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
49836 #endif
49838 #undef TARGET_PROFILE_BEFORE_PROLOGUE
49839 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
49841 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
49842 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
49844 #undef TARGET_ASM_UNALIGNED_HI_OP
49845 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
49846 #undef TARGET_ASM_UNALIGNED_SI_OP
49847 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
49848 #undef TARGET_ASM_UNALIGNED_DI_OP
49849 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
49851 #undef TARGET_PRINT_OPERAND
49852 #define TARGET_PRINT_OPERAND ix86_print_operand
49853 #undef TARGET_PRINT_OPERAND_ADDRESS
49854 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
49855 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
49856 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
49857 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
49858 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
49860 #undef TARGET_SCHED_INIT_GLOBAL
49861 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
49862 #undef TARGET_SCHED_ADJUST_COST
49863 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
49864 #undef TARGET_SCHED_ISSUE_RATE
49865 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
49866 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
49867 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
49868 ia32_multipass_dfa_lookahead
49869 #undef TARGET_SCHED_MACRO_FUSION_P
49870 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
49871 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
49872 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
49874 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
49875 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
49877 #undef TARGET_MEMMODEL_CHECK
49878 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
49880 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
49881 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
49883 #ifdef HAVE_AS_TLS
49884 #undef TARGET_HAVE_TLS
49885 #define TARGET_HAVE_TLS true
49886 #endif
49887 #undef TARGET_CANNOT_FORCE_CONST_MEM
49888 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
49889 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
49890 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
49892 #undef TARGET_DELEGITIMIZE_ADDRESS
49893 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
49895 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
49896 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
49898 #undef TARGET_MS_BITFIELD_LAYOUT_P
49899 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
49901 #if TARGET_MACHO
49902 #undef TARGET_BINDS_LOCAL_P
49903 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
49904 #else
49905 #undef TARGET_BINDS_LOCAL_P
49906 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
49907 #endif
49908 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49909 #undef TARGET_BINDS_LOCAL_P
49910 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
49911 #endif
49913 #undef TARGET_ASM_OUTPUT_MI_THUNK
49914 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
49915 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
49916 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
49918 #undef TARGET_ASM_FILE_START
49919 #define TARGET_ASM_FILE_START x86_file_start
49921 #undef TARGET_OPTION_OVERRIDE
49922 #define TARGET_OPTION_OVERRIDE ix86_option_override
49924 #undef TARGET_REGISTER_MOVE_COST
49925 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
49926 #undef TARGET_MEMORY_MOVE_COST
49927 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
49928 #undef TARGET_RTX_COSTS
49929 #define TARGET_RTX_COSTS ix86_rtx_costs
49930 #undef TARGET_ADDRESS_COST
49931 #define TARGET_ADDRESS_COST ix86_address_cost
49933 #undef TARGET_FLAGS_REGNUM
49934 #define TARGET_FLAGS_REGNUM FLAGS_REG
49935 #undef TARGET_FIXED_CONDITION_CODE_REGS
49936 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
49937 #undef TARGET_CC_MODES_COMPATIBLE
49938 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
49940 #undef TARGET_MACHINE_DEPENDENT_REORG
49941 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
49943 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
49944 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
49946 #undef TARGET_BUILD_BUILTIN_VA_LIST
49947 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
49949 #undef TARGET_FOLD_BUILTIN
49950 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
49952 #undef TARGET_GIMPLE_FOLD_BUILTIN
49953 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
49955 #undef TARGET_COMPARE_VERSION_PRIORITY
49956 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
49958 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
49959 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
49960 ix86_generate_version_dispatcher_body
49962 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
49963 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
49964 ix86_get_function_versions_dispatcher
49966 #undef TARGET_ENUM_VA_LIST_P
49967 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
49969 #undef TARGET_FN_ABI_VA_LIST
49970 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
49972 #undef TARGET_CANONICAL_VA_LIST_TYPE
49973 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
49975 #undef TARGET_EXPAND_BUILTIN_VA_START
49976 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
49978 #undef TARGET_MD_ASM_ADJUST
49979 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
49981 #undef TARGET_C_EXCESS_PRECISION
49982 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
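/* Queries for which scalar and vector modes are supported, and the
   mode used for non-standard floating constant suffixes.  */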
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
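/* Stack protector support: where the canary is located and (except on
   Mach-O) how a failed check is reported.  */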
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
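/* Function return values: the registers used for return values and how
   modes are promoted across calls.  */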
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
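/* Register allocation and reload: secondary reloads, classes that must
   go through memory, and preferred or likely-spilled classes.  */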
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
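/* Auto-vectorizer hooks: cost model, constant-permutation support,
   preferred SIMD modes and sizes, and mask modes.  */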
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
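/* Per-function target option state: attribute ((target)) validation,
   save/restore/streaming of option settings, and inlining
   compatibility between differently-optioned functions.  */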
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore
#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print
#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority
#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
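/* SIMD clone ("#pragma omp declare simd") hooks: vector size and
   simdlen selection, clone adjustment, and usability checks.  */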
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen
#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust
#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
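/* Mode-switching hooks for the optimize_mode_switching pass; on x86
   these are used for transitions such as vzeroupper insertion and
   x87 control-word changes.  */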
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
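/* Intel MPX pointer-bounds checking (-fcheck-pointer-bounds) hooks:
   loading and storing argument and return-value bounds, the bound
   mode, and the checker builtins.  */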
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
#undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
#define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
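/* Relationships between hard registers and modes: register counts per
   mode, which modes fit in which registers, mode tieability, and
   partial call clobbers.  */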
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered
#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
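/* Collect all of the hooks selected above into the target vector used
   by the rest of the compiler.  */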
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"